import pandas as pd
# Catalogue of individual pandas.read_csv options. Each call re-reads the same
# file and rebinds `df`, so only the result of the last call is kept.
# NOTE(review): the calls use different delimiters and column names, so they
# cannot all parse the same file successfully -- confirm the actual file layout.
wine_csv = "datasets/winequality-white.csv"

# Default parse (comma delimiter, header inferred from the first line).
df = pd.read_csv(wine_csv)
print(df.head())

# Field delimiter.
df = pd.read_csv(wine_csv, sep=";")
df = pd.read_csv(wine_csv, sep="\t")

# Text encoding of the file.
df = pd.read_csv(wine_csv, encoding="utf-8")
df = pd.read_csv(wine_csv, encoding="cp949")

# Header handling: no header row, or explicitly supplied column names.
df = pd.read_csv(wine_csv, header=None)
df = pd.read_csv(wine_csv, names=["col1", "col2", "col3"])

# Use the first column as the row index.
df = pd.read_csv(wine_csv, index_col=0)

# Load only a subset of columns, selected by name or by position.
df = pd.read_csv(wine_csv, usecols=["alcohol", "quality"])
df = pd.read_csv(wine_csv, usecols=[0, 2, 4])

# Extra strings that should be parsed as missing values.
df = pd.read_csv(wine_csv, na_values=["?", "NA", "-", ""])

# Explicit per-column dtypes.
df = pd.read_csv(wine_csv, dtype={"quality": "int32", "alcohol": "float64"})

# Ignore spaces that directly follow the delimiter.
df = pd.read_csv(wine_csv, skipinitialspace=True)

# Skip the first 5 lines of the file (this count includes the header line).
df = pd.read_csv(wine_csv, skiprows=5)

# Skip the last 10 lines; skipfooter is not supported by the C engine.
df = pd.read_csv(wine_csv, skipfooter=10, engine="python")
# Read the file in pieces of 100 rows instead of loading it all at once;
# iterating the reader yields one DataFrame per chunk.
chunks = pd.read_csv("datasets/winequality-white.csv", chunksize=100)
for chunk in chunks:
    # The loop body must be indented; without it the file does not compile.
    print(chunk.head())
# Transform a column while parsing: every "alcohol" field is converted to
# float and multiplied by 1.2.
df = pd.read_csv(
    "datasets/winequality-white.csv",
    converters={"alcohol": lambda x: float(x) * 1.2},
)

# Duplicate column names are de-duplicated by default (X, X.1, ...).
# The former `mangle_dupe_cols=True` keyword only restated that default; it was
# deprecated in pandas 1.5 and removed in pandas 2.0, where passing it raises
# TypeError, so the plain call is used instead.
df = pd.read_csv("datasets/winequality-white.csv")

# Read directly from compressed files.
df = pd.read_csv("datasets/winequality-white.csv.gz", compression="gzip")
df = pd.read_csv("datasets/winequality-white.zip", compression="zip")
import pandas as pd
# Examples that combine several read_csv options in one call. Each call
# rebinds `df`, so only the last result is kept.
# NOTE(review): only some calls pass sep=";" while the others select columns by
# name with the default comma delimiter -- confirm the file's real delimiter.

# Semicolon-delimited, UTF-8, keep only two columns.
df = pd.read_csv(
    "datasets/winequality-white.csv",
    sep=";",
    encoding="utf-8",
    usecols=["alcohol", "quality"],
)

# Use "quality" as the index and treat extra markers as missing values.
df = pd.read_csv(
    "datasets/winequality-white.csv",
    index_col="quality",
    na_values=["?", "NA", "-"],
)

# Force the dtype of one column and ignore spaces after the delimiter.
df = pd.read_csv(
    "datasets/winequality-white.csv",
    dtype={"alcohol": "float64"},
    skipinitialspace=True,
)

# Drop the first 5 and last 10 data rows and keep two named columns.
# skiprows=range(1, 6) skips lines 1-5 but keeps line 0 (the header); a plain
# skiprows=5 would also discard the header, so the names in `usecols` could
# never be matched. skipfooter requires the Python engine.
df = pd.read_csv(
    "datasets/winequality-white.csv",
    skiprows=range(1, 6),
    skipfooter=10,
    usecols=["pH", "density"],
    engine="python",
)

# Missing-value markers together with a converter that scales "alcohol".
# NOTE(review): presumably the converter receives the raw field text, so
# float(x) would raise on a marker such as "?" -- confirm the column has none.
df = pd.read_csv(
    "datasets/winequality-white.csv",
    na_values=["?", "NA"],
    converters={"alcohol": lambda x: float(x) * 1.2},
)

# Gzip-compressed, semicolon-delimited file.
df = pd.read_csv(
    "datasets/winequality-white.csv.gz",
    compression="gzip",
    sep=";",
)
# Stream the file 100 rows at a time and preview each chunk.
chunks = pd.read_csv("datasets/winequality-white.csv", chunksize=100)
for chunk in chunks:
    # Indented so that it is actually the body of the loop.
    print(chunk.head())
# Textbook examples
import pandas as pd
# Load the wine-quality CSV with default parsing options. `data` is the frame
# written back out by the saving section at the end of the file.
file_path = 'datasets/winequality-white.csv'
data = pd.read_csv(file_path)
# A bare `data.head()` is silently discarded when run as a script, so print the
# preview explicitly, as the other sections do.
print(data.head())
# Load the 'Online Retail' sheet from a local Excel workbook.
excel = "datasets/Online Retail.xlsx"
retail_data1 = pd.read_excel(excel, sheet_name='Online Retail')
print(retail_data1.head())

# Load the same sheet straight from a URL.
url_excel = "https://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx"
retail_data2 = pd.read_excel(url_excel, sheet_name='Online Retail')
# Preview the frame that was just downloaded; the previous code referenced the
# undefined name `retail_data`, which raises NameError.
print(retail_data2.head())
import requests
import pandas as pd
# Fetch a JSON document over HTTP and turn it into a DataFrame.
url_json = "https://jsonplaceholder.typicode.com/todos"
# requests has no default timeout; set one so a stalled server cannot hang the
# script forever.
response = requests.get(url_json, timeout=10)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
todos_data = response.json()
df_todos = pd.DataFrame(todos_data)
print(df_todos.head())
import sqlite3
# Read a table from a local SQLite database into a DataFrame.
# `url_sqlite` is kept for reference only: nothing below downloads it, so the
# database must already exist at `local_file`.
# NOTE(review): the URL is a GitHub "blob" page link -- confirm it is the
# intended download location for the raw database file.
url_sqlite = "https://github.com/lerocha/chinook-database/blob/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
local_file = "datasets/Chinook_Sqlite.sqlite"
conn = sqlite3.connect(local_file)
try:
    query = "SELECT * from Customer"
    df_customers = pd.read_sql_query(query, conn)
finally:
    # Always release the connection, even if the query raises.
    conn.close()
print(df_customers.head())
# Parse every HTML table found on a web page; read_html returns a list of
# DataFrames, one per table.
url_html = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)"
tables = pd.read_html(url_html)

# Pick the table at position 2 of the parsed list.
# NOTE(review): the index is hard-coded and depends on the current page
# layout -- verify that it still selects the intended table.
gdp_table = tables[2]
print(gdp_table.head(n=5))
# Saving data
# Write the `data` frame out in several formats, without the row index.
data.to_csv('processed_wine_data.csv', index=False)
data.to_excel('processed_wine_data.xlsx', index=False)
# orient='records' writes a JSON array with one object per row.
data.to_json('processed_wine_data.json', orient='records')

import sqlite3

# Store the frame as table 'wine_quality', replacing the table if it exists.
conn = sqlite3.connect('wine_data.db')
try:
    data.to_sql('wine_quality', conn, if_exists='replace', index=False)
finally:
    # Close the connection even when the write fails.
    conn.close()