Data Science
여러 데이터 쉽게 불러오기
태지쌤
2023. 1. 31. 20:00
반응형
import pandas as pd
from time import time
# Time how long pandas takes to read the full transactions CSV.
t0 = time()
train = pd.read_csv('./data/transactions_train.csv')
elapsed = time() - t0
print(f"데이터 로드 시간 : {elapsed}초")
# Bare expression, presumably left for notebook-style display of the frame.
train
# Report the total memory footprint of `train` in GiB.
# deep=True makes pandas measure the actual contents of object columns.
BYTES_PER_GIB = 1024 ** 3
total_bytes = train.memory_usage(deep=True).sum()
mem_usage = total_bytes / BYTES_PER_GIB
print(f"Memory Usage : {mem_usage:.4} GiB")
# Load only part of the CSV instead of the whole file.
# NOTE(review): the original note also mentions rebuilding the data as
# parquet; no parquet step is visible in this snippet.
csv_path = './Downloads/transactions_train.csv'
# First 1000 rows only.
part = pd.read_csv(csv_path, nrows=1000)
# Only the two named columns.
part2 = pd.read_csv(csv_path, usecols=['t_dat', 'sales_channel_id'])
part2
# Count rows per sales_channel_id across the whole CSV without loading it
# all at once: read the file in chunks and accumulate the per-chunk counts.
# Start from an all-zero Series indexed by the channels seen in `part`.
sales = part["sales_channel_id"].value_counts() * 0
for chunk in pd.read_csv('./Downloads/transactions_train.csv',
                         chunksize=3000000):
    chunk_counts = chunk["sales_channel_id"].value_counts()
    print(chunk_counts)
    # fill_value=0 keeps a channel that is missing on one side from turning
    # the running total into NaN; the cast restores integer counts afterwards.
    sales = sales.add(chunk_counts, fill_value=0).astype("int64")
sales
# Keep only the rows whose t_dat is after 2020-06-01.
# NOTE(review): this compares t_dat against a string; presumably t_dat holds
# ISO-formatted (YYYY-MM-DD) date strings so the ordering matches — confirm.
train.loc[train["t_dat"] > '2020-06-01']
