# Pandas中read_csv()方法参数用法介绍之一

import pandas as pd
from io import StringIO
# Sample CSV text: a header row followed by three data rows.
data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3'
d = pd.read_csv(StringIO(data))

# usecols: restrict parsing to the columns of interest, which can greatly
# speed up parsing and lower memory usage. The callable is given each column
# name and keeps those for which it returns True (here col1 and col3,
# compared after upper-casing the name).
d = pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in {'COL1', 'COL3'})

# skiprows: filter lines by index. The callable is given each line index and
# lines for which it returns True are skipped, so every odd-numbered line is
# dropped while line 0 (the header) is kept.
d = pd.read_csv(StringIO(data), skiprows=lambda x: x % 2 != 0)

# Pandas中read_csv()方法参数用法介绍之一

# Specify column data types.
import numpy as np
# The last data row has only three fields, so column 'd' is missing there.
data = 'a,b,c,d\n1,2,3,4\n5,6,7,8\n9,10,11'
# A single dtype is applied to every column.
df = pd.read_csv(StringIO(data), dtype=object)
print(df)
# A dict assigns dtypes per column; column 'a' is not listed and is inferred.
df = pd.read_csv(StringIO(data), dtype={'b': object, 'c': np.float64, 'd': 'string'})
# Do not call this variable `type`: that would shadow the builtin that the
# `.apply(type)` calls further down this script depend on.
column_dtypes = df.dtypes
print(column_dtypes)

# Pandas中read_csv()方法参数用法介绍之一

# converters parameter: transform values while the data is being loaded.
data = "col_1\n1\n2\n'A'\n4.22"
# Map each column name to the callable applied to that column's values.
column_converters = {'col_1': str}
df = pd.read_csv(StringIO(data), converters=column_converters)
print(df)
# Tally the Python type of every value in the converted column.
value_types = df['col_1'].apply(type)
r = value_types.value_counts()
print(r)

# Pandas中read_csv()方法参数用法介绍之一

# Force a type conversion on the loaded data: parse the CSV text held in the
# module-level `data` as-is, then coerce the column to numeric.
df2 = pd.read_csv(StringIO(data))
# errors='coerce' replaces entries that cannot be parsed as numbers with NaN.
df2['col_1'] = pd.to_numeric(df2['col_1'], errors='coerce')
# Print the per-type tally: as a bare expression statement the result would be
# computed and silently discarded when this file runs as a script.
print(df2['col_1'].apply(type).value_counts())

# Pandas中read_csv()方法参数用法介绍之一

# Mixed-type data column (the data has to reach a certain order of magnitude):
# one million integers with two strings inserted in the middle.
col_1 = list(range(500000)) + ['a', 'b'] + list(range(500000))

df = pd.DataFrame({'col_1': col_1})
# Round-trip through a CSV file in the current working directory.
df.to_csv('foo.csv')
mixed_df = pd.read_csv('foo.csv')
# Print both results: as bare expression statements they would be computed and
# silently discarded when this file runs as a script.
print(mixed_df['col_1'].apply(type).value_counts())
print(mixed_df['col_1'].dtype)

# Pandas中read_csv()方法参数用法介绍之一