import numpy as np
import pandas as pd
# pd.Series(): construct data
# Build a one-dimensional Series from a plain list. The np.nan entry is a
# missing value; the recorded output below shows the result stored as float64.
s = pd.Series([1, 3, 5, np.nan, 44, 1])
print(s)
# 0 1.0
# 1 3.0
# 2 5.0
# 3 NaN
# 4 44.0
# 5 1.0
# dtype: float64
# pd.date_range(): generate data
# Two consecutive timestamps starting at 2019-02-25; the recorded output
# below shows a daily ('D') frequency.
dates = pd.date_range('20190225', periods=2)
print(dates)
# DatetimeIndex(['2019-02-25', '2019-02-26'], dtype='datetime64[ns]', freq='D')

# pd.DataFrame(): construct data
# A 2x4 frame of samples from np.random.randn, with `dates` as the row index
# and 'a'..'d' as column labels. The generator is not seeded, so the printed
# values differ on every run (the numbers below are from one sample run).
df = pd.DataFrame(
    np.random.randn(2, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print(df)
# a b c d
# 2019-02-25 1.236639 -0.918432 -0.211460 1.834082
# 2019-02-26 1.191895 -1.680464 0.863866 0.171246
# pd.DataFrame(): construct data
# Wrap a 3x4 array holding 0..11. No index or columns are passed, so the
# recorded output below shows the default integer labels on both axes.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df1)
# 0 1 2 3
# 0 0 1 2 3
# 1 4 5 6 7
# 2 8 9 10 11
# pd.DataFrame(): construct data
# Build a frame from a dict that maps column names to values of different
# kinds. The scalar entries ('A', 'B', 'F') are repeated on every row, as
# the recorded output below shows; the five-element entries fix the length.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(5)), dtype='float32'),
    'D': np.array([3] * 5, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train', 'yzn']),
    'F': 'foo',
})
print(df2)
# A B C D E F
# 0 1.0 2013-01-02 1.0 3 test foo
# 1 1.0 2013-01-02 1.0 3 train foo
# 2 1.0 2013-01-02 1.0 3 test foo
# 3 1.0 2013-01-02 1.0 3 train foo
# 4 1.0 2013-01-02 1.0 3 yzn foo
# Attributes and methods demonstrated below:
#   df2.dtypes, df2.index, df2.columns,
#   df2.values, df2.describe(), df2.T,
#   df.sort_index(axis=1, ascending=False), df2.sort_values(by='E')
#
# The commented blocks are outputs recorded by the original author.
# NOTE(review): exact formatting (e.g. the index class name) depends on the
# pandas version that produced them - confirm against the version in use.

# Per-column data types.
print(df2.dtypes)
# A float64
# B datetime64[ns]
# C float32
# D int32
# E category
# F object
# dtype: object

# Row labels.
print(df2.index)
# Int64Index([0, 1, 2, 3, 4], dtype='int64')

# Column labels.
print(df2.columns)
# Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

# Underlying data as an array of rows.
print(df2.values)
# [[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
# [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
# [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
# [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
# [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'yzn' 'foo']]

# Summary statistics; the recorded output covers columns A, C and D only.
print(df2.describe())
# A C D
# count 5.0 5.0 5.0
# mean 1.0 1.0 3.0
# std 0.0 0.0 0.0
# min 1.0 1.0 3.0
# 25% 1.0 1.0 3.0
# 50% 1.0 1.0 3.0
# 75% 1.0 1.0 3.0
# max 1.0 1.0 3.0

# Transpose: rows become columns and vice versa.
print(df2.T)
# 0 ... 4
# A 1 ... 1
# B 2013-01-02 00:00:00 ... 2013-01-02 00:00:00
# C 1 ... 1
# D 3 ... 3
# E test ... yzn
# F foo ... foo
# [6 rows x 5 columns]

# Sort by column labels (axis=1), descending: d, c, b, a.
# The numbers differ from the earlier print of `df` because they were
# recorded from a different run of the unseeded random generator.
print(df.sort_index(axis=1, ascending=False))
# d c b a
# 2019-02-25 -0.086707 0.388089 0.513976 -0.148502
# 2019-02-26 -0.237655 -0.799583 -1.722373 0.318766

# Sort by row labels (axis=0), descending: latest date first.
print(df.sort_index(axis=0, ascending=False))
# a b c d
# 2019-02-26 -2.117756 0.453841 -2.900436 1.061481
# 2019-02-25 -0.974467 0.598005 -0.552265 -2.487490

# Sort rows by the values in column 'E'.
print(df2.sort_values(by='E'))
# A B C D E F
# 0 1.0 2013-01-02 1.0 3 test foo
# 2 1.0 2013-01-02 1.0 3 test foo
# 1 1.0 2013-01-02 1.0 3 train foo
# 3 1.0 2013-01-02 1.0 3 train foo
# 4 1.0 2013-01-02 1.0 3 yzn foo
# END