环境:Python 3.6.4 |Anaconda, Inc.
Python常用容器类型
1.list
# Basic list operations: type check, in-place modification, append,
# iteration, concatenation, repetition and membership testing.
l = [1, 'a', 2, 'b']
print(type(l))
print('修改前:', l)

# Modify an element of the list in place.
l[0] = 3
print('修改后:', l)

# Append an element at the end.
l.append(4)
print('添加后:', l)

# Iterate over the list directly.
print('遍历list(for循环):')
for item in l:
    print(item)

# Iterate over the list by index.
print('遍历list(while循环):')
i = 0
while i != len(l):
    print(l[i])
    i += 1

# Concatenate two lists.
print('列表合并(+):', [1, 2] + [3, 4])

# Repeat a list.
print('列表重复(*):', [1, 2] * 5)

# Membership test.
print('判断元素存在(in):', 1 in [1, 2])
<class \'list\'> 修改前: [1, \'a\', 2, \'b\'] 修改后: [3, \'a\', 2, \'b\'] 添加后: [3, \'a\', 2, \'b\', 4] 遍历list(for循环): 3 a 2 b 4 遍历list(while循环): 3 a 2 b 4 列表合并(+): [1, 2, 3, 4] 列表重复(*): [1, 2, 1, 2, 1, 2, 1, 2, 1, 2] 判断元素存在(in): True
2.tuple
# Basic tuple operations: iteration and unpacking.
t = (1, 'a', 2, 'b')
print(type(t))

# A tuple is immutable; the assignment below would raise an error.
# t[0] = 3

# Iterate over the tuple directly.
print('遍历tuple(for循环):')
for item in t:
    print(item)

# Iterate over the tuple by index.
print('遍历tuple(while循环):')
i = 0
while i != len(t):
    print(t[i])
    i += 1

# Unpack: `_` is used for the positions we do not care about.
a, b, _, _ = t
# Print the names that were actually bound by the unpacking
# (the original printed `c`, which is never assigned here).
print('unpack: ', a, b)

# The number of target names must equal the tuple length, otherwise
# unpacking raises an error. This often shows up when assigning the
# return value of a function.
# a, b, c = t
3.dictionary
# Dictionary basics: lookup by key and iteration over keys, values, items.
d = {'小象学院': 'http://www.chinahadoop.cn/',
     '百度': 'https://www.baidu.com/',
     '阿里巴巴': 'https://www.alibaba.com/',
     '腾讯': 'https://www.tencent.com/'}

print('通过key获取value: ', d['小象学院'])

# Iterate over the keys.
print('遍历key: ')
for key in d.keys():
    print(key)

# Iterate over the values.
print('遍历value: ')
for value in d.values():
    print(value)

# Iterate over (key, value) pairs.
print('遍历item: ')
for key, value in d.items():
    print(key + ': ' + value)

# Same iteration, formatted with str.format().
print('format输出格式:')
for key, value in d.items():
    print('{}的网址是{}'.format(key, value))
通过key获取value: http://www.chinahadoop.cn/ 遍历key: 小象学院 百度 阿里巴巴 腾讯 遍历value: http://www.chinahadoop.cn/ https://www.baidu.com/ https://www.alibaba.com/ https://www.tencent.com/ 遍历item: 小象学院: http://www.chinahadoop.cn/ 百度: https://www.baidu.com/ 阿里巴巴: https://www.alibaba.com/ 腾讯: https://www.tencent.com/ format输出格式: 小象学院的网址是http://www.chinahadoop.cn/ 百度的网址是https://www.baidu.com/ 阿里巴巴的网址是https://www.alibaba.com/ 腾讯的网址是https://www.tencent.com/
4.set
# Set basics: creation (duplicates are dropped), add() and update().
print('创建set:')
my_set = {1, 2, 3}
print(my_set)
# Building a set from a list removes the duplicate 2.
my_set = set([1, 2, 3, 2])
print(my_set)

print('添加单个元素:')
# Adding an element that is already present leaves the set unchanged.
my_set.add(3)
print('添加3', my_set)

my_set.add(4)
print('添加4', my_set)

print('添加多个元素:')
# update() adds every element of the iterable that is not yet present.
my_set.update([4, 5, 6])
print(my_set)
创建set:
{1, 2, 3}
{1, 2, 3}
添加单个元素:
添加3 {1, 2, 3}
添加4 {1, 2, 3, 4}
添加多个元素:
{1, 2, 3, 4, 5, 6}
5.Counter
- 初始化
import collections

# Three equivalent ways to build a Counter: from an iterable of
# elements, from a mapping of element -> count, and from keyword args.
c1 = collections.Counter(['a', 'b', 'c', 'a', 'b', 'b'])
c2 = collections.Counter({'a': 2, 'b': 3, 'c': 1})
c3 = collections.Counter(a=2, b=3, c=1)

print(c1)
print(c2)
print(c3)
Counter({\'b\': 3, \'a\': 2, \'c\': 1})
Counter({\'b\': 3, \'a\': 2, \'c\': 1})
Counter({\'b\': 3, \'a\': 2, \'c\': 1})
- 更新内容
# Note: update() ADDS the given counts to the existing ones; it does
# not replace them (negative counts are allowed).
c1.update({'a': 4, 'c': -2, 'd': 4})
print(c1)
Counter({\'a\': 6, \'d\': 4, \'b\': 3, \'c\': -1})
- 访问内容
# Access counts by key.
print('a=', c1['a'])
print('b=', c1['b'])
# Unlike a plain dict, a missing key yields 0 instead of raising KeyError.
print('e=', c1['e'])
a= 6 b= 3 e= 0
- elements()方法
# elements() repeats each element as many times as its count;
# entries whose count is less than one (here 'c') are skipped.
for element in c1.elements():
    print(element)
d d d d b b b a a a a a a
- most_common()方法
# most_common(3) returns the three elements with the highest counts as
# (element, count) pairs. Notebook cell: the value of the last
# expression is displayed.
c1.most_common(3)
# Output: [('a', 6), ('d', 4), ('b', 3)]
6.defaultdict
# Count how many times each letter occurs in a string.
s = 'chinadoop'

# Approach 1: collections.Counter does the counting directly.
print(collections.Counter(s))
Counter({\'o\': 2, \'d\': 1, \'c\': 1, \'p\': 1, \'a\': 1, \'n\': 1, \'h\': 1, \'i\': 1})
# Approach 2: a plain dict, which needs an explicit check for the
# first occurrence of each letter.
counter = {}
for c in s:
    if c not in counter:
        counter[c] = 1
    else:
        counter[c] += 1

print(counter.items())
dict_items([(\'d\', 1), (\'c\', 1), (\'p\', 1), (\'a\', 1), (\'o\', 2), (\'n\', 1), (\'h\', 1), (\'i\', 1)])
# Approach 3: defaultdict(int) supplies 0 for a missing key, so no
# first-occurrence check is needed.
counter2 = collections.defaultdict(int)
for c in s:
    counter2[c] += 1
print(counter2.items())
dict_items([(\'d\', 1), (\'c\', 1), (\'p\', 1), (\'a\', 1), (\'o\', 2), (\'n\', 1), (\'h\', 1), (\'i\', 1)])
# Group the values that share the same key into a list:
# defaultdict(list) creates an empty list for each new key.
colors = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = collections.defaultdict(list)
for k, v in colors:
    d[k].append(v)

print(d.items())
dict_items([(\'blue\', [2, 4]), (\'yellow\', [1, 3]), (\'red\', [1])])
7.map函数
import math

print('示例1,获取两个列表对应位置上的最小值:')
l1 = [1, 3, 5, 7, 9]
l2 = [2, 4, 6, 6, 9]
# With two iterables, map() calls min(a, b) on each pair of elements.
mins = map(min, l1, l2)
print(mins)

# map() is lazy: nothing is computed until the result is iterated.
for item in mins:
    print(item)

print('示例2,对列表中的元素进行平方根操作:')
squared = map(math.sqrt, l2)
print(squared)
# list() consumes the map object and materializes the results.
print(list(squared))
示例1,获取两个列表对应位置上的最小值: <map object at 0x0000019AF8B0CDD8> 1 3 5 6 9 示例2,对列表中的元素进行平方根操作: <map object at 0x0000019AF8A79DD8> [1.4142135623730951, 2.0, 2.449489742783178, 2.449489742783178, 3.0]
8.匿名函数lambda
# A lambda can be bound to a name and called like a normal function
# (example left disabled, as in the original):
# my_func = lambda a, b, c: a * b
# print(my_func)
# print(my_func(1, 2, 3))

# Combine lambda with map(): the lambda receives one element from each
# list per call.
print('lambda结合map')
l1 = [1, 3, 5, 7, 9]
l2 = [2, 4, 6, 8, 10]
result = map(lambda x, y: x * 2 + y, l1, l2)
print(list(result))
lambda结合map [4, 10, 16, 22, 28]
9.python操作csv数据文件
import csv

# Read the whole CSV file into a list of per-row mappings
# (column name -> string value). newline='' is what the csv module
# documentation requires for files passed to a reader, so that newlines
# embedded in quoted fields are handled correctly.
with open('grades.csv', newline='') as csvfile:
    grades_data = list(csv.DictReader(csvfile))

print('记录个数:', len(grades_data))
print('前2条记录:', grades_data[:2])
print('列名:', list(grades_data[0].keys()))
记录个数: 2315 前2条记录: [OrderedDict([(\'student_id\', \'B73F2C11-70F0-E37D-8B10-1D20AFED50B1\'), (\'assignment1_grade\', \'92.73394640624123\'), (\'assignment1_submission\', \'2015-11-02 06:55:34.282000000\'), (\'assignment2_grade\', \'83.03055176561709\'), (\'assignment2_submission\', \'2015-11-09 02:22:58.938000000\'), (\'assignment3_grade\', \'67.16444141249367\'), (\'assignment3_submission\', \'2015-11-12 08:58:33.998000000\'), (\'assignment4_grade\', \'53.01155312999494\'), (\'assignment4_submission\', \'2015-11-16 01:21:24.663000000\'), (\'assignment5_grade\', \'47.710397816995446\'), (\'assignment5_submission\', \'2015-11-20 13:24:59.692000000\'), (\'assignment6_grade\', \'38.16831825359636\'), (\'assignment6_submission\', \'2015-11-22 18:31:15.934000000\')]), OrderedDict([(\'student_id\', \'98A0FAE0-A19A-13D2-4BB5-CFBFD94031D1\'), (\'assignment1_grade\', \'86.79082085792986\'), (\'assignment1_submission\', \'2015-11-29 14:57:44.429000000\'), (\'assignment2_grade\', \'86.29082085792986\'), (\'assignment2_submission\', \'2015-12-06 17:41:18.449000000\'), (\'assignment3_grade\', \'69.7726566863439\'), (\'assignment3_submission\', \'2015-12-10 08:54:55.904000000\'), (\'assignment4_grade\', \'55.0981253490751\'), (\'assignment4_submission\', \'2015-12-13 17:32:30.941000000\'), (\'assignment5_grade\', \'49.5883128141676\'), (\'assignment5_submission\', \'2015-12-19 23:26:39.285000000\'), (\'assignment6_grade\', \'44.62948153275085\'), (\'assignment6_submission\', \'2015-12-21 17:07:24.275000000\')])] 列名: [\'student_id\', \'assignment1_grade\', \'assignment1_submission\', \'assignment2_grade\', \'assignment2_submission\', \'assignment3_grade\', \'assignment3_submission\', \'assignment4_grade\', \'assignment4_submission\', \'assignment5_grade\', \'assignment5_submission\', \'assignment6_grade\', \'assignment6_submission\']
# Average grade of assignment 1. CSV values are strings, so each one is
# converted to float; a generator expression avoids building a
# temporary list just to sum it.
avg_assign1 = sum(float(row['assignment1_grade']) for row in grades_data) / len(grades_data)
print('assignment1平均分数:', avg_assign1)
assignment1平均分数: 74.5357320747794
# Distinct submission months of assignment 1: the first 7 characters of
# the timestamp string are 'YYYY-MM'; the set removes duplicates.
assign1_sub_month = set(row['assignment1_submission'][:7] for row in grades_data)
print(assign1_sub_month)
{\'2016-02\', \'2015-09\', \'2016-01\', \'2016-04\', \'2016-03\', \'2016-06\', \'2016-08\', \'2015-10\', \'2016-05\', \'2016-07\', \'2015-12\', \'2015-11\'}
科学计算库NumPy
1 import numpy as np
1. 创建Array
# Create a NumPy array from a Python list.
my_list = [1, 2, 3]
x = np.array(my_list)

print('列表:', my_list)
print('Array: ', x)
列表: [1, 2, 3] Array: [1 2 3]
# Arithmetic on arrays is element-wise. Notebook cell: the value of the
# last expression is displayed.
np.array([1, 2, 3]) - np.array([4, 5, 6])
array([-3, -3, -3])
# A nested list produces a two-dimensional array; shape is (rows, columns).
m = np.array([[1, 2, 3], [4, 5, 6]])
print(m)
print('shape: ', m.shape)
[[1 2 3] [4 5 6]] shape: (2, 3)
# arange(start, stop, step): values from 0 up to (not including) 30 in
# steps of 2.
n = np.arange(0, 30, 2)
print(n)
[ 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28]
# Rearrange the 15 elements into 3 rows by 5 columns.
n = n.reshape(3, 5)
print('reshape后: ')
print(n)
reshape后: [[ 0 2 4 6 8] [10 12 14 16 18] [20 22 24 26 28]]
# Constructors for common arrays: all ones, all zeros, the identity
# matrix, and a diagonal matrix built from a list.
print('ones:\n', np.ones((3, 2)))
print('zeros:\n', np.zeros((3, 2)))
print('eye:\n', np.eye(3))
print('diag:\n', np.diag(my_list))
ones: [[1. 1.] [1. 1.] [1. 1.]] zeros: [[0. 0.] [0. 0.] [0. 0.]] eye: [[1. 0. 0.] [0. 1. 0.] [0. 0. 1.]] diag: [[1 0 0] [0 2 0] [0 0 3]]
# `list * 3` repeats the whole list (1 2 3 1 2 3 ...), whereas
# np.repeat repeats each element in turn (1 1 1 2 2 2 ...).
print('*操作:\n', np.array([1, 2, 3] * 3))
print('repeat:\n', np.repeat([1, 2, 3], 3))
*操作: [1 2 3 1 2 3 1 2 3] repeat: [1 1 1 2 2 2 3 3 3]
# Stack two 3x3 arrays: vstack places them on top of each other (6x3),
# hstack places them side by side (3x6).
p1 = np.ones((3, 3))
p2 = np.arange(9).reshape(3, 3)
print('纵向叠加: \n', np.vstack((p1, p2)))
print('横向叠加: \n', np.hstack((p1, p2)))
纵向叠加: [[ 1. 1. 1.] [ 1. 1. 1.] [ 1. 1. 1.] [ 0. 1. 2.] [ 3. 4. 5.] [ 6. 7. 8.]] 横向叠加: [[ 1. 1. 1. 0. 1. 2.] [ 1. 1. 1. 3. 4. 5.] [ 1. 1. 1. 6. 7. 8.]]
2. Array操作
# Arithmetic on two 3x3 arrays.
p1 = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
p2 = np.arange(9).reshape(3, 3)
print('p1: \n', p1)
print('p2: \n', p2)

# +, * and ** operate element by element; dot() is the matrix product.
print('p1 + p2 = \n', p1 + p2)
print('p1 * p2 = \n', p1 * p2)
print('p2^2 = \n', p2 ** 2)
print('p1.p2 = \n', p1.dot(p2))
p1: [[1 1 1] [1 1 1] [1 1 1]] p2: [[0 1 2] [3 4 5] [6 7 8]] p1 + p2 = [[1 2 3] [4 5 6] [7 8 9]] p1 * p2 = [[0 1 2] [3 4 5] [6 7 8]] p2^2 = [[ 0 1 4] [ 9 16 25] [36 49 64]] p1.p2 = [[ 9 12 15] [ 9 12 15] [ 9 12 15]]
# Transpose: .T swaps rows and columns, so a (2, 3) array becomes (3, 2).
p3 = np.arange(6).reshape(2, 3)
print('p3形状: ', p3.shape)
print(p3)
p4 = p3.T
print('转置后p3形状: ', p4.shape)
print(p4)
p3形状: (2, 3) [[0 1 2] [3 4 5]] 转置后p3形状: (3, 2) [[0 3] [1 4] [2 5]]
# Inspect the element type with .dtype and convert it with astype().
p3 = np.arange(6).reshape(2, 3)
print('p3数据类型:', p3.dtype)
print(p3)

# astype() returns a new array with the requested element type.
p5 = p3.astype('float')
print('p5数据类型:', p5.dtype)
print(p5)
p3数据类型: int32 [[0 1 2] [3 4 5]] p5数据类型: float64 [[0. 1. 2.] [3. 4. 5.]]
# Summary statistics of a one-dimensional array.
a = np.array([-4, -2, 1, 3, 5])
print('sum: ', a.sum())
print('min: ', a.min())
print('max: ', a.max())
print('mean: ', a.mean())
print('std: ', a.std())  # standard deviation
print('argmax: ', a.argmax())  # index of the largest element
print('argmin: ', a.argmin())  # index of the smallest element
sum: 3 min: -4 max: 5 mean: 0.6 std: 3.2619012860600183 argmax: 4 argmin: 0
3. 索引与切片
# Indexing and slicing a one-dimensional array.
s = np.arange(13) ** 2
print('s: ', s)
print('s[0]: ', s[0])
print('s[4]: ', s[4])
print('s[0:3]: ', s[0:3])
# Indexing with a list of positions picks exactly those elements.
print('s[[0, 2, 4]]: ', s[[0, 2, 4]])
s: [ 0 1 4 9 16 25 36 49 64 81 100 121 144] s[0]: 0 s[4]: 16 s[0:3]: [0 1 4] s[[0, 2, 4]]: [ 0 4 16]
# Indexing and slicing a two-dimensional array.
r = np.arange(36).reshape((6, 6))
print('r: \n', r)
# r[2, 2] is the element in the third row, third column.
print('r[2, 2]: \n', r[2, 2])
# r[3, 3:6] takes the fourth row only, columns four through six
# (the stop index 6 is exclusive).
print('r[3, 3:6]: \n', r[3, 3:6])
r: [[ 0 1 2 3 4 5] [ 6 7 8 9 10 11] [12 13 14 15 16 17] [18 19 20 21 22 23] [24 25 26 27 28 29] [30 31 32 33 34 35]] r[2, 2]: 14 r[3, 3:6]: [21 22 23]
r = np.arange(36).reshape((6, 6))
# An element-wise comparison yields a boolean array of the same shape.
# Notebook cell: the value of the last expression is displayed.
r > 30
array([[False, False, False, False, False, False],
[False, False, False, False, False, False],
[False, False, False, False, False, False],
[False, False, False, False, False, False],
[False, False, False, False, False, False],
[False, True, True, True, True, True]])
# Filter: a boolean mask selects only the elements where it is True.
print(r[r > 30])

# Assigning through the mask replaces every value greater than 30 by 30.
r[r > 30] = 30
print(r)
[31 32 33 34 35] [[ 0 1 2 3 4 5] [ 6 7 8 9 10 11] [12 13 14 15 16 17] [18 19 20 21 22 23] [24 25 26 27 28 29] [30 30 30 30 30 30]]
# Why copy() matters: a slice shares its data with the original array
# rather than copying it (see the effect on r once r2 is modified).
r2 = r[:3, :3]
print(r2)
[[ 0 1 2] [ 6 7 8] [12 13 14]]
# Set every element of r2 to 0.
r2[:] = 0

# Inspect r: its top-left 3x3 corner has changed as well.
print(r)
[[ 0 0 0 3 4 5] [ 0 0 0 9 10 11] [ 0 0 0 15 16 17] [18 19 20 21 22 23] [24 25 26 27 28 29] [30 30 30 30 30 30]]
# copy() creates an independent array, so zeroing r3 leaves r unchanged.
r3 = r.copy()
r3[:] = 0
print(r)
[[ 0 0 0 3 4 5] [ 0 0 0 9 10 11] [ 0 0 0 15 16 17] [18 19 20 21 22 23] [24 25 26 27 28 29] [30 30 30 30 30 30]]
4. 遍历 Array
import numpy as np

# A 4x3 array of random integers drawn from [0, 10).
t = np.random.randint(0, 10, (4, 3))
print(t)
[[3 2 7] [4 9 1] [1 3 0] [0 9 1]]
# Iterating over a two-dimensional array yields one row at a time.
for row in t:
    print(row)
[3 2 7] [4 9 1] [1 3 0] [0 9 1]
# enumerate() supplies the row index together with each row.
for i, row in enumerate(t):
    print('row {} is {}'.format(i, row))
row 0 is [3 2 7] row 1 is [4 9 1] row 2 is [1 3 0] row 3 is [0 9 1]
# Square every element of t.
t2 = t ** 2
print(t2)
[[ 9 4 49] [16 81 1] [ 1 9 0] [ 0 81 1]]
# zip() walks both arrays in lockstep, pairing each row of t with the
# corresponding row of t2; i + j adds the two rows element-wise.
for i, j in zip(t, t2):
    print('{} + {} = {}'.format(i, j, i + j))
[3 2 7] + [ 9 4 49] = [12 6 56] [4 9 1] + [16 81 1] = [20 90 2] [1 3 0] + [1 9 0] = [ 2 12 0] [0 9 1] + [ 0 81 1] = [ 0 90 2]