import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Load seaborn's example dataset named "tips" into a DataFrame.
tips = sns.load_dataset(name="tips")
# Preview the first five rows (bare expression: only rendered by a notebook/REPL).
tips.head(n=5)
|   | total_bill | tip | sex | smoker | day | time | size |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
箱线图
# Box plot (matplotlib): total_bill restricted to rows where sex == 'Female'.
is_female = tips['sex'] == 'Female'
female_bills = tips.loc[is_female, 'total_bill']
plt.boxplot(x=female_bills)
plt.show()

# Box plot (seaborn): total_bill on the x axis, one box per value of 'sex'.
sns.boxplot(data=tips, x='total_bill', y='sex')
plt.show()


散点图
# Scatter plot (matplotlib): tip against total_bill, drawn as green circles.
plt.scatter(tips['total_bill'], tips['tip'], color='green', marker='o')
plt.title('total_bill VS tip')
plt.xlabel('total_bill')
plt.ylabel('tip')
plt.show()

# Same scatter with seaborn, drawn with star glyphs.
# `marker` (singular) is forwarded to matplotlib and sets the glyph for every
# point; `markers` (plural) only maps glyphs to the levels of a `style`
# variable, so without `style=` it would be silently ignored.
sns.scatterplot(x='total_bill', y='tip', data=tips, marker='*')
plt.title('sns___total_bill VS tip')
plt.show()


折线图
# Line plot of the total_bill column (red), one point per row.
bill_values = tips['total_bill']
plt.plot(bill_values, color='red')
plt.title('plt_total_bill line')
plt.show()

# Line plot of the tip column (green); this figure is left untitled.
tip_values = tips['tip']
plt.plot(tip_values, color='green')
plt.show()


双坐标轴
# Dual-axis line chart: tip on the left y axis, total_bill on the right one.
ax1 = plt.subplot(111)
# With `data=`, the string 'tip' is looked up as a column of `tips`.
ax1.plot('tip', data=tips, color='b')
ax1.set_ylabel('tip')

# twinx() returns a second Axes that shares ax1's x axis but has its own
# y axis, so the two columns can use independent scales.
ax2 = ax1.twinx()
ax2.plot('total_bill', data=tips, color='r')
ax2.set_ylabel('total_bill')

# set_title, not set_label: Axes.set_label only stores a legend label for the
# Axes artist and draws nothing, so the caption never appeared on the figure.
ax1.set_title('double label for total_bill and tip')
# Render the figure explicitly, like every other plot in this file.
plt.show()

柱状图
# Bar chart (matplotlib): one bar per row of `tips`, positioned by its day.
# Rows that share a day are drawn at the same x position, on top of each other.
plt.bar(x=tips['day'], height=tips['tip'], color='g')
plt.show()

# Bar chart (seaborn): tips aggregated per day with np.sum as the estimator.
sns.barplot(x='day', y='tip', data=tips, estimator=np.sum, palette='Blues_d')
plt.title('the total tip of each day')
plt.show()

# Tips summed per meal time, with the bar order fixed to Dinner then Lunch.
ax = sns.barplot(x='time', y='tip', data=tips, order=['Dinner', 'Lunch'], estimator=np.sum)
# Title typo fixed ('Luach' -> 'Lunch') so it matches the category name.
plt.title('the total tip of each time order by ---Dinner, Lunch')
plt.show()

# Number of rows per day.
# `x` must be passed by keyword: in current seaborn the first positional
# parameter is `data`, so countplot('day', data=tips) fails with a
# "multiple values for argument 'data'" TypeError.
sns.countplot(x='day', data=tips, palette='Set3')
plt.title('the frequency of day')
plt.show()

C:\Users\Administrator\software\anoconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval



创建带数字标签的柱状图
# Bar chart of the squares of 1..10 with each value printed above its bar.
numbers = list(range(1, 11))
x = np.array(numbers)
y = x ** 2  # element-wise square

plt.bar(x, y, width=0.5, align='center', color='c')
plt.title('Square Numbers', fontsize=24)
plt.xlabel('Value', fontsize=14)
plt.ylabel('Square of Value', fontsize=14)
plt.tick_params(axis='both', labelsize=14)
# Fixed axis limits [xmin, xmax, ymin, ymax]; 110 leaves headroom above the
# tallest bar (100) for its text label.
plt.axis([0, 11, 0, 110])

# The loop body must be indented; place each label just above the bar top,
# horizontally centred on the bar.
for a, b in zip(x, y):
    plt.text(a, b + 0.1, '%.0f' % b, ha='center', va='bottom', fontsize=7)
plt.show()

# Line plot of y = x**2 for x = 1..6 with every point annotated as "(x,y)".
input_values = [1, 2, 3, 4, 5, 6]
squares = [1, 4, 9, 16, 25, 36]
plt.plot(input_values, squares, linewidth=5)

x = np.array(input_values)
y = np.array(squares)
# The loop body (including the continuation line) must be indented.
# xytext is an offset in points from the data point (xy): 20 pt to the left
# and 10 pt up, so the text does not sit on the line.
for a, b in zip(x, y):
    plt.annotate('(%s,%s)' % (a, b), xy=(a, b), xytext=(-20, 10),
                 textcoords='offset points')

plt.title('Square Numbers', fontsize=24)
plt.xlabel('Value', fontsize=14)
plt.ylabel('Square of Value', fontsize=14)
plt.tick_params(axis='both', labelsize=14)
plt.show()

# Total tip per day, sorted from the largest total to the smallest.
# as_index=False keeps 'day' as a regular column next to the summed 'tip'.
day = tips.groupby('day', as_index=False)['tip'].sum()
day = day.sort_values(by='tip', ascending=False)
# Bare expression: only rendered by a notebook/REPL.
day

# plt.bar returns a container of the drawn bars, used below to place labels.
res = plt.bar(x=day['day'], height=day['tip'], color='g')
plt.title('the total tip of each day')

# The loop body must be indented; write each bar's height, rounded to an
# integer, centred just above the bar.
for r in res:
    b = r.get_height()
    plt.text(r.get_x() + r.get_width() / 2, b, '%.0f' % b,
             ha='center', va='bottom', fontsize=20)

# Fixed y range of 0..300 for the bars and their labels.
plt.ylim(0, 300)
plt.show()
