附代码:
import pandas as pd
beer = pd.read_csv('E:\\1\\000518.csv',sep=',')
In [3]:
beer
Out[3]:
| open | close | high | low | volume | money | |
|---|---|---|---|---|---|---|
| 0 | 2.26 | 2.25 | 2.26 | 2.25 | 64047.0 | 144526.0 |
| 1 | 2.25 | 2.25 | 2.26 | 2.25 | 7020.0 | 15805.0 |
| 2 | 2.25 | 2.24 | 2.25 | 2.24 | 52599.0 | 118287.0 |
| 3 | 2.24 | 2.24 | 2.25 | 2.24 | 39098.0 | 87629.0 |
| 4 | 2.24 | 2.24 | 2.24 | 2.23 | 45794.0 | 102386.0 |
| 5 | 2.24 | 2.24 | 2.24 | 2.24 | 52923.0 | 118635.0 |
| 6 | 2.24 | 2.25 | 2.25 | 2.24 | 33374.0 | 74939.0 |
| 7 | 2.25 | 2.25 | 2.25 | 2.24 | 22465.0 | 50455.0 |
| 8 | 2.24 | 2.25 | 2.25 | 2.24 | 72471.0 | 162653.0 |
| 9 | 2.25 | 2.25 | 2.26 | 2.24 | 20845.0 | 46884.0 |
| 10 | 2.26 | 2.26 | 2.26 | 2.25 | 3888.0 | 8774.0 |
| 11 | 2.26 | 2.25 | 2.26 | 2.25 | 34886.0 | 78752.0 |
| 12 | 2.26 | 2.26 | 2.26 | 2.26 | 3348.0 | 7564.0 |
| 13 | 2.26 | 2.26 | 2.26 | 2.26 | 5184.0 | 11712.0 |
| 14 | 2.26 | 2.26 | 2.26 | 2.26 | 7236.0 | 16348.0 |
| 15 | 2.26 | 2.26 | 2.26 | 2.26 | 5400.0 | 12200.0 |
| 16 | 2.26 | 2.26 | 2.26 | 2.26 | 60267.0 | 136152.0 |
| 17 | 2.26 | 2.26 | 2.26 | 2.26 | 4320.0 | 9760.0 |
| 18 | 2.26 | 2.26 | 2.26 | 2.26 | 6588.0 | 14884.0 |
| 19 | 2.26 | 2.26 | 2.26 | 2.26 | 33482.0 | 75640.0 |
| 20 | 2.26 | 2.26 | 2.26 | 2.26 | 12745.0 | 28792.0 |
| 21 | 2.26 | 2.25 | 2.26 | 2.25 | 35642.0 | 80492.0 |
| 22 | 2.26 | 2.26 | 2.26 | 2.26 | 90184.0 | 203740.0 |
| 23 | 2.25 | 2.26 | 2.26 | 2.25 | 24841.0 | 55893.0 |
| 24 | 2.26 | 2.25 | 2.26 | 2.25 | 1620.0 | 3652.0 |
| 25 | 2.25 | 2.26 | 2.26 | 2.25 | 6804.0 | 15314.0 |
| 26 | 2.25 | 2.26 | 2.26 | 2.25 | 11881.0 | 26740.0 |
| 27 | 2.25 | 2.26 | 2.26 | 2.25 | 24409.0 | 55024.0 |
| 28 | 2.25 | 2.26 | 2.26 | 2.25 | 3780.0 | 8525.0 |
| 29 | 2.25 | 2.26 | 2.26 | 2.25 | 41366.0 | 93233.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 869850 | 3.28 | 3.28 | 3.28 | 3.28 | 300.0 | 984.0 |
| 869851 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
| 869852 | 3.29 | 3.29 | 3.29 | 3.29 | 100.0 | 329.0 |
| 869853 | 3.29 | 3.28 | 3.29 | 3.28 | 17200.0 | 56546.0 |
| 869854 | 3.29 | 3.28 | 3.29 | 3.28 | 1400.0 | 4597.0 |
| 869855 | 3.29 | 3.29 | 3.29 | 3.29 | 200.0 | 658.0 |
| 869856 | 3.28 | 3.29 | 3.29 | 3.28 | 4100.0 | 13459.0 |
| 869857 | 3.29 | 3.28 | 3.29 | 3.28 | 1400.0 | 4604.0 |
| 869858 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
| 869859 | 3.28 | 3.28 | 3.28 | 3.28 | 15200.0 | 49856.0 |
| 869860 | 3.28 | 3.28 | 3.28 | 3.28 | 5600.0 | 18368.0 |
| 869861 | 3.28 | 3.29 | 3.29 | 3.28 | 2300.0 | 7552.0 |
| 869862 | 3.29 | 3.29 | 3.29 | 3.29 | 0.0 | 0.0 |
| 869863 | 3.28 | 3.28 | 3.28 | 3.28 | 2000.0 | 6560.0 |
| 869864 | 3.29 | 3.28 | 3.29 | 3.28 | 9200.0 | 30245.0 |
| 869865 | 3.28 | 3.28 | 3.28 | 3.28 | 500.0 | 1640.0 |
| 869866 | 3.28 | 3.28 | 3.28 | 3.28 | 58800.0 | 192798.0 |
| 869867 | 3.28 | 3.28 | 3.28 | 3.28 | 1100.0 | 3608.0 |
| 869868 | 3.28 | 3.27 | 3.28 | 3.27 | 38600.0 | 126372.0 |
| 869869 | 3.27 | 3.27 | 3.27 | 3.27 | 27700.0 | 90579.0 |
| 869870 | 3.28 | 3.28 | 3.28 | 3.28 | 4400.0 | 14432.0 |
| 869871 | 3.28 | 3.28 | 3.28 | 3.28 | 1400.0 | 4592.0 |
| 869872 | 3.28 | 3.28 | 3.28 | 3.27 | 17100.0 | 56027.0 |
| 869873 | 3.28 | 3.28 | 3.28 | 3.28 | 31300.0 | 102729.0 |
| 869874 | 3.28 | 3.28 | 3.28 | 3.28 | 2000.0 | 6560.0 |
| 869875 | 3.28 | 3.28 | 3.28 | 3.28 | 3300.0 | 10824.0 |
| 869876 | 3.28 | 3.28 | 3.28 | 3.28 | 1300.0 | 4264.0 |
| 869877 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
| 869878 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
| 869879 | 3.28 | 3.29 | 3.29 | 3.28 | 3400.0 | 11156.0 |
869880 rows × 6 columns
In [6]:
X=beer[['open','close','high','low','volume','money']]
In [7]:
#K_means clustering
from sklearn.cluster import KMeans
km = KMeans(n_clusters=3).fit(X)
km2 = KMeans(n_clusters=2).fit(X)
In [8]:
km.labels_
Out[8]:
array([0, 0, 0, ..., 0, 0, 0])
In [12]:
beer['close1'] = km.labels_
beer['close2']=km.labels_
beer.sort_values('close1')
Out[12]:
| open | close | high | low | volume | money | close1 | close2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 2.26 | 2.25 | 64047.0 | 144526.0 | 0 | 0 |
| 576484 | 0 | 0 | 5.96 | 5.94 | 411400.0 | 2446780.0 | 0 | 0 |
| 576485 | 0 | 0 | 5.97 | 5.95 | 307400.0 | 1831532.0 | 0 | 0 |
| 576486 | 0 | 0 | 5.99 | 5.97 | 492800.0 | 2946828.0 | 0 | 0 |
| 576488 | 0 | 0 | 6.02 | 6.01 | 429200.0 | 2583728.0 | 0 | 0 |
| 576490 | 0 | 0 | 6.01 | 5.99 | 516800.0 | 3100452.0 | 0 | 0 |
| 576491 | 0 | 0 | 6.00 | 5.98 | 444700.0 | 2662148.0 | 0 | 0 |
| 576492 | 0 | 0 | 5.99 | 5.98 | 380300.0 | 2277268.0 | 0 | 0 |
| 576494 | 0 | 0 | 5.98 | 5.97 | 390200.0 | 2330280.0 | 0 | 0 |
| 576495 | 0 | 0 | 5.98 | 5.97 | 249300.0 | 1489820.0 | 0 | 0 |
| 576498 | 0 | 0 | 5.95 | 5.94 | 302000.0 | 1795104.0 | 0 | 0 |
| 576483 | 0 | 0 | 5.96 | 5.93 | 354600.0 | 2104872.0 | 0 | 0 |
| 576500 | 0 | 0 | 5.93 | 5.92 | 448400.0 | 2658368.0 | 0 | 0 |
| 576502 | 0 | 0 | 5.97 | 5.95 | 187800.0 | 1118504.0 | 0 | 0 |
| 576503 | 0 | 0 | 5.97 | 5.96 | 306200.0 | 1826672.0 | 0 | 0 |
| 576504 | 0 | 0 | 5.97 | 5.96 | 252900.0 | 1509208.0 | 0 | 0 |
| 576505 | 0 | 0 | 5.98 | 5.97 | 196600.0 | 1174200.0 | 0 | 0 |
| 576506 | 0 | 0 | 5.98 | 5.97 | 261900.0 | 1565096.0 | 0 | 0 |
| 576508 | 0 | 0 | 5.98 | 5.97 | 435200.0 | 2598160.0 | 0 | 0 |
| 576510 | 0 | 0 | 5.97 | 5.96 | 276400.0 | 1647848.0 | 0 | 0 |
| 576511 | 0 | 0 | 5.97 | 5.96 | 161100.0 | 960744.0 | 0 | 0 |
| 576512 | 0 | 0 | 5.97 | 5.95 | 299800.0 | 1786568.0 | 0 | 0 |
| 576513 | 0 | 0 | 5.97 | 5.95 | 139700.0 | 832264.0 | 0 | 0 |
| 576501 | 0 | 0 | 5.95 | 5.93 | 449800.0 | 2672864.0 | 0 | 0 |
| 576514 | 0 | 0 | 5.96 | 5.95 | 397000.0 | 2362656.0 | 0 | 0 |
| 576481 | 0 | 0 | 5.99 | 5.97 | 424400.0 | 2536606.0 | 0 | 0 |
| 576477 | 0 | 0 | 6.02 | 6.02 | 16700.0 | 100448.0 | 0 | 0 |
| 576425 | 0 | 0 | 5.80 | 5.79 | 192800.0 | 1117824.0 | 0 | 0 |
| 576427 | 0 | 0 | 5.81 | 5.80 | 213700.0 | 1240896.0 | 0 | 0 |
| 576428 | 0 | 0 | 5.81 | 5.80 | 235700.0 | 1368416.0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 624961 | 2 | 2 | 4.86 | 4.83 | 704100.0 | 3403838.0 | 2 | 2 |
| 150393 | 2 | 2 | 5.90 | 5.87 | 821400.0 | 4834944.0 | 2 | 2 |
| 177084 | 2 | 2 | 6.91 | 6.89 | 575700.0 | 3972896.0 | 2 | 2 |
| 177087 | 2 | 2 | 6.90 | 6.88 | 492700.0 | 3394368.0 | 2 | 2 |
| 177089 | 2 | 2 | 6.90 | 6.89 | 492200.0 | 3394752.0 | 2 | 2 |
| 285619 | 2 | 2 | 4.43 | 4.38 | 2127800.0 | 9331944.0 | 2 | 2 |
| 177104 | 2 | 2 | 6.88 | 6.86 | 485600.0 | 3339008.0 | 2 | 2 |
| 177109 | 2 | 2 | 6.88 | 6.86 | 606800.0 | 4172352.0 | 2 | 2 |
| 133974 | 2 | 2 | 6.46 | 6.43 | 512000.0 | 3302224.0 | 2 | 2 |
| 823290 | 2 | 2 | 4.33 | 4.22 | 1876800.0 | 8073984.0 | 2 | 2 |
| 177112 | 2 | 2 | 6.87 | 6.86 | 475700.0 | 3267200.0 | 2 | 2 |
| 823292 | 2 | 2 | 4.30 | 4.18 | 918000.0 | 3885136.0 | 2 | 2 |
| 624982 | 2 | 2 | 4.82 | 4.79 | 846400.0 | 4066904.0 | 2 | 2 |
| 177113 | 2 | 2 | 6.87 | 6.86 | 1610000.0 | 11050560.0 | 2 | 2 |
| 335774 | 2 | 2 | 7.87 | 7.83 | 1177600.0 | 9240112.0 | 2 | 2 |
| 177114 | 2 | 2 | 6.87 | 6.86 | 528200.0 | 3626880.0 | 2 | 2 |
| 624978 | 2 | 2 | 4.83 | 4.79 | 1828100.0 | 8784036.0 | 2 | 2 |
| 177115 | 2 | 2 | 6.87 | 6.86 | 571900.0 | 3926976.0 | 2 | 2 |
| 177116 | 2 | 2 | 6.87 | 6.86 | 541800.0 | 3720832.0 | 2 | 2 |
| 384885 | 2 | 2 | 7.08 | 7.03 | 576100.0 | 4067248.0 | 2 | 2 |
| 177119 | 2 | 2 | 6.87 | 6.87 | 1158400.0 | 7958208.0 | 2 | 2 |
| 177120 | 2 | 2 | 6.92 | 6.89 | 1121600.0 | 7734116.0 | 2 | 2 |
| 177121 | 2 | 2 | 6.91 | 6.86 | 618700.0 | 4261458.0 | 2 | 2 |
| 285642 | 2 | 2 | 4.39 | 4.38 | 839800.0 | 3681336.0 | 2 | 2 |
| 624966 | 2 | 2 | 4.85 | 4.79 | 1627700.0 | 7836872.0 | 2 | 2 |
| 150396 | 2 | 2 | 5.89 | 5.85 | 599000.0 | 3517232.0 | 2 | 2 |
| 150395 | 2 | 2 | 5.89 | 5.87 | 570800.0 | 3357552.0 | 2 | 2 |
| 150394 | 2 | 2 | 5.90 | 5.87 | 1019900.0 | 5993008.0 | 2 | 2 |
| 624979 | 2 | 2 | 4.80 | 4.77 | 854100.0 | 4084476.0 | 2 | 2 |
| 341983 | 2 | 2 | 7.05 | 7.04 | 707300.0 | 4988704.0 | 2 | 2 |
869880 rows × 8 columns
In [16]:
from pandas.plotting import scatter_matrix
%matplotlib inline
close_centers = km.cluster_centers_
close_centers_2 = km2.cluster_centers_
In [22]:
beer.groupby("close").mean()
Out[22]:
| open | high | low | volume | money | close1 | close2 | |
|---|---|---|---|---|---|---|---|
| close | |||||||
| 0 | 0 | 4.678545 | 4.669329 | 7.491448e+04 | 3.665092e+05 | 0 | 0 |
| 1 | 1 | 7.283650 | 7.205276 | 6.855841e+06 | 4.644045e+07 | 1 | 1 |
| 2 | 2 | 6.804501 | 6.773411 | 9.541442e+05 | 6.112502e+06 | 2 | 2 |
In [20]:
beer.groupby("close2").mean()
Out[20]:
| open | close | high | low | volume | money | close1 | |
|---|---|---|---|---|---|---|---|
| close2 | |||||||
| 0 | 0 | 0 | 4.678545 | 4.669329 | 7.491448e+04 | 3.665092e+05 | 0 |
| 1 | 1 | 1 | 7.283650 | 7.205276 | 6.855841e+06 | 4.644045e+07 | 1 |
| 2 | 2 | 2 | 6.804501 | 6.773411 | 9.541442e+05 | 6.112502e+06 | 2 |
In [33]:
centers = beer.groupby("close").mean().reset_index()
In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['font.size'] = 14
In [26]:
import numpy as np
colors = np.array(['red','green','blue','yellow'])
In [34]:
plt.scatter(beer['open'],beer['high'],c = colors[beer['close']])
plt.scatter(centers.open,centers.high,linewidths=3,marker='+',s=300,c='black')
plt.xlabel("open")
plt.ylabel("high")
Out[34]:
Text(0, 0.5, 'high')
In [35]:
scatter_matrix(beer[["close","open","high","low"]],s=100,alpha=1,c=colors[beer["close"]],figsize=(10,10))
plt.suptitle("With 3 centroids initialized")
Out[35]:
Text(0.5, 0.98, 'With 3 centroids initialized')
In [39]:
#scaled data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled
Out[39]:
array([[-1.24466895, -1.24971352, -1.24622056, -1.24811754, -0.14123794,
-0.24382383],
[-1.24970706, -1.24971352, -1.24622056, -1.24811754, -0.36183077,
-0.32424962],
[-1.24970706, -1.2547516 , -1.25125501, -1.25315928, -0.18552129,
-0.26021814],
...,
[-0.73078238, -0.73079179, -0.73270656, -0.72881834, -0.38898565,
-0.33412469],
[-0.73078238, -0.73079179, -0.73270656, -0.72881834, -0.38898565,
-0.33412469],
[-0.73078238, -0.72575372, -0.72767211, -0.72881834, -0.37583371,
-0.32715434]])
In [40]:
km = KMeans(n_clusters=3).fit(X_scaled)
In [41]:
beer["scaled_cluster"] = km.labels_
beer.sort_values("scaled_cluster")
Out[41]:
| open | close | high | low | volume | money | close1 | close2 | scaled_cluster | |
|---|---|---|---|---|---|---|---|---|---|
| 667794 | 0 | 0 | 6.79 | 6.79 | 9300.0 | 63147.0 | 0 | 0 | 0 |
| 644428 | 0 | 0 | 6.96 | 6.87 | 71100.0 | 492974.0 | 0 | 0 | 0 |
| 644429 | 0 | 0 | 6.94 | 6.89 | 144600.0 | 998647.0 | 0 | 0 | 0 |
| 644430 | 0 | 0 | 6.89 | 6.86 | 152000.0 | 1045036.0 | 0 | 0 | 0 |
| 644431 | 0 | 0 | 6.87 | 6.85 | 115000.0 | 788371.0 | 0 | 0 | 0 |
| 644432 | 0 | 0 | 6.86 | 6.84 | 188200.0 | 1287814.0 | 0 | 0 | 0 |
| 644433 | 0 | 0 | 6.87 | 6.84 | 136400.0 | 934928.0 | 0 | 0 | 0 |
| 644434 | 0 | 0 | 6.88 | 6.86 | 294500.0 | 2025171.0 | 0 | 0 | 0 |
| 644435 | 0 | 0 | 6.90 | 6.89 | 82400.0 | 568002.0 | 0 | 0 | 0 |
| 644436 | 0 | 0 | 6.90 | 6.88 | 133900.0 | 923330.0 | 0 | 0 | 0 |
| 644437 | 0 | 0 | 6.90 | 6.88 | 90400.0 | 622793.0 | 0 | 0 | 0 |
| 644438 | 0 | 0 | 6.89 | 6.87 | 150600.0 | 1036431.0 | 0 | 0 | 0 |
| 644439 | 0 | 0 | 6.90 | 6.89 | 65700.0 | 452861.0 | 0 | 0 | 0 |
| 644440 | 0 | 0 | 6.90 | 6.88 | 48600.0 | 335217.0 | 0 | 0 | 0 |
| 644441 | 0 | 0 | 6.93 | 6.89 | 99100.0 | 684674.0 | 0 | 0 | 0 |
| 644442 | 0 | 0 | 6.95 | 6.92 | 74300.0 | 515826.0 | 0 | 0 | 0 |
| 644443 | 0 | 0 | 6.95 | 6.93 | 70300.0 | 487831.0 | 0 | 0 | 0 |
| 644444 | 0 | 0 | 6.94 | 6.92 | 162100.0 | 1123415.0 | 0 | 0 | 0 |
| 644445 | 0 | 0 | 6.94 | 6.91 | 217100.0 | 1503263.0 | 0 | 0 | 0 |
| 644446 | 0 | 0 | 6.96 | 6.92 | 250700.0 | 1740438.0 | 0 | 0 | 0 |
| 644447 | 2 | 2 | 7.05 | 6.94 | 539600.0 | 3774772.0 | 2 | 2 | 0 |
| 644448 | 0 | 0 | 7.10 | 7.00 | 427200.0 | 3024468.0 | 0 | 0 | 0 |
| 644427 | 0 | 0 | 6.88 | 6.84 | 343700.0 | 2356016.0 | 0 | 0 | 0 |
| 644449 | 0 | 0 | 7.07 | 7.01 | 162300.0 | 1140414.0 | 0 | 0 | 0 |
| 644426 | 0 | 0 | 6.86 | 6.84 | 236600.0 | 1621721.0 | 0 | 0 | 0 |
| 644424 | 0 | 0 | 6.90 | 6.85 | 412200.0 | 2833977.0 | 0 | 0 | 0 |
| 644403 | 0 | 0 | 6.96 | 6.93 | 341900.0 | 2375505.0 | 0 | 0 | 0 |
| 644404 | 0 | 0 | 6.98 | 6.94 | 301900.0 | 2099317.0 | 0 | 0 | 0 |
| 644405 | 0 | 0 | 7.01 | 6.97 | 429800.0 | 3006154.0 | 0 | 0 | 0 |
| 644406 | 0 | 0 | 7.00 | 6.96 | 166800.0 | 1164742.0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 618442 | 2 | 2 | 7.06 | 7.01 | 1116900.0 | 7846912.0 | 2 | 2 | 2 |
| 618441 | 2 | 2 | 7.02 | 7.00 | 1908000.0 | 13381184.0 | 2 | 2 | 2 |
| 618440 | 2 | 2 | 7.04 | 7.01 | 1622400.0 | 11398784.0 | 2 | 2 | 2 |
| 618439 | 2 | 2 | 7.07 | 7.03 | 1334500.0 | 9399104.0 | 2 | 2 | 2 |
| 618438 | 2 | 2 | 7.10 | 7.05 | 1093700.0 | 7742592.0 | 2 | 2 | 2 |
| 618437 | 2 | 2 | 7.13 | 7.10 | 1618000.0 | 11503744.0 | 2 | 2 | 2 |
| 631278 | 2 | 2 | 6.29 | 6.26 | 991200.0 | 6210959.0 | 2 | 2 | 2 |
| 569910 | 2 | 2 | 6.85 | 6.84 | 948300.0 | 6491008.0 | 2 | 2 | 2 |
| 569907 | 2 | 2 | 6.80 | 6.76 | 1801200.0 | 12228352.0 | 2 | 2 | 2 |
| 700290 | 2 | 2 | 9.08 | 9.05 | 947400.0 | 8585212.0 | 2 | 2 | 2 |
| 631284 | 2 | 2 | 6.24 | 6.20 | 1253900.0 | 7797035.0 | 2 | 2 | 2 |
| 247465 | 2 | 2 | 4.11 | 4.10 | 1247300.0 | 5114904.0 | 2 | 2 | 2 |
| 627605 | 2 | 2 | 5.92 | 5.92 | 1081100.0 | 6400640.0 | 2 | 2 | 2 |
| 591024 | 2 | 2 | 5.89 | 5.87 | 1119900.0 | 6586016.0 | 2 | 2 | 2 |
| 591023 | 2 | 2 | 5.89 | 5.88 | 1129000.0 | 6646656.0 | 2 | 2 | 2 |
| 591022 | 2 | 2 | 5.90 | 5.88 | 2324200.0 | 13693904.0 | 2 | 2 | 2 |
| 591021 | 2 | 2 | 5.88 | 5.85 | 2427700.0 | 14238800.0 | 2 | 2 | 2 |
| 591020 | 2 | 2 | 5.85 | 5.79 | 2601700.0 | 15139256.0 | 2 | 2 | 2 |
| 627604 | 2 | 2 | 5.92 | 5.92 | 2050400.0 | 12138376.0 | 2 | 2 | 2 |
| 627603 | 2 | 2 | 5.92 | 5.92 | 2218300.0 | 13132376.0 | 2 | 2 | 2 |
| 627602 | 1 | 1 | 5.92 | 5.85 | 8823500.0 | 52179664.0 | 1 | 1 | 2 |
| 627601 | 2 | 2 | 5.86 | 5.72 | 2469300.0 | 14324688.0 | 2 | 2 | 2 |
| 627600 | 1 | 1 | 5.80 | 5.67 | 8339200.0 | 47560426.0 | 1 | 1 | 2 |
| 658208 | 2 | 2 | 6.92 | 6.88 | 1306900.0 | 9012986.0 | 2 | 2 | 2 |
| 247449 | 2 | 2 | 4.10 | 4.09 | 1195200.0 | 4894624.0 | 2 | 2 | 2 |
| 247445 | 2 | 2 | 4.18 | 4.15 | 2356100.0 | 9817096.0 | 2 | 2 | 2 |
| 247444 | 2 | 2 | 4.18 | 4.16 | 5117300.0 | 21366048.0 | 2 | 2 | 2 |
| 618398 | 2 | 2 | 7.12 | 7.09 | 1734300.0 | 12318048.0 | 2 | 2 | 2 |
| 247443 | 2 | 2 | 4.16 | 4.12 | 2340600.0 | 9700918.0 | 2 | 2 | 2 |
| 361882 | 2 | 2 | 8.44 | 8.38 | 2697200.0 | 22710144.0 | 2 | 2 | 2 |
869880 rows × 9 columns
beer.groupby("scaled_cluster").mean()
In [42]:
beer.groupby("scaled_cluster").mean()
Out[42]:
| open | close | high | low | volume | money | close1 | close2 | |
|---|---|---|---|---|---|---|---|---|
| scaled_cluster | ||||||||
| 0 | 0.078958 | 0.078958 | 6.779438 | 6.767458 | 1.059397e+05 | 7.168576e+05 | 0.078958 | 0.078958 |
| 1 | 0.006126 | 0.006126 | 3.311344 | 3.303476 | 7.111992e+04 | 2.462302e+05 | 0.006126 | 0.006126 |
| 2 | 1.957469 | 1.957469 | 6.565434 | 6.524719 | 1.791183e+06 | 1.130658e+07 | 1.957469 | 1.957469 |
In [47]:
pd.plotting.scatter_matrix(X,c=colors[beer.scaled_cluster],alpha=1,figsize=(10,10),s=100)
Out[47]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000001DBC8A90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AB75390>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AB9B940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001ABCBEB8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC054A8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC329B0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC61F60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC9C588>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC9C5C0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AD020F0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AD356A0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AD63C50>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000000001ADA1240>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001ADD27F0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AE05DA0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AE44390>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AE73940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022126EF0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x00000000221634E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022193A90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000221D3080>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022203630>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022235BE0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000222731D0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x00000000222A4780>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000222D5D30>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022311320>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000223428D0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022374E80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000223B2470>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x00000000223E3A20>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022416FD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000224525C0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000000022482B70>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000224C3160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x00000000224F2710>]],
dtype=object)
In [ ]:
from sklearn import metrics
score_scaled = metrics.silhouette_score(X,beer.scaled_cluster)
score = metrics.silhouette_score(X,beer.cluster)
print(score_scaled,score)
In [ ]:
scores = []
for k in range(2,20):
labels = KMeans(n_clusters=k).fit(X).labels_
score = metrics.silhouette_score(X,labels)
scores.append(score)
scores
In [ ]:
plt.plot(list(range(2,20)),scores)
plt.xlabel("Number of Clusters Initialized")
plt.ylabel("Sihouette Score")
In [ ]:
from sklearn.cluster import DBSCAN
db = DBSCAN(eps=10,min_s)