import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def load_records(path):
    """Parse a JSON-lines file into a list of decoded objects.

    Each non-blank line of the file is decoded with ``json.loads``.
    Blank or whitespace-only lines (for example a trailing newline at the
    end of the file) are skipped instead of raising a decode error.

    Args:
        path: Path of the text file to read, one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager guarantees the handle is closed even when a line
    # fails to parse; iterating the file avoids materialising every line.
    with open(path, encoding="utf-8") as fp:
        return [json.loads(line) for line in fp if line.strip()]


def main():
    """Summarise the records by time zone and Windows / non-Windows agent.

    Prints intermediate results at each step and finally shows a stacked
    horizontal bar chart with the normalised Windows / not-Windows share
    for the ten time zones that have the most records.
    """
    path = "usagov_bitly_data2012-03-16-1331923249.txt"
    records = load_records(path)
    print(len(records))

    frame = pd.DataFrame(records)
    print(frame['tz'])

    # Separate "field absent" (NaN -> 'Missing') from "field present but
    # empty" ('' -> 'Unknown') before counting.
    clean_tz = frame['tz'].fillna('Missing')
    clean_tz[clean_tz == ''] = 'Unknown'
    tz_counts = clean_tz.value_counts()
    print(tz_counts[:20])

    # First whitespace-delimited token of every non-null 'a' value
    # (presumably the leading product token of a user-agent string --
    # confirm against the data set's documentation).
    results = pd.Series([x.split()[0] for x in frame.a.dropna()])
    print(results[:5])

    # Only rows that actually carry an 'a' value can be classified.
    cframe = frame[frame.a.notnull()]
    operating_system = np.where(
        cframe['a'].str.contains('Windows'), 'Windows', 'not Windows'
    )
    print(operating_system[:10])

    # Count rows per (time zone, OS label); combinations that never occur
    # become 0 after unstacking.
    by_tz_os = cframe.groupby(['tz', operating_system])
    agg_counts = by_tz_os.size().unstack().fillna(0)
    print(agg_counts[:10])

    # Positions that sort the per-time-zone totals in ascending order, so
    # the last ten taken rows are the ten largest time zones.
    indexer = agg_counts.sum(axis=1).argsort()
    print(indexer[:10])
    count_subset = agg_counts.take(indexer)[-10:]
    print(count_subset)

    # Normalise each row to sum to 1 so the bars compare OS share rather
    # than absolute volume.
    normed_subset = count_subset.div(count_subset.sum(axis=1), axis=0)
    normed_subset.plot(kind='barh', stacked=True)
    plt.show()


if __name__ == "__main__":
    main()
    
    
    

简单的入门2

相关文章:

  • 2022-01-06
  • 2021-12-15
  • 2022-01-04
  • 2021-08-03
  • 2021-10-28
  • 2021-12-11
  • 2021-05-19
猜你喜欢
  • 2021-11-04
  • 2021-08-28
  • 2021-11-21
  • 2021-05-27
  • 2021-04-19
  • 2020-10-26
  • 2021-11-20
相关资源
相似解决方案