arthur-54271

config.py

import os

# Directory that contains the input CSV data sets
dataset_path = './data'

# Directory where result files are written
output_path = './output'

# Create the output directory (and any missing parents) at import time.
# exist_ok=True avoids the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs(output_path, exist_ok=True)

# Columns that are read for every city
common_cols = ['year', 'month']

# Data file and the column names to analyse for each city,
# stored as a dict of the form {city: (file name, [column names])}
data_config_dict = {
    'beijing': ('BeijingPM20100101_20151231.csv',
                ['Dongsi', 'Dongsihuan', 'Nongzhanguan']),
    'chengdu': ('ChengduPM20100101_20151231.csv',
                ['Caotangsi', 'Shahepu']),
    'guangzhou': ('GuangzhouPM20100101_20151231.csv',
                  ['City Station', '5th Middle School']),
    'shanghai': ('ShanghaiPM20100101_20151231.csv',
                 ['Jingan', 'Xuhui']),
    'shenyang': ('ShenyangPM20100101_20151231.csv',
                 ['Taiyuanjie', 'Xiaoheyan']),
}

====================================================================================================================

main.py

"""

案例:中国五大城市PM2.5数据分析
任务:
- 五城市污染状态
- 五城市每个区空气质量的月度差异

数据集来源:https://www.kaggle.com/uciml/pm25-data-for-five-chinese-cities

"""
import csv
import os
import numpy as np
import config

def load_data(data_file, usecols):
    """Load the selected columns of a CSV file into a float array.

    Every selected field is converted to float. A field that is 'NA',
    blank, or absent from a short row counts as missing, and any row with
    at least one missing selected field is dropped.

    :param data_file: path of the CSV file to read
    :param usecols: names of the columns to extract, in output column order
    :return: data_arr: 2-D float ndarray with one row per complete record
        and one column per entry of usecols; its shape is
        (0, len(usecols)) when no complete record exists
    :raises KeyError: if a name in usecols is not a column of the file
    :raises ValueError: if a non-missing field is not a valid number
    """
    missing_markers = ('', 'NA')
    data = []
    # newline='' lets the csv module do its own line-ending handling,
    # as the csv documentation recommends for files passed to a reader.
    with open(data_file, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # The csv module yields every field as a string
            # (or None for fields missing from a short row).
            row_data = []
            for col in usecols:
                raw = row[col]
                if raw is None or raw.strip() in missing_markers:
                    row_data.append(np.nan)
                else:
                    row_data.append(float(raw))
            # Keep the record only when none of its fields is missing.
            if not np.isnan(row_data).any():
                data.append(row_data)

    if not data:
        # Keep the result 2-D so that column slicing still works downstream.
        return np.empty((0, len(usecols)))
    return np.array(data)


def get_polluted_perc(data_arr):
    """Return the share of hours that fall into each pollution level.

    The PM values of all columns from index 2 onwards are averaged per row,
    and that row mean is classified as follows:
        heavy   PM2.5 > 150
        medium  75 < PM2.5 <= 150
        light   35 < PM2.5 <= 75
        good    PM2.5 <= 35

    :param data_arr: 2-D array whose first two columns are skipped and whose
        remaining columns hold the PM values
    :return: polluted_perc_list: fractions of rows in the order
        [heavy, medium, light, good]; all 0.0 when data_arr has no rows
    """
    # Without any rows there is nothing to classify; returning zeros avoids
    # a division by zero (and an IndexError for a 1-D empty array).
    if len(data_arr) == 0:
        return [0.0, 0.0, 0.0, 0.0]

    # Average the PM columns of each row to get one value per hour.
    hour_val = np.mean(data_arr[:, 2:], axis=1)
    n_hours = hour_val.shape[0]

    level_masks = (
        hour_val > 150,                       # heavy
        (hour_val > 75) & (hour_val <= 150),  # medium
        (hour_val > 35) & (hour_val <= 75),   # light
        hour_val <= 35,                       # good
    )
    # int() keeps the results plain Python floats after the division.
    return [int(np.count_nonzero(mask)) / n_hours for mask in level_masks]

def get_avg_pm_per_month(data_arr):
    """Compute the mean PM value of every PM column for each month.

    Rows are grouped by column 0 (year) and then by column 1 (month); the
    columns from index 2 onwards are averaged within each group.

    :param data_arr: 2-D array with year, month and then the PM columns
    :return: results_arr: array with one row per (year, month) in ascending
        order; the first entry is the label 'YYYY-MM' and the rest are the
        column means. Mixing the label with numbers makes numpy store every
        entry as a string. An empty array is returned when data_arr has no
        rows.
    """
    # Nothing to group; also protects the column indexing below from a
    # 1-D empty array.
    if len(data_arr) == 0:
        return np.array([])

    results = []
    year_col = data_arr[:, 0]
    for year in np.unique(year_col):
        year_data_arr = data_arr[year_col == year]
        month_col = year_data_arr[:, 1]

        for month in np.unique(month_col):
            month_data_arr = year_data_arr[month_col == month]
            # Column-wise mean over all rows of this month.
            mean_vals = month_data_arr[:, 2:].mean(axis=0).tolist()
            label = '{:.0f}-{:02.0f}'.format(year, month)
            results.append([label] + mean_vals)

    return np.array(results)

def save_stats_to_csv(results_arr, save_file, headers):
    """Write a header row followed by the rows of results_arr to a CSV file.

    :param results_arr: array of results; it is converted with .tolist()
        and each resulting item is written as one CSV row
    :param save_file: path of the CSV file to create or overwrite
    :param headers: list of column titles written as the first row
    :return: None
    """
    # newline='' stops the text layer from translating the line endings
    # that the csv writer emits itself.
    with open(save_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)
        writer.writerows(results_arr.tolist())
def main():
    """Run the PM2.5 analysis for every configured city and save the results.

    For each entry of config.data_config_dict this loads the city's CSV
    file, prints a preview of the data, computes the share of hours per
    pollution level and the monthly mean of every PM column, and writes the
    monthly means to '<city>_month_stats.csv'. After all cities are
    processed, the pollution shares are written to
    'polluted_percentage.csv'. Both files are placed in config.output_path.
    """
    polluted_state_list = []

    for city_name, (filename, cols) in config.data_config_dict.items():
        # === Load and clean the data ===
        data_file = os.path.join(config.dataset_path, filename)
        # The PM columns in the CSV files carry a 'PM_' prefix.
        usecols = config.common_cols + ['PM_' + col for col in cols]
        data_arr = load_data(data_file, usecols)

        print('{}共有{}行有效数据'.format(city_name, data_arr.shape[0]))
        # Preview the first 10 rows.
        print('{}的前10行数据:'.format(city_name))
        print(data_arr[:10])

        # === Analysis ===
        # Share of hours in each pollution level for this city.
        polluted_perc_list = get_polluted_perc(data_arr)
        polluted_state_list.append([city_name] + polluted_perc_list)
        print('{}的污染小时数百分比{}'.format(city_name, polluted_perc_list))

        # Monthly mean PM value of every configured column.
        results_arr = get_avg_pm_per_month(data_arr)
        print('{}的每月平均PM值预览:'.format(city_name))
        print(results_arr[:10])

        # === Output ===
        # Save the monthly statistics of this city to its own CSV file.
        save_filename = city_name + '_month_stats.csv'
        save_file = os.path.join(config.output_path, save_filename)
        save_stats_to_csv(results_arr, save_file, headers=['month'] + cols)
        print('月度统计结果已保存至{}'.format(save_file))

    # Save the pollution-level shares of all cities in a single file.
    save_file = os.path.join(config.output_path, 'polluted_percentage.csv')
    with open(save_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['city', 'heavy', 'medium', 'light', 'good'])
        writer.writerows(polluted_state_list)
    print('污染状态结果已保存至{}'.format(save_file))


# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

====================================================================================================================

/usr/local/bin/python3.6 /Users/apple/PycharmProjects/xiaoxiang02/main.py
beijing共有19613行有效数据
beijing的前10行数据:
[[2013. 3. 117. 166. 140.]
[2013. 3. 131. 165. 152.]
[2013. 3. 141. 173. 128.]
[2013. 3. 169. 182. 3.]
[2013. 3. 169. 169. 3.]
[2013. 3. 174. 183. 163.]
[2013. 3. 194. 195. 192.]
[2013. 3. 208. 212. 203.]
[2013. 3. 213. 207. 195.]
[2013. 3. 203. 198. 185.]]
beijing的污染小时数百分比[0.1723346759802172, 0.26956610411461784, 0.24611227247233977, 0.3119869474328252]
beijing的每月平均PM值预览:
[[\'2013-03\' \'117.99354838709678\' \'128.4725806451613\' \'116.1774193548387\']
[\'2013-04\' \'64.298937784522\' \'63.165402124430955\' \'56.88770864946889\']
[\'2013-05\' \'91.35816618911174\' \'101.55014326647564\' \'77.11174785100286\']
[\'2013-06\' \'110.01160092807424\' \'119.17169373549883\'
\'108.27146171693735\']
[\'2013-07\' \'72.19110378912686\' \'85.35090609555189\' \'74.67051070840198\']
[\'2013-08\' \'63.986301369863014\' \'69.77168949771689\' \'64.64687975646879\']
[\'2013-09\' \'83.79607250755286\' \'82.89577039274924\' \'80.97129909365559\']
[\'2013-10\' \'102.78525641025641\' \'101.52403846153847\' \'94.6923076923077\']
[\'2013-11\' \'83.16338028169014\' \'84.2338028169014\' \'83.55211267605634\']
[\'2013-12\' \'87.7453505007153\' \'92.02718168812589\' \'89.99570815450643\']]
月度统计结果已保存至./output/beijing_month_stats.csv
chengdu共有23816行有效数据
chengdu的前10行数据:
[[2.013e+03 1.000e+00 1.210e+02 1.380e+02]
[2.013e+03 1.000e+00 1.340e+02 1.590e+02]
[2.013e+03 1.000e+00 2.030e+02 1.620e+02]
[2.013e+03 1.000e+00 2.170e+02 1.570e+02]
[2.013e+03 1.000e+00 2.200e+02 1.700e+02]
[2.013e+03 1.000e+00 2.140e+02 2.250e+02]
[2.013e+03 1.000e+00 2.090e+02 2.440e+02]
[2.013e+03 1.000e+00 2.280e+02 2.420e+02]
[2.013e+03 1.000e+00 2.190e+02 2.770e+02]
[2.013e+03 1.000e+00 2.250e+02 2.810e+02]]
chengdu的污染小时数百分比[0.10971615720524018, 0.2613789049378569, 0.394902586496473, 0.23400235136042996]
chengdu的每月平均PM值预览:
[[\'2013-01\' \'170.09582689335394\' \'189.5625965996909\']
[\'2013-02\' \'126.59324758842443\' \'118.9807073954984\']
[\'2013-03\' \'141.24685534591194\' \'139.7059748427673\']
[\'2013-04\' \'102.12990196078431\' \'94.19607843137256\']
[\'2013-05\' \'77.12660944206009\' \'66.92703862660944\']
[\'2013-06\' \'52.236486486486484\' \'47.11711711711712\']
[\'2013-07\' \'50.69642857142857\' \'40.565934065934066\']
[\'2013-08\' \'66.55602240896359\' \'56.627450980392155\']
[\'2013-09\' \'60.584\' \'58.364\']
[\'2013-10\' \'100.51994301994301\' \'99.68518518518519\']]
月度统计结果已保存至./output/chengdu_month_stats.csv
guangzhou共有20074行有效数据
guangzhou的前10行数据:
[[2.013e+03 1.000e+00 8.300e+01 7.800e+01]
[2.013e+03 1.000e+00 9.500e+01 7.000e+01]
[2.013e+03 1.000e+00 5.500e+01 6.600e+01]
[2.013e+03 1.000e+00 6.000e+01 6.900e+01]
[2.013e+03 1.000e+00 4.100e+01 5.100e+01]
[2.013e+03 1.000e+00 4.200e+01 3.900e+01]
[2.013e+03 1.000e+00 4.000e+01 3.700e+01]
[2.013e+03 1.000e+00 4.000e+01 3.800e+01]
[2.013e+03 1.000e+00 3.500e+01 3.400e+01]
[2.013e+03 1.000e+00 4.200e+01 3.400e+01]]
guangzhou的污染小时数百分比[0.01225465776626482, 0.14715552455913122, 0.4265716847663645, 0.4140181329082395]
guangzhou的每月平均PM值预览:
[[\'2013-01\' \'83.84602076124567\' \'85.5363321799308\']
[\'2013-02\' \'60.82752613240418\' \'56.825783972125436\']
[\'2013-03\' \'67.9199372056515\' \'62.71742543171115\']
[\'2013-04\' \'72.91483516483517\' \'65.43406593406593\']
[\'2013-05\' \'37.05223880597015\' \'39.65422885572139\']
[\'2013-06\' \'25.188432835820894\' \'27.89179104477612\']
[\'2013-07\' \'15.283018867924529\' \'25.58490566037736\']
[\'2013-09\' \'40.171140939597315\' \'42.285234899328856\']
[\'2013-11\' \'30.181818181818183\' \'36.45454545454545\']
[\'2013-12\' \'62.295121951219514\' \'70.60487804878049\']]
月度统计结果已保存至./output/guangzhou_month_stats.csv
shanghai共有23993行有效数据
shanghai的前10行数据:
[[2.013e+03 1.000e+00 6.600e+01 7.100e+01]
[2.013e+03 1.000e+00 6.700e+01 7.200e+01]
[2.013e+03 1.000e+00 7.300e+01 7.400e+01]
[2.013e+03 1.000e+00 7.500e+01 7.700e+01]
[2.013e+03 1.000e+00 7.300e+01 8.000e+01]
[2.013e+03 1.000e+00 7.400e+01 7.700e+01]
[2.013e+03 1.000e+00 7.300e+01 8.400e+01]
[2.013e+03 1.000e+00 7.700e+01 8.700e+01]
[2.013e+03 1.000e+00 7.300e+01 9.100e+01]
[2.013e+03 1.000e+00 8.200e+01 8.800e+01]]
shanghai的污染小时数百分比[0.0504313758179469, 0.18809652815404493, 0.3728587504688868, 0.3886133455591214]
shanghai的每月平均PM值预览:
[[\'2013-01\' \'97.96923076923076\' \'96.23230769230769\']
[\'2013-02\' \'64.3262839879154\' \'62.24773413897281\']
[\'2013-03\' \'65.05007587253414\' \'64.90136570561457\']
[\'2013-04\' \'66.57551669316375\' \'61.32273449920509\']
[\'2013-05\' \'62.2625\' \'57.384375\']
[\'2013-06\' \'56.86453576864536\' \'58.00304414003044\']
[\'2013-07\' \'45.73089171974522\' \'45.99203821656051\']
[\'2013-08\' \'34.78417266187051\' \'35.93237410071942\']
[\'2013-09\' \'31.261755485893417\' \'31.976489028213166\']
[\'2013-10\' \'35.68104776579353\' \'37.707241910631744\']]
月度统计结果已保存至./output/shanghai_month_stats.csv
shenyang共有24115行有效数据
shenyang的前10行数据:
[[2.013e+03 1.000e+00 1.450e+02 1.480e+02]
[2.013e+03 1.000e+00 1.500e+02 1.330e+02]
[2.013e+03 1.000e+00 1.420e+02 1.210e+02]
[2.013e+03 1.000e+00 1.050e+02 1.100e+02]
[2.013e+03 1.000e+00 1.540e+02 1.070e+02]
[2.013e+03 1.000e+00 1.760e+02 1.230e+02]
[2.013e+03 1.000e+00 1.400e+02 1.110e+02]
[2.013e+03 1.000e+00 9.300e+01 7.600e+01]
[2.013e+03 1.000e+00 5.300e+01 5.600e+01]
[2.013e+03 1.000e+00 2.300e+01 2.900e+01]]
shenyang的污染小时数百分比[0.11909599834128136, 0.24242172921418204, 0.33278042712004974, 0.30570184532448685]
shenyang的每月平均PM值预览:
[[\'2013-01\' \'200.24801271860096\' \'207.59777424483306\']
[\'2013-02\' \'93.0326797385621\' \'93.11437908496733\']
[\'2013-03\' \'85.57299843014128\' \'74.7032967032967\']
[\'2013-04\' \'62.97513812154696\' \'58.08839779005525\']
[\'2013-05\' \'75.40425531914893\' \'74.39574468085107\']
[\'2013-06\' \'57.67380560131796\' \'53.85172981878089\']
[\'2013-07\' \'47.89235569422777\' \'32.42745709828393\']
[\'2013-08\' \'56.172821270310195\' \'43.90546528803545\']
[\'2013-09\' \'48.861759425493716\' \'42.72351885098743\']
[\'2013-10\' \'84.93227665706051\' \'82.7478386167147\']]
月度统计结果已保存至./output/shenyang_month_stats.csv
污染状态结果已保存至./output/polluted_percentage.csv

Process finished with exit code 0

 

 

分类:

技术点:

相关文章:

  • 2021-12-14
  • 2021-11-14
  • 2021-11-04
  • 2021-11-04
  • 2021-12-19
  • 2021-12-31
  • 2021-11-14
猜你喜欢
  • 2021-09-07
  • 2021-09-07
  • 2021-11-09
  • 2021-09-07
  • 2021-12-29
  • 2021-12-29
相关资源
相似解决方案