import random # randint(0,n),包括n import matplotlib.pyplot as plt %pylab inline # 数据集1000000条数据
Populating the interactive namespace from numpy and matplotlib
c:\users\wrs\appdata\local\programs\python\python36\lib\site-packages\IPython\core\magics\pylab.py:160: UserWarning: pylab import has clobbered these variables: ['random'] `%matplotlib` prevents importing * from pylab and numpy "\n`%matplotlib` prevents importing * from pylab and numpy"
In [2]:
def FillWindow(e):
if len(window) <=window_size:
window.append(e)
else:
for index in range(0,window_size-1):
window[index] = window[index+1]
window[window_size-1] = e
def Sample(t):
location=0 # 从1开始计算
with open('./物联网课程设计/data_for_programme1.txt','r') as f:
while 1:
location+=1
temp=f.readline()#读取值
temp_int=int(temp.strip('\n'))
FillWindow(temp_int)
if location>t:
break
if location <=sample_size:#填充样本容量
sample.append(temp_int)
elif random.randint(1,location)<sample_size:#按照k/n概率判断是否可以进入样本
sample[random.randint(0,sample_size-1)]=temp_int#随机替换
def Sample_act(t):
data_sum=0
i=0
with open('./物联网课程设计/data_for_programme1.txt','r') as f:
temp=f.readline()#读取值
while temp:
i+=1
if i>t:
break
data_sum+=int(temp.strip('\n'))
temp=f.readline()
return data_sum/i
In [3]:
sample_avg=Sample_act(1000000)
In [4]:
sample_avg
Out[4]:
33415.093201
In [5]:
# 同一窗口大小,同一样本大小,不同时刻
#t_list= [100,500,1000,1500]
t_list=[]
for i in range(1,31):
t_list.append(i*10000)
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
window_size = 200 # 窗口大小
sample_size = 100#样本集合大小
for t in t_list:
window = [] #窗口
sample = [] #样本
Sample(t)
act_data.append(sum(window)/window_size)
about_data.append(sum(sample)/sample_size)
sample_avg=Sample_act(t)
avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(t_list, act_data,label="act",color="red",marker='.', linestyle='solid')
plt.plot(t_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(t_list, avg_data,label="avg",marker='.', linestyle='solid')
# for x, y in zip(t_list, act_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(t_list, about_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
plt.xlabel("t")
plt.ylabel("value")
plt.legend()
plt.show()
In [6]:
plt.figure(figsize=(12,4)) plt.plot(t_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid') plt.plot(t_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid') plt.legend() plt.show()
In [7]:
print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
print("给定时刻值",[round(x,1) for x in avg_data])
数据集平均值 33422.70823097257 窗口均值 [34112.3, 35816.2, 30887.6, 32975.2, 34375.8, 27977.6, 36411.9, 34310.3, 36661.3, 33549.2, 36815.9, 32952.3, 35188.3, 34315.5, 31901.6, 32871.9, 32215.9, 35862.6, 30909.8, 29014.7, 32939.0, 33465.4, 37761.4, 35677.2, 33392.0, 33260.0, 33617.3, 34545.4, 34235.5, 36789.0] 样本均值 [38113.5, 35623.2, 31675.3, 31770.9, 32142.1, 33834.7, 40752.4, 35226.0, 35997.7, 34025.9, 27705.3, 37591.8, 34802.5, 34105.6, 28488.9, 32133.5, 39879.3, 35852.8, 37146.6, 32030.7, 31583.4, 31711.8, 36208.1, 35966.9, 31384.6, 36758.9, 33578.4, 33908.6, 33178.6, 27743.4] 给定时刻值 [33744.9, 33778.2, 33665.3, 33617.2, 33574.6, 33523.6, 33556.6, 33533.7, 33558.8, 33535.5, 33527.9, 33542.0, 33521.4, 33508.2, 33487.2, 33473.3, 33498.3, 33495.1, 33470.5, 33474.6, 33473.6, 33465.8, 33458.0, 33442.3, 33457.6, 33451.4, 33448.3, 33426.6, 33430.4, 33422.7]
In [8]:
# 不同窗口大小,同一样本大小,同一时刻
window_size_list = []
for i in range(1,31):
window_size_list.append(i*100)
t = 40000 # 采样时刻
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
sample_size = 100#样本集合大小
sample_avg=Sample_act(t)
for window_size in window_size_list:
window = [] #窗口
sample = [] #样本
Sample(t)
act_data.append(sum(window)/window_size)
about_data.append(sum(sample)/sample_size)
avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(window_size_list, act_data,label="act",color="red",marker='*', linestyle='solid')
plt.plot(window_size_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(window_size_list, avg_data,label="avg",marker='.', linestyle='solid')
# for x, y in zip(window_size_list, act_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(window_size_list, about_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
plt.xlabel("window_size")
plt.ylabel("value")
plt.legend()
plt.show()
In [9]:
plt.figure(figsize=(12,4)) plt.plot(window_size_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid') plt.plot(window_size_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid') plt.legend() plt.show()
In [10]:
print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
数据集平均值 33617.245893852654 窗口均值 [32412.3, 32975.2, 33626.7, 33964.0, 33144.3, 32790.1, 32194.3, 32232.4, 32780.0, 33138.3, 33235.6, 32909.2, 33327.4, 32985.6, 32914.2, 33065.9, 33095.3, 33409.6, 33326.2, 33522.5, 33482.3, 33681.4, 33642.5, 33591.8, 33587.1, 33556.4, 33478.9, 33478.4, 33440.7, 33519.4] 样本均值 [31466.2, 36143.5, 32360.0, 32526.5, 32527.0, 33550.1, 33404.6, 30672.1, 28940.6, 34753.0, 26807.3, 34903.1, 33954.1, 36901.0, 33670.3, 35374.3, 29946.4, 36758.6, 32292.8, 35097.7, 35508.4, 36568.6, 36860.9, 38576.6, 31693.9, 36039.2, 27709.3, 32178.3, 30042.9, 29947.7]
In [11]:
# 同窗口大小,不同样本大小,同一时刻
sample_size_list = []
for i in range(1,31):
sample_size_list.append(i*100)
t = 10000 # 采样时刻
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
window_size = 4000 # 窗口大小
sample_avg=Sample_act(t)
for sample_size in sample_size_list:
window = [] #窗口
sample = [] #样本
Sample(t)
act_data.append(sum(window)/window_size)
about_data.append(sum(sample)/sample_size)
avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(sample_size_list, act_data,label="act",color="red",marker='*', linestyle='solid')
plt.plot(sample_size_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(sample_size_list, avg_data,label="avg",marker='.', linestyle='solid')
# for x, y in zip(sample_size_list, act_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(sample_size_list, about_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
plt.xlabel("sample_size")
plt.ylabel("value")
plt.legend()
plt.show()
In [12]:
plt.figure(figsize=(12,4)) plt.plot(sample_size_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid') plt.plot(sample_size_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid') plt.legend() plt.show()
In [13]:
print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
数据集平均值 33744.862713728624 窗口均值 [34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0] 样本均值 [29512.6, 33148.0, 33611.4, 32632.9, 32833.0, 33798.0, 32803.2, 34182.0, 33009.9, 33992.9, 33222.6, 31968.5, 33685.3, 33823.1, 32863.4, 34882.5, 34322.2, 33041.2, 33794.1, 33614.2, 33628.0, 34141.1, 34611.3, 33833.7, 34260.3, 33715.0, 33355.0, 33704.2, 34208.7, 33379.6]
In [14]:
# 同窗口大小,同样本大小,同一时刻,不同采样次数
sample_times_list = []
for i in range(1,31):
sample_times_list.append(i*10)
t = 10000 # 采样时刻
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
window_size = 300 # 窗口大小
sample_size = 100 # 样本大小
window=[]
Sample(t)
sample_avg=Sample_act(t)
act=sum(window)/window_size#获取窗口均值
for sample_times in sample_times_list:
about_sum=[]
for i in range(0,sample_times):
window=[]
sample = [] #样本
Sample(t)
about_sum.append(sum(sample)/sample_size)
act_data.append(act)
about_data.append(sum(about_sum)/sample_times)
avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(sample_times_list, act_data,label="act",color="red",marker='*', linestyle='solid')
plt.plot(sample_times_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(sample_times_list, avg_data,label="avg",marker='.', linestyle='solid')
# for x, y in zip(sample_times_list, act_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(sample_times_list, about_data):
# plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
plt.xlabel("sample_times")
plt.ylabel("value")
plt.legend()
plt.show()
In [15]:
plt.figure(figsize=(12,4)) plt.plot(sample_times_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid') plt.plot(sample_times_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid') plt.legend() plt.show()
In [16]:
print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
数据集平均值 33744.862713728624 窗口均值 [33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5] 样本均值 [33812.9, 34676.2, 33751.4, 34292.9, 33481.6, 34368.8, 33492.2, 33877.9, 34304.4, 34343.2, 33527.0, 34127.0, 33998.1, 33857.7, 34035.1, 33562.9, 34004.3, 34156.2, 34019.0, 33753.2, 33844.5, 33803.4, 34071.6, 33959.4, 34059.3, 34099.3, 34018.9, 33777.3, 33820.8, 33812.4]