import random # randint(0,n),包括n
import matplotlib.pyplot as plt
%pylab inline

# 数据集1000000条数据
Populating the interactive namespace from numpy and matplotlib
c:\users\wrs\appdata\local\programs\python\python36\lib\site-packages\IPython\core\magics\pylab.py:160: UserWarning: pylab import has clobbered these variables: ['random']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

In [2]:

def FillWindow(e):
    if len(window) <=window_size:
        window.append(e)
    else:
        for index in range(0,window_size-1):
            window[index] = window[index+1]
        window[window_size-1] = e
        
def Sample(t):
    location=0 # 从1开始计算
    with open('./物联网课程设计/data_for_programme1.txt','r') as f:
        while 1:
            location+=1
            temp=f.readline()#读取值
            temp_int=int(temp.strip('\n'))
            FillWindow(temp_int)
            if location>t:
                break
            if location <=sample_size:#填充样本容量
                sample.append(temp_int)
            elif random.randint(1,location)<sample_size:#按照k/n概率判断是否可以进入样本
                sample[random.randint(0,sample_size-1)]=temp_int#随机替换
                
def Sample_act(t):
    data_sum=0
    i=0
    with open('./物联网课程设计/data_for_programme1.txt','r') as f:
        temp=f.readline()#读取值
        while temp:
            i+=1
            if i>t:
                break
            data_sum+=int(temp.strip('\n'))
            temp=f.readline()
    return data_sum/i

In [3]:

sample_avg=Sample_act(1000000)

In [4]:

sample_avg

Out[4]:

33415.093201

In [5]:

# 同一窗口大小,同一样本大小,不同时刻
#t_list= [100,500,1000,1500]
t_list=[]
for i in range(1,31):
    t_list.append(i*10000)
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
window_size = 200 # 窗口大小
sample_size = 100#样本集合大小


for t in t_list:
    window = [] #窗口
    sample = [] #样本
    Sample(t)
    act_data.append(sum(window)/window_size)
    about_data.append(sum(sample)/sample_size)
    sample_avg=Sample_act(t)
    avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(t_list, act_data,label="act",color="red",marker='.', linestyle='solid')
plt.plot(t_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(t_list, avg_data,label="avg",marker='.', linestyle='solid')

# for x, y in zip(t_list, act_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(t_list, about_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
    
plt.xlabel("t")
plt.ylabel("value")
plt.legend()
plt.show()

水库采样 python 代码

In [6]:

plt.figure(figsize=(12,4))
plt.plot(t_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid')
plt.plot(t_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid')
plt.legend()
plt.show()

水库采样 python 代码

In [7]:

print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
print("给定时刻值",[round(x,1) for x in avg_data])
数据集平均值 33422.70823097257
窗口均值 [34112.3, 35816.2, 30887.6, 32975.2, 34375.8, 27977.6, 36411.9, 34310.3, 36661.3, 33549.2, 36815.9, 32952.3, 35188.3, 34315.5, 31901.6, 32871.9, 32215.9, 35862.6, 30909.8, 29014.7, 32939.0, 33465.4, 37761.4, 35677.2, 33392.0, 33260.0, 33617.3, 34545.4, 34235.5, 36789.0]
样本均值 [38113.5, 35623.2, 31675.3, 31770.9, 32142.1, 33834.7, 40752.4, 35226.0, 35997.7, 34025.9, 27705.3, 37591.8, 34802.5, 34105.6, 28488.9, 32133.5, 39879.3, 35852.8, 37146.6, 32030.7, 31583.4, 31711.8, 36208.1, 35966.9, 31384.6, 36758.9, 33578.4, 33908.6, 33178.6, 27743.4]
给定时刻值 [33744.9, 33778.2, 33665.3, 33617.2, 33574.6, 33523.6, 33556.6, 33533.7, 33558.8, 33535.5, 33527.9, 33542.0, 33521.4, 33508.2, 33487.2, 33473.3, 33498.3, 33495.1, 33470.5, 33474.6, 33473.6, 33465.8, 33458.0, 33442.3, 33457.6, 33451.4, 33448.3, 33426.6, 33430.4, 33422.7]

In [8]:

# 不同窗口大小,同一样本大小,同一时刻
window_size_list = []
for i in range(1,31):
    window_size_list.append(i*100)
t = 40000 # 采样时刻
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
sample_size = 100#样本集合大小
sample_avg=Sample_act(t)
for window_size in window_size_list:
    window = [] #窗口
    sample = [] #样本
    Sample(t)
    act_data.append(sum(window)/window_size)
    about_data.append(sum(sample)/sample_size)
    avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(window_size_list, act_data,label="act",color="red",marker='*', linestyle='solid')
plt.plot(window_size_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(window_size_list, avg_data,label="avg",marker='.', linestyle='solid')
# for x, y in zip(window_size_list, act_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(window_size_list, about_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
plt.xlabel("window_size")
plt.ylabel("value")
plt.legend()
plt.show()

水库采样 python 代码

In [9]:

plt.figure(figsize=(12,4))
plt.plot(window_size_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid')
plt.plot(window_size_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid')
plt.legend()
plt.show()

水库采样 python 代码

In [10]:

print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
数据集平均值 33617.245893852654
窗口均值 [32412.3, 32975.2, 33626.7, 33964.0, 33144.3, 32790.1, 32194.3, 32232.4, 32780.0, 33138.3, 33235.6, 32909.2, 33327.4, 32985.6, 32914.2, 33065.9, 33095.3, 33409.6, 33326.2, 33522.5, 33482.3, 33681.4, 33642.5, 33591.8, 33587.1, 33556.4, 33478.9, 33478.4, 33440.7, 33519.4]
样本均值 [31466.2, 36143.5, 32360.0, 32526.5, 32527.0, 33550.1, 33404.6, 30672.1, 28940.6, 34753.0, 26807.3, 34903.1, 33954.1, 36901.0, 33670.3, 35374.3, 29946.4, 36758.6, 32292.8, 35097.7, 35508.4, 36568.6, 36860.9, 38576.6, 31693.9, 36039.2, 27709.3, 32178.3, 30042.9, 29947.7]

In [11]:

# 同窗口大小,不同样本大小,同一时刻
sample_size_list = []
for i in range(1,31):
    sample_size_list.append(i*100)
t = 10000 # 采样时刻
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
window_size = 4000 # 窗口大小
sample_avg=Sample_act(t)
for sample_size in sample_size_list:
    window = [] #窗口
    sample = [] #样本
    Sample(t)
    act_data.append(sum(window)/window_size)
    about_data.append(sum(sample)/sample_size)
    avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(sample_size_list, act_data,label="act",color="red",marker='*', linestyle='solid')
plt.plot(sample_size_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(sample_size_list, avg_data,label="avg",marker='.', linestyle='solid')

# for x, y in zip(sample_size_list, act_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(sample_size_list, about_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
    
plt.xlabel("sample_size")
plt.ylabel("value")
plt.legend()
plt.show()

水库采样 python 代码

In [12]:

plt.figure(figsize=(12,4))
plt.plot(sample_size_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid')
plt.plot(sample_size_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid')
plt.legend()
plt.show()

水库采样 python 代码

In [13]:

print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
数据集平均值 33744.862713728624
窗口均值 [34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0, 34638.0]
样本均值 [29512.6, 33148.0, 33611.4, 32632.9, 32833.0, 33798.0, 32803.2, 34182.0, 33009.9, 33992.9, 33222.6, 31968.5, 33685.3, 33823.1, 32863.4, 34882.5, 34322.2, 33041.2, 33794.1, 33614.2, 33628.0, 34141.1, 34611.3, 33833.7, 34260.3, 33715.0, 33355.0, 33704.2, 34208.7, 33379.6]

In [14]:

# 同窗口大小,同样本大小,同一时刻,不同采样次数
sample_times_list = []
for i in range(1,31):
    sample_times_list.append(i*10)
t = 10000 # 采样时刻
act_data=[] # 精确窗口均值
about_data=[] #估计样本均值
avg_data = []
window_size = 300 # 窗口大小
sample_size = 100 # 样本大小
window=[]
Sample(t)
sample_avg=Sample_act(t)
act=sum(window)/window_size#获取窗口均值
for sample_times in sample_times_list:
    about_sum=[]
    for i in range(0,sample_times):
        window=[]
        sample = [] #样本
        Sample(t)
        about_sum.append(sum(sample)/sample_size)
    act_data.append(act)
    about_data.append(sum(about_sum)/sample_times)
    avg_data.append(sample_avg)
plt.figure(figsize=(12,4))
plt.plot(sample_times_list, act_data,label="act",color="red",marker='*', linestyle='solid')
plt.plot(sample_times_list, about_data,label="about",marker='.', linestyle='solid')
plt.plot(sample_times_list, avg_data,label="avg",marker='.', linestyle='solid')

# for x, y in zip(sample_times_list, act_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
# for x, y in zip(sample_times_list, about_data):
#     plt.text(x, y+0.3, '%.0f'%y, ha='center', va='bottom', fontsize=10.5)
    
plt.xlabel("sample_times")
plt.ylabel("value")
plt.legend()
plt.show()

水库采样 python 代码

In [15]:

plt.figure(figsize=(12,4))
plt.plot(sample_times_list, [x-y for x, y in zip(about_data, avg_data)],label="about-avg",marker='.', linestyle='solid')
plt.plot(sample_times_list, [x-y for x, y in zip(about_data, act_data)],label="about-act",marker='*', linestyle='solid')
plt.legend()
plt.show()

水库采样 python 代码

In [16]:

print("数据集平均值",sample_avg)
print("窗口均值",[round(x,1) for x in act_data])
print("样本均值",[round(x,1) for x in about_data])
数据集平均值 33744.862713728624
窗口均值 [33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5, 33342.5]
样本均值 [33812.9, 34676.2, 33751.4, 34292.9, 33481.6, 34368.8, 33492.2, 33877.9, 34304.4, 34343.2, 33527.0, 34127.0, 33998.1, 33857.7, 34035.1, 33562.9, 34004.3, 34156.2, 34019.0, 33753.2, 33844.5, 33803.4, 34071.6, 33959.4, 34059.3, 34099.3, 34018.9, 33777.3, 33820.8, 33812.4]

相关文章: