如何告诉 matplotlib 除了从 1 到 1000 的数字之外不要考虑其他任何东西？答案

【问题标题】：How to tell matplotlib to do not consider anything other than numbers from 1 to 1000?如何告诉 matplotlib 除了从 1 到 1000 的数字之外不要考虑其他任何东西？
【发布时间】：2017-05-17 20:45:08
【问题描述】：

这里是菜鸟。我有这个脚本，它每 5 分钟从一个不断刷新的 .csv 文件中读取数据并绘制一次图表。问题是 .csv 数据中有时会出现错误值，可能是字母，也可能是其他东西。如何告诉 matplotlib 忽略所有不是 1 到 1000 之间数字的内容？我将在下面粘贴我的代码。谢谢。

import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot, dates
from matplotlib.dates import HourLocator, DateFormatter, DayLocator, 
YearLocator, MinuteLocator
from csv import reader
from dateutil import parser
import os
import time
import pylab
import datetime
from datetime import datetime, date
os.chdir('/home/pi/csvdata')

time.sleep(30)
# Question's original script: repeatedly plot the humidity column of the most
# recently modified .csv file in /home/pi/csvdata and save it as PNG files.
# NOTE(review): as pasted, the function body is not indented under the def and
# several statements are wrapped across lines — presumably paste damage;
# confirm against the asker's real file.
def plotloop():
hours = (HourLocator())
minutes = (MinuteLocator())
days = (DayLocator())
dayFormatter = DateFormatter('%X %x')      # tick labels: locale time, then locale date
for plotinsideloop in range(300000):
    # Build (mtime, name) pairs for every .csv in the data directory; after
    # sort() + reverse() the most recently modified file comes first.
    dated_files = [(os.path.getmtime(fn), os.path.basename(fn)) 
        for fn in os.listdir("/home/pi/csvdata") if 
fn.lower().endswith('.csv')]
    dated_files.sort()
    dated_files.reverse()
    newest = dated_files[0][1]
    with open(newest) as f:
        data = list(reader(f))
    # Column 1 is plotted as humidity, column 0 is the timestamp text.
    # Nothing here validates or converts the humidity cells, so whatever text
    # is in the file is handed to plot_date() unchanged.
    humidity = [i[1] for i in data]
    dates1 = [i[0] for i in data]
    dates = [datetime.strptime(i, '%X %x') for i in dates1]
    #print dates
    print dates1[0], dates1[-1]
    fig, ax = pyplot.subplots()
    fig.subplots_adjust(bottom=0.2)
    ax.xaxis.set_major_locator(days)
    ax.xaxis.set_minor_locator(hours)
    ax.xaxis.set_major_formatter(dayFormatter)
    # From here to the set_xlim() call: the first and last datetimes are turned
    # into strings, their separators (" ", ":" and "-") replaced by spaces, the
    # pieces converted to ints and fed back into datetime(...) for the x limits.
    firstdate = (dates[0])
    firstdate1 = str(firstdate)#[:10]
    print "_______"
    #print firstdate
    lastdate = (dates[-1])
    lastdate1 = str(lastdate)
    # NOTE(review): replacing " " with " " appears to be a no-op as pasted.
    lastdate2 = lastdate1.replace(" ", " ")
    firstdate2 = firstdate1.replace(" ", " ")
    lastdate3 = lastdate2.replace(":", " ")
    firstdate3 = firstdate2.replace(":", " ")
    lastdate4 = lastdate3.replace("-", " ")
    firstdate4 = firstdate3.replace("-", " ")
    lastdate5 = lastdate4.split(" ")
    firstdate5 = firstdate4.split(" ")
    print lastdate4
    print firstdate4
    firstdate6 = map(int, firstdate5)
    lastdate6 = map(int, lastdate5)
    #lastdate6 = [int(z) for z in lastdate5]
    #firstdate6 = [int(v) for v in firstdate5]
#    firstdatey = int.firstdate4[0]
##        firstdatem
##        firstdated
##        firstdateh
##        firstdatemin
##        firstdatesec

    print lastdate6
    print firstdate6
    # The title is a 3-tuple (first date text, " - ", last date text), not a
    # single string.
    titlename = (firstdate1, " - ", lastdate1)
    print titlename
    #print lastdate
    ax.set_xlim(datetime (*firstdate6), datetime (*lastdate6))
    pyplot.ylim(10,50) 
    ax.xaxis_date()
    ax.autoscale_view()
    # Tick-label rotation is set twice: 45 here, then 15 on the next statement.
    pyplot.setp(pyplot.gca().get_xticklabels(), rotation=45, 
horizontalalignment='right')
    pyplot.xticks(rotation=15)
    pyplot.plot_date(dates, humidity)
    pyplot.title(titlename)
    # Saved twice: once next to the source file name, once under a fixed name.
    pyplot.savefig(newest + '_2.png', dpi=260)
    pyplot.savefig("plot_2.png", dpi=260)
    #pyplot.savefig("test.pdf")
    print ("Done")
    print(lastdate)
    # Wait 300 seconds before the next redraw.
    time.sleep(300)



plotloop()

下面是更新后的代码，运行时会挂起：

import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot, dates
from matplotlib.dates import HourLocator, DateFormatter, DayLocator, YearLocator, MinuteLocator
from csv import reader
from dateutil import parser
import os
import time
import pylab
import datetime
from datetime import datetime, date
os.chdir('/home/pi/csvdata')

#time.sleep(30)
def plotloop():
        """Asker's updated attempt (the version reported to hang at 100% CPU).

        Finds the newest .csv file, tries to drop humidity values outside
        1..1000, and saves a plot as PNG.
        """
        hours = (HourLocator())
        minutes = (MinuteLocator())
        days = (DayLocator())
        dayFormatter = DateFormatter('%X %x')      # tick labels: locale time, then locale date
        # NOTE(review): only the file lookup and read below are indented under
        # this for loop, and the sleep at the end of the function is commented
        # out — so the newest file is re-read 300000 times back to back before
        # anything after the loop runs.
        for plotinsideloop in range(300000):
                dated_files = [(os.path.getmtime(fn), os.path.basename(fn)) 
                        for fn in os.listdir("/home/pi/csvdata") if fn.lower().endswith('.csv')]
                dated_files.sort()
                dated_files.reverse()
                newest = dated_files[0][1]
                with open(newest) as f:
                        data = list(reader(f))

        # Everything from here on is at function level, i.e. it runs once,
        # after the loop above has finished.
        humidity = [i[1] for i in data]
        dates1 = [i[0] for i in data]
        humdates = zip(humidity,dates1)
        humdatesfiltered = []
        # NOTE(review): humdate[0] comes straight from csv.reader, so it is a
        # string being compared with ints. Under Python 2 that comparison
        # presumably does not raise TypeError and is not numeric — confirm the
        # filter keeps any rows at all.
        for humdate in humdates:
            try:
                if 1 <= humdate[0] <= 1000:
                    humdatesfiltered.append(humdate)
            except TypeError:
                pass 

        dates = [datetime.strptime(i, '%X %x') for i in dates1]
        #print dates
        print dates1[0], dates1[-1]
        fig, ax = pyplot.subplots()
        fig.subplots_adjust(bottom=0.2)
        ax.xaxis.set_major_locator(days)
        ax.xaxis.set_minor_locator(hours)
        ax.xaxis.set_major_formatter(dayFormatter)
        firstdate = dates[0]
        lastdate = dates[-1]
        print "_______"

        # NOTE(review): firstdate1 / lastdate1 are never assigned in this
        # function (only firstdate / lastdate are) — likely a leftover name.
        titlename = (firstdate1.strftime('%Y-%m-%d'), " - ", lastdate1.strftime('%Y-%m-%d'))
        print titlename

        ax.set_xlim(firstdate1, lastdate1)
        pyplot.ylim(10,50) 
        ax.xaxis_date()
        ax.autoscale_view()
        pyplot.setp(pyplot.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
        pyplot.xticks(rotation=15)
        # x values here are the raw date strings from the CSV (element 1 of
        # each pair), not the parsed datetimes in `dates`.
        pyplot.plot_date([i[1] for i in humdatesfiltered], 
                         [i[0] for i in humdatesfiltered])
        pyplot.title(titlename)
        pyplot.savefig(newest + '_2.png', dpi=260)
        pyplot.savefig("plot_2.png", dpi=260)
        #pyplot.savefig("test.pdf")
        print "Done"
        print lastdate
       #       time.sleep(300)




plotloop()

【问题讨论】：

for plotinsideloop in range(300000): 的这一行是什么？
这就是如何让它循环很长时间以不断刷新情节
好的，我明白了。然后您需要将data = list(reader(f)) 之后的代码缩进到与with open(newest) as f: 相同的级别
请看我编辑的答案。
@GamerDude 欢迎来到 SO。请下次展示您为解决您面临的问题所做的一些工作，并说出您遇到的其他问题。此外，使您的代码尽可能简洁将使更多人花时间阅读它，了解错误并为您提供帮助。

标签： python csv matplotlib

【解决方案1】：

一个问题是您想要删除那些有问题的值，但您不能只从其中一个列表推导中删除它们，因为这样您的两个列表将具有不同数量的元素。

您可以做的是zip() 将两个列表放在一起，然后删除有问题的值。即使湿度列表中有字母，下面也会这样做。

def plotloop():
    """Replot humidity from the newest CSV file, once every five minutes.

    On each pass the most recently modified ``.csv`` file in
    /home/pi/csvdata is read. Column 0 is parsed as a timestamp
    (``'%X %x'``) and column 1 as a number. Rows are kept only when both
    parse and the number lies in 1..1000; anything else (letters, blank
    lines, short rows, out-of-range values) is dropped together with its
    timestamp so the two plotted series stay the same length.

    The plot is written to ``<newest>_2.png`` and ``plot_2.png``.

    Relies on the module-level imports of the surrounding script and on the
    working directory being /home/pi/csvdata (file names returned by
    os.listdir are used as relative paths).
    """
    hours = HourLocator()
    days = DayLocator()
    dayFormatter = DateFormatter('%X %x')
    for plotinsideloop in range(300000):
        csv_names = [fn for fn in os.listdir("/home/pi/csvdata")
                     if fn.lower().endswith('.csv')]
        if not csv_names:
            # Nothing to plot yet; try again on the next cycle.
            print("No .csv file found")
            time.sleep(300)
            continue
        # Newest by modification time, ties broken by name (same pick as
        # sorting (mtime, name) pairs and taking the largest).
        newest = max(csv_names, key=lambda fn: (os.path.getmtime(fn), fn))
        with open(newest) as f:
            data = list(reader(f))

        # csv.reader yields strings, so convert before range-checking;
        # a bad cell drops the whole row, keeping dates and values aligned.
        points = []
        for row in data:
            try:
                value = float(row[1])
                stamp = datetime.strptime(row[0], '%X %x')
            except (IndexError, ValueError):
                continue
            if 1 <= value <= 1000:
                points.append((stamp, value))

        if not points:
            print("No valid rows in {}".format(newest))
            time.sleep(300)
            continue

        plot_dates = [p[0] for p in points]
        plot_values = [p[1] for p in points]
        firstdate = plot_dates[0]
        lastdate = plot_dates[-1]
        print("{} {}".format(firstdate, lastdate))

        fig, ax = pyplot.subplots()
        fig.subplots_adjust(bottom=0.2)
        ax.xaxis.set_major_locator(days)
        ax.xaxis.set_minor_locator(hours)
        ax.xaxis.set_major_formatter(dayFormatter)
        print("_______")

        # pyplot.title() wants a single string, not a tuple of parts.
        titlename = "{} - {}".format(firstdate.strftime('%Y-%m-%d'),
                                     lastdate.strftime('%Y-%m-%d'))
        print(titlename)

        ax.set_xlim(firstdate, lastdate)
        pyplot.ylim(10, 50)
        ax.xaxis_date()
        ax.autoscale_view()
        pyplot.setp(pyplot.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
        pyplot.xticks(rotation=15)
        pyplot.plot_date(plot_dates, plot_values)
        pyplot.title(titlename)
        pyplot.savefig(newest + '_2.png', dpi=260)
        pyplot.savefig("plot_2.png", dpi=260)
        # pyplot keeps every figure alive until it is closed; release it so
        # a long-running loop does not accumulate figures.
        pyplot.close(fig)
        print("Done")
        print(lastdate)
        # Pause between redraws; without it the loop spins at full CPU.
        time.sleep(300)

plotloop()

【讨论】：

非常感谢您的回答。但是当我尝试运行代码时，print titlename 这一行出现语法错误。如果我将其注释掉，下一行 ax.set_xlim(firstdate1, lastdate1) 又会出现语法错误。我不明白这是怎么回事
@GamerDude：啊，缺少一个右括号。请现在再试一次。
非常感谢。它似乎已经解决了这个问题，但是现在脚本只是挂起了 100% 的 cpu 使用率，不确定它在计算什么，但它需要很长时间。 Shell 中什么也没有发生
在它卡住空转之前，最后一条 print 语句输出的是什么？
它不打印任何内容，在 Shell 中不执行任何操作

【解决方案2】：

这是一个重组后的版本，可能更易于使用：

import csv
import datetime
import glob
import matplotlib
matplotlib.use('Agg')    # has to be called before pyplot is imported
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter, DayLocator, YearLocator
import os
# import pylab
from time import sleep

# Directory that plot_humidity() searches for the newest CSV file.
CSV_DIR       = "/home/pi/csvdata"
# strptime/strftime pattern used both to parse the timestamp column and to
# label the x-axis ticks.
DATE_FORMAT   = "%X %x"         # ex "07:06:05 09/30/13"
# Pattern for the first/last dates shown in the graph title.
DAY_FORMAT    = "%Y-%m-%d"      # ex "2013-09-30"
# Seconds main() waits before the first plot.
INITIAL_DELAY = 30
# Seconds main() waits between successive plots.
REDRAW_DELAY  = 300

DEBUG = True


def _make_debug_print(enabled):
    """Build the debug_print function matching the given on/off flag.

    The choice is made once, when the module is loaded, so later changes
    to DEBUG do not affect the function that was already created.
    """
    if enabled:
        def debug_print(s):
            """Write the debug message s to standard output."""
            print(s)
    else:
        def debug_print(s):
            """Debugging is off: accept the message and discard it."""
            pass
    return debug_print


debug_print = _make_debug_print(DEBUG)

def wait(i):
    """Report the upcoming pause via debug_print, then sleep for i seconds."""
    notice = "Waiting for {} seconds".format(i)
    debug_print(notice)
    sleep(i)

def get_newest_file(dir, ext=None):
    """Return the most recently modified file in a directory.

    Args:
        dir: Directory to search.
        ext: Optional file extension, with or without the leading period.
            When omitted, every name in the directory is considered.

    Returns:
        The path of the matching file with the latest modification time,
        or None when nothing matches.
    """
    debug_print("Finding newest file")

    # Turn the extension into a glob pattern, adding the period if needed.
    if ext is None:
        pattern = "*"
    else:
        suffix = ext if ext.startswith(".") else "." + ext
        pattern = "*" + suffix

    candidates = glob.glob(os.path.join(dir, pattern))
    if candidates:
        latest = max(candidates, key=os.path.getmtime)
        debug_print("  found {}".format(latest))
        return latest

    debug_print("  nothing found")
    return None

def get_humidity_data(csv_fname):
    """Read the usable humidity readings and their timestamps from a CSV file.

    Each row is expected to carry the timestamp in column 0 (in DATE_FORMAT)
    and the humidity in column 1. A row is kept only when the humidity
    parses as an integer in the range 1..1000 and the timestamp parses;
    every other row is counted as skipped, including blank lines and rows
    with fewer than two columns.

    Args:
        csv_fname: Path of the CSV file to read.

    Returns:
        A tuple ``(hum, dat)`` of two equal-length lists: the accepted
        humidity values (int) and the matching datetime objects, in file
        order. Both lists are empty when no row is usable.
    """
    hum = []
    dat = []
    parsetime = datetime.datetime.strptime
    debug_print("Reading data")
    good, skip = 0, 0
    with open(csv_fname, "r") as csv_file:
        for row in csv.reader(csv_file):
            try:
                h = int(row[1])
                d = parsetime(row[0], DATE_FORMAT)
            except (IndexError, ValueError):
                # ValueError: a cell is not a number / not a valid timestamp.
                # IndexError: blank line or a row with too few columns.
                skip += 1
                continue
            if 1 <= h <= 1000:
                hum.append(h)
                dat.append(d)
                good += 1
            else:
                skip += 1
    debug_print("  found {} good rows, skipped {} bad rows".format(good, skip))
    return hum, dat

def make_graph(humidities, dates_):
    """Build a humidity-over-time figure and return it.

    Args:
        humidities: Humidity values to plot on the y axis.
        dates_: Matching datetime values for the x axis. The first and last
            entries set the x-axis limits and the dates in the title, so
            the sequence must not be empty.

    Returns:
        The figure created for this graph.
    """
    start, end = dates_[0], dates_[-1]
    graph_title = " - ".join(day.strftime(DAY_FORMAT) for day in (start, end))
    debug_print("Making graph for {}".format(graph_title))

    # Fresh figure, with extra room at the bottom for the tick labels.
    fig, ax = plt.subplots()
    fig.subplots_adjust(bottom=0.2)

    # X axis carries dates: day major ticks, hour minor ticks.
    ax.xaxis_date()
    x_axis = ax.xaxis
    x_axis.set_major_locator(DayLocator())
    x_axis.set_minor_locator(HourLocator())
    x_axis.set_major_formatter(DateFormatter(DATE_FORMAT))
    ax.set_xlim(start, end)
    ax.set_ylim(10, 50)
    ax.autoscale_view()

    # Tick-label rotation is applied twice, in this order (45, then 15).
    tick_labels = plt.gca().get_xticklabels()
    plt.setp(tick_labels, rotation=45, horizontalalignment='right')
    plt.xticks(rotation=15)

    # Draw the readings and label the figure.
    plt.plot_date(dates_, humidities)
    plt.title(graph_title)
    return fig

def plot_humidity():
    """Plot the newest CSV file in CSV_DIR and save the graph next to it.

    The graph is written to ``<csv path>_2.png``. Nothing is written when
    no CSV file exists or when the newest file holds no usable rows.
    """
    newest = get_newest_file(CSV_DIR, "csv")
    if newest is None:
        debug_print("No data file found!")
        return

    h, d = get_humidity_data(newest)
    if not d:
        # make_graph() indexes the first and last date, so it cannot be
        # given empty data.
        debug_print("No valid data rows in {}".format(newest))
        return

    fig = make_graph(h, d)
    try:
        fig.savefig(newest + '_2.png', dpi=260)
    finally:
        # pyplot keeps every figure it creates until it is closed; this
        # function is called in an endless loop, so release it each time.
        plt.close(fig)
    debug_print("Done")

def main():
    """Run forever: wait, plot, wait, plot, ...

    The first wait lasts INITIAL_DELAY seconds; every later one lasts
    REDRAW_DELAY seconds.
    """
    delay = INITIAL_DELAY
    while True:
        wait(delay)
        plot_humidity()
        delay = REDRAW_DELAY

if __name__=="__main__":
    main()

【讨论】：

非常感谢。我不得不将 csf 更改为 csv，而且 bad 变量没有定义，所以我把它去掉了，但它工作得很好。代码看起来很漂亮。非常感谢
抱歉，修正了两个错别字。
一个问题，我不太明白，它是每次都检查新文件还是只在脚本启动时检查？
main() 每五分钟调用一次plot_humidity()；每次，plot_humidity() 都会找到最新的文件，加载数据并绘制它。