【发布时间】:2014-08-14 03:58:45
【问题描述】:
我正在尝试从 .csv 文件中的股票代码列表从 Google 财经下载数据。
这是我试图从 site 改编的课程:
import urllib,time,datetime
import csv
class Quote(object):
DATE_FMT = '%Y-%m-%d'
TIME_FMT = '%H:%M:%S'
def __init__(self):
self.symbol = ''
self.date,self.time,self.open_,self.high,self.low,self.close,self.volume = ([] for _ in range(7))
def append(self,dt,open_,high,low,close,volume):
self.date.append(dt.date())
self.time.append(dt.time())
self.open_.append(float(open_))
self.high.append(float(high))
self.low.append(float(low))
self.close.append(float(close))
self.volume.append(int(volume))
def append_csv(self, filename):
with open(filename, 'a') as f:
f.write(self.to_csv())
def __repr__(self):
return self.to_csv()
def get_symbols(self, filename):
for line in open(filename,'r'):
if line != 'codigo':
print line
q = GoogleQuote(line,'2014-01-01','2014-06-20')
q.append_csv('data.csv')
class GoogleQuote(Quote):
''' Daily quotes from Google. Date format='yyyy-mm-dd' '''
def __init__(self,symbol,start_date,end_date=datetime.date.today().isoformat()):
super(GoogleQuote,self).__init__()
self.symbol = symbol.upper()
start = datetime.date(int(start_date[0:4]),int(start_date[5:7]),int(start_date[8:10]))
end = datetime.date(int(end_date[0:4]),int(end_date[5:7]),int(end_date[8:10]))
url_string = "http://www.google.com/finance/historical?q={0}".format(self.symbol)
url_string += "&startdate={0}&enddate={1}&output=csv".format(
start.strftime('%b %d, %Y'),end.strftime('%b %d, %Y'))
csv = urllib.urlopen(url_string).readlines()
csv.reverse()
for bar in xrange(0,len(csv)-1):
try:
#ds,open_,high,low,close,volume = csv[bar].rstrip().split(',')
#open_,high,low,close = [float(x) for x in [open_,high,low,close]]
#dt = datetime.datetime.strptime(ds,'%d-%b-%y')
#self.append(dt,open_,high,low,close,volume)
data = csv[bar].rstrip().split(',')
dt = datetime.datetime.strftime(data[0],'%d-%b-%y')
close = data[4]
self.append(dt,close)
except:
print "error " + str(len(csv)-1)
print "error " + csv[bar]
if __name__ == '__main__':
q = Quote() # create a generic quote object
q.get_symbols('list.csv')
但是,对于某些引号,代码不会返回所有数据(例如 BIOM3),某些字段返回为“-”。在这些情况下,我该如何处理拆分? 最后,在脚本的某个时刻,它停止下载数据,因为脚本停止,它不返回任何消息。我该如何处理这个问题?
【问题讨论】:
-
你看过
pandas库吗?见this -
@eswarp25 我正在尝试在 BOVESPA 公司中使用 pandas,但它不起作用。你能试一下吗? (例如 ABRE11、BIOM3)
标签: python pandas web-scraping