【发布时间】:2022-05-24 13:24:26
【问题描述】:
我正在尝试提取自 2019-01-01 以来每天带有 #btc 标签的推文数量。
我知道错误与权限有关,但我已经在使用从 Twitter 开发人员门户生成的密钥。
这是我的代码,我删除了我的开发者密钥。
# Python Script to Extract tweets of a
# particular Hashtag using Tweepy and Pandas
# import modules
import pandas as pd
import tweepy
# function to display data of each tweet
def printtweetdata(n, ith_tweet):
    """Print one tweet's extracted fields to standard output.

    Args:
        n: Sequence number shown in the ``Tweet n:`` header line.
        ith_tweet: Indexable record holding, in order: username,
            description, location, following count, follower count,
            total tweets, retweet count, tweet text and hashtags.

    Raises:
        IndexError: If ``ith_tweet`` has fewer than nine entries.
    """
    # Labels are listed in the same order as the fields of ``ith_tweet``.
    labels = (
        "Username",
        "Description",
        "Location",
        "Following Count",
        "Follower Count",
        "Total Tweets",
        "Retweet Count",
        "Tweet Text",
        "Hashtags Used",
    )
    print()
    print(f"Tweet {n}:")
    for position, label in enumerate(labels):
        print(f"{label}:{ith_tweet[position]}")
# function to perform data extraction
def scrape(words, date_since, numtweet):
    """Search Twitter for ``words`` and save the matching tweets as CSV.

    Up to ``numtweet`` English-language tweets are fetched through the
    module-level ``api`` client. Each tweet is echoed on screen with
    ``printtweetdata`` and all rows are written to ``scraped_tweets.csv``
    in the current working directory.

    Args:
        words: Search query, e.g. a hashtag such as ``#btc``.
        date_since: Earliest tweet date as ``yyyy-mm-dd``; an empty value
            disables the date filter.
        numtweet: Maximum number of tweets to fetch.

    Raises:
        Whatever the Tweepy client raises when a request fails (for
        example on a 403 response); no CSV file is written in that case.
    """
    columns = ['username', 'description', 'location', 'following',
               'followers', 'totaltweets', 'retweetcount', 'text', 'hashtags']

    # ``since`` is not a parameter of the search endpoint, so passing it as a
    # keyword does not filter anything; the date limit belongs in the query
    # itself as the ``since:`` operator.
    query = f"{words} since:{date_since}" if date_since else words

    # Tweepy 4 renamed ``API.search`` to ``API.search_tweets``; use whichever
    # one the installed version provides.
    search = getattr(api, "search_tweets", None) or api.search

    # NOTE(review): the standard search endpoint is understood to cover only
    # roughly the most recent week of tweets, so a far older ``date_since``
    # would not return older tweets -- confirm against the Twitter API docs.
    tweets = tweepy.Cursor(search, q=query, lang="en",
                           tweet_mode='extended').items(numtweet)

    rows = []
    for i, tweet in enumerate(tweets, start=1):
        user = tweet.user
        # A retweet carries the untruncated text on ``retweeted_status``;
        # ordinary tweets do not have that attribute.
        try:
            text = tweet.retweeted_status.full_text
        except AttributeError:
            text = tweet.full_text
        hashtext = [tag['text'] for tag in tweet.entities['hashtags']]

        ith_tweet = [user.screen_name, user.description, user.location,
                     user.friends_count, user.followers_count,
                     user.statuses_count, tweet.retweet_count, text, hashtext]
        rows.append(ith_tweet)
        printtweetdata(i, ith_tweet)

    # Build the frame once instead of growing it row by row.
    db = pd.DataFrame(rows, columns=columns)
    filename = 'scraped_tweets.csv'
    db.to_csv(filename)
if __name__ == '__main__':
    # Enter your own credentials obtained
    # from your developer account
    consumer_key = ""
    consumer_secret = ""
    access_key = ""
    access_secret = ""

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    # scrape() reads this module-level client, so the name must stay ``api``.
    # NOTE(review): a 403 response from the search call may mean the app's
    # access level does not permit this endpoint -- confirm in the Twitter
    # developer portal.
    api = tweepy.API(auth)

    # Enter Hashtag and initial date; surrounding whitespace is dropped so it
    # does not end up in the search query.
    print("Enter Twitter HashTag to search for")
    words = input().strip()
    print("Enter Date since The Tweets are required in yyyy-mm-dd")
    date_since = input().strip()

    # number of tweets you want to extract in one run
    numtweet = 100
    scrape(words, date_since, numtweet)
    print('Scraping has completed!')
这是错误:
---------------------------------------------------------------------------
TweepError Traceback (most recent call last)
<ipython-input-4-dee0a1a7784b> in <module>()
98 # number of tweets you want to extract in one run
99 numtweet = 100
--> 100 scrape(words, date_since, numtweet)
101 print('Scraping has completed!')
6 frames
<ipython-input-4-dee0a1a7784b> in scrape(words, date_since, numtweet)
38 # the iterator has various attributes that you can access to
39 # get information about each tweet
---> 40 list_tweets = [tweet for tweet in tweets]
41
42 # Counter to maintain Tweet Count
<ipython-input-4-dee0a1a7784b> in <listcomp>(.0)
38 # the iterator has various attributes that you can access to
39 # get information about each tweet
---> 40 list_tweets = [tweet for tweet in tweets]
41
42 # Counter to maintain Tweet Count
/usr/local/lib/python3.7/dist-packages/tweepy/cursor.py in __next__(self)
49
50 def __next__(self):
---> 51 return self.next()
52
53 def next(self):
/usr/local/lib/python3.7/dist-packages/tweepy/cursor.py in next(self)
241 if self.current_page is None or self.page_index == len(self.current_page) - 1:
242 # Reached end of current page, get the next page...
--> 243 self.current_page = self.page_iterator.next()
244 while len(self.current_page) == 0:
245 self.current_page = self.page_iterator.next()
/usr/local/lib/python3.7/dist-packages/tweepy/cursor.py in next(self)
130
131 if self.index >= len(self.results) - 1:
--> 132 data = self.method(max_id=self.max_id, parser=RawParser(), *self.args, **self.kwargs)
133
134 if hasattr(self.method, '__self__'):
/usr/local/lib/python3.7/dist-packages/tweepy/binder.py in _call(*args, **kwargs)
251 return method
252 else:
--> 253 return method.execute()
254 finally:
255 method.session.close()
/usr/local/lib/python3.7/dist-packages/tweepy/binder.py in execute(self)
232 raise RateLimitError(error_msg, resp)
233 else:
--> 234 raise TweepError(error_msg, resp, api_code=api_error_code)
235
236 # Parse the response payload
TweepError: Twitter error response: status code = 403
【问题讨论】:
-
您的代码对我有用(按预期创建了文件)。这些密钥在简单的 OAuth 和 tweepy API 调用中能正常工作吗?您是否设法得到了 200 响应?也许可以把密钥删除后重新输入到文件中,说不定您不小心在里面混入了一个没有注意到的字符。
-
不走运,我从这里 imgur.com/Y3zExS8 获取消费者密钥和秘密,并从这里 imgur.com/KWKmZEN 获取访问密钥和秘密。对吗?
-
我相信是的。我拿到密钥已经有一段时间了。可能应用程序未正确启用。可能必须重新检查创建它的步骤。