[+] Download tweets function

This commit is contained in:
Hykilpikonna
2021-11-03 18:59:15 -04:00
parent dcb912fae1
commit 2086c5199d
+31 -7
View File
@@ -1,9 +1,13 @@
import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Union
import pytz
import tweepy
from tweepy import API
from tweepy.models import Status
from collect.utils import Config, debug
@@ -68,23 +72,43 @@ def tweepy_login(conf: Config) -> tweepy.API:
return api
def get_user_tweets(api: API, screen_name: str) -> list[Tweet]:
def download_user_tweets(api: API, screen_name: str) -> None:
"""
Get all tweets from a specific individual
Download all tweets from a specific individual to a local folder
:param api: Tweepy API object
:param screen_name: Screen name of that individual
:return: All tweets
:return: None
"""
debug(f'Getting user tweets for {screen_name}')
start_date = pytz.UTC.localize(datetime(2020, 1, 1))
tweets: list[Tweet] = api.user_timeline(screen_name=screen_name, count=200, tweet_mode='extended')
# Get initial 200 tweets
tweets: list[Status] = api.user_timeline(screen_name=screen_name, count=200, tweet_mode='extended', trim_user=True)
# Get additional tweets
while True:
debug(f'- Got {len(tweets)} tweets, getting additional tweets...')
additional_tweets = api.user_timeline(screen_name=screen_name, count=200, tweet_mode='extended', max_id=int(tweets[-1].id_str) - 1)
additional_tweets: list[Status] = api.user_timeline(screen_name=screen_name, count=200, tweet_mode='extended', trim_user=True,
max_id=int(tweets[-1].id_str) - 1)
if len(additional_tweets) == 0:
debug(f'- Got {len(tweets)} tweets, finished because no more tweets are available.')
break
if additional_tweets[-1].created_at < start_date:
debug(f'- Got {len(tweets)} tweets, finished because the earliest tweet in the dataset goes before 2020-01-01.')
break
tweets.extend(additional_tweets)
debug(f'- Got {len(tweets)} tweets, finished.')
return tweets
# Make directory
dir = './data/raw_twitter_users/'
Path(dir).mkdir(parents=True, exist_ok=True)
# Encode json
json_output = json.dumps([t._json for t in tweets], indent=4)
# Store in file
with open(dir + screen_name + '.json', 'w') as f:
f.write(json_output)