[F] Fix pyta errors
This commit is contained in:
@@ -57,7 +57,7 @@ def load_users_popularity(user_dir: str = './data/twitter/user/') -> list[Genera
 
         # Log progress
         if len(users) % 2000 == 0:
-            debug(f'load_users_popularity: Loaded {len(users)} users.')
+            debug(f'Loaded {len(users)} users.')
 
     # Sort by followers count, descending
     users.sort(key=lambda x: x.popularity, reverse=True)
+18
-33
@@ -1,3 +1,6 @@
+"""
+TODO: Module docstring
+"""
 import json
 import math
 import random
|
||||
@@ -7,34 +10,15 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import python_ta
|
||||
import pytz
|
||||
import tweepy
|
||||
from tweepy import API, TooManyRequests
|
||||
from tweepy import API, TooManyRequests, User
|
||||
|
||||
from process.twitter_process import Posting
|
||||
from utils import Config, debug, json_stringify, load_config, normalize_directory
|
||||
|
||||
|
||||
@dataclass
|
||||
class User:
|
||||
id: int
|
||||
id_str: str
|
||||
name: str
|
||||
screen_name: str
|
||||
location: str
|
||||
description: str
|
||||
url: str
|
||||
entities: dict
|
||||
protected: bool
|
||||
followers_count: int
|
||||
friends_count: int
|
||||
listed_count: int
|
||||
created_at: datetime
|
||||
favourites_count: int
|
||||
verified: bool
|
||||
statuses_count: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class Tweet:
|
||||
created_at: datetime
@@ -85,7 +69,6 @@ def download_user_tweets(api: API, screen_name: str) -> None:
     :return: None
     """
     debug(f'Getting user tweets for {screen_name}')
-    start_date = pytz.UTC.localize(datetime(2020, 1, 1))
 
     # Get initial 200 tweets
     tweets = api.user_timeline(screen_name=screen_name, count=200, tweet_mode='extended',
@@ -102,12 +85,6 @@ def download_user_tweets(api: API, screen_name: str) -> None:
             debug(f'- Got {len(tweets)} tweets, finished because no more tweets are available.')
             break
 
-        if additional_tweets[-1].created_at < start_date:
-            debug(
-                f'- Got {len(tweets)} tweets, finished because the earliest tweet in the dataset '
-                f'goes before 2020-01-01.')
-            break
-
         tweets.extend(additional_tweets)
         postings.extend([convert_to_generic(screen_name, t) for t in additional_tweets])
 
@@ -182,7 +159,8 @@ def download_users_start(api: API, start_point: str, n: float = math.inf,
     next_set = set()
 
     # Start download
-    download_users_execute(api, n, base_dir, rate_limit, downloaded, done_set, current_set, next_set)
+    download_users_execute(api, n, base_dir, rate_limit, downloaded,
+                           done_set, current_set, next_set)
 
 
 def download_users_resume_progress(api: API, base_dir: str = './data/twitter/user/') -> None:
@@ -332,7 +310,14 @@ def convert_to_generic(username: str, tweet: Tweet) -> Posting:
 
 
 if __name__ == '__main__':
-    conf = load_config('config.json5')
-    api = tweepy_login(conf)
-    # download_users_start(api, 'sauricat')
-    download_users_resume_progress(api)
+    python_ta.check_all(config={
+        'extra-imports': [], # the names (strs) of imported modules
+        'allowed-io': [], # the names (strs) of functions that call print/open/input
+        'max-line-length': 100,
+        'disable': ['R1705', 'C0200']
+    })
+
+    # conf = load_config('config.json5')
+    # api = tweepy_login(conf)
+    # # download_users_start(api, 'sauricat')
+    # download_users_resume_progress(api)
Reference in New Issue
Block a user