[F] Format constants
This commit is contained in:
+4
-3
@@ -7,9 +7,10 @@ from utils import *
|
||||
# Constants. The instructors said that we can use global constants here:
|
||||
# https://piazza.com/class/ksovzjrlsye72f?cid=1664
|
||||
# They should not end with "/"
|
||||
data_dir = './data'
|
||||
tweets_dir = f'{data_dir}/twitter/user-tweets'
|
||||
user_dir = f'{data_dir}/twitter/user'
|
||||
DATA_DIR = './data'
|
||||
TWEETS_DIR = f'{DATA_DIR}/twitter/user-tweets'
|
||||
USER_DIR = f'{DATA_DIR}/twitter/user'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Load config and create API
|
||||
|
||||
@@ -7,7 +7,7 @@ from dataclasses import dataclass
|
||||
|
||||
from py7zr import SevenZipFile
|
||||
|
||||
from main import data_dir, tweets_dir, user_dir
|
||||
from main import DATA_DIR, TWEETS_DIR, USER_DIR
|
||||
from utils import *
|
||||
|
||||
|
||||
@@ -44,11 +44,11 @@ def process_users() -> None:
|
||||
users = []
|
||||
|
||||
# Loop through all the files
|
||||
for filename in os.listdir(f'{user_dir}/users'):
|
||||
for filename in os.listdir(f'{USER_DIR}/users'):
|
||||
# Only check json files and ignore macos dot files
|
||||
if filename.endswith('.json') and not filename.startswith('.'):
|
||||
# Read
|
||||
user = json.loads(read(f'{user_dir}/users/{filename}'))
|
||||
user = json.loads(read(f'{USER_DIR}/users/{filename}'))
|
||||
|
||||
# Get user language (The problem is, most people's lang fields are null, so we have to
|
||||
# look at the language of their latest status as well, while they might not have a
|
||||
@@ -69,7 +69,7 @@ def process_users() -> None:
|
||||
users.sort(key=lambda x: x.popularity, reverse=True)
|
||||
|
||||
# Save data
|
||||
write(f'{user_dir}/processed/users.json', json_stringify(users))
|
||||
write(f'{USER_DIR}/processed/users.json', json_stringify(users))
|
||||
|
||||
|
||||
def load_users() -> list[ProcessedUser]:
|
||||
@@ -78,7 +78,7 @@ def load_users() -> list[ProcessedUser]:
|
||||
|
||||
:return: List of processed users, sorted descending by popularity.
|
||||
"""
|
||||
return [ProcessedUser(*u) for u in json.loads(read(f'{user_dir}/processed/users.json'))]
|
||||
return [ProcessedUser(*u) for u in json.loads(read(f'{USER_DIR}/processed/users.json'))]
|
||||
|
||||
|
||||
def get_user_popularity_ranking(user: str) -> int:
|
||||
@@ -116,7 +116,7 @@ def select_user_sample() -> None:
|
||||
|
||||
:return: None
|
||||
"""
|
||||
file = f'{user_dir}/processed/sample.json'
|
||||
file = f'{USER_DIR}/processed/sample.json'
|
||||
|
||||
# Sample file already exists
|
||||
if os.path.isfile(file):
|
||||
@@ -152,7 +152,7 @@ def load_user_sample() -> Sample:
|
||||
|
||||
:return: Sample holding the 'most_popular' and 'random' lists of processed users
|
||||
"""
|
||||
j = json.loads(read(f'{user_dir}/processed/sample.json'))
|
||||
j = json.loads(read(f'{USER_DIR}/processed/sample.json'))
|
||||
return Sample([ProcessedUser(*u) for u in j['most_popular']],
|
||||
[ProcessedUser(*u) for u in j['random']])
|
||||
|
||||
@@ -185,15 +185,15 @@ def process_tweets() -> None:
|
||||
:return: None
|
||||
"""
|
||||
# Loop through all the files
|
||||
for filename in os.listdir(f'{tweets_dir}/user'):
|
||||
for filename in os.listdir(f'{TWEETS_DIR}/user'):
|
||||
# Only check json files and ignore macos dot files
|
||||
if filename.endswith('.json') and not filename.startswith('.'):
|
||||
# Check if already processed
|
||||
if os.path.isfile(f'{tweets_dir}/processed/{filename}'):
|
||||
if os.path.isfile(f'{TWEETS_DIR}/processed/{filename}'):
|
||||
continue
|
||||
|
||||
# Read
|
||||
tweets = json.loads(read(f'{tweets_dir}/user/{filename}'))
|
||||
tweets = json.loads(read(f'{TWEETS_DIR}/user/{filename}'))
|
||||
p = [Posting(is_covid_related(t['full_text']),
|
||||
t['favorite_count'] + t['retweet_count'],
|
||||
'retweeted_status' in t,
|
||||
@@ -201,7 +201,7 @@ def process_tweets() -> None:
|
||||
for t in tweets]
|
||||
|
||||
# Save data
|
||||
write(f'{tweets_dir}/processed/{filename}', json_stringify(p))
|
||||
write(f'{TWEETS_DIR}/processed/{filename}', json_stringify(p))
|
||||
debug(f'Processed: {filename}')
|
||||
|
||||
|
||||
@@ -213,7 +213,7 @@ def load_tweets(username: str) -> list[Posting]:
|
||||
:return: User's processed tweets
|
||||
"""
|
||||
return [Posting(*p) for p in json.loads(read(
|
||||
os.path.join(tweets_dir, f'processed/{username}.json')))]
|
||||
os.path.join(TWEETS_DIR, f'processed/{username}.json')))]
|
||||
|
||||
|
||||
def is_covid_related(text: str) -> bool:
|
||||
@@ -248,7 +248,7 @@ def pack_data() -> None:
|
||||
|
||||
:return: None
|
||||
"""
|
||||
packed_dir = f'{data_dir}/packed'
|
||||
packed_dir = f'{DATA_DIR}/packed'
|
||||
Path(packed_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Pack data for processed.
|
||||
@@ -259,4 +259,4 @@ def pack_data() -> None:
|
||||
z: SevenZipFile = z
|
||||
for p in processed_dirs:
|
||||
debug(f'- Packing {p}')
|
||||
z.writeall(data_dir + p)
|
||||
z.writeall(DATA_DIR + p)
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import List
|
||||
import tweepy
|
||||
from tweepy import API, TooManyRequests, User, Tweet, Unauthorized
|
||||
|
||||
from main import tweets_dir, user_dir
|
||||
from main import TWEETS_DIR, USER_DIR
|
||||
from utils import *
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ def download_all_tweets(api: API, screen_name: str,
|
||||
:return: None
|
||||
"""
|
||||
# Ensure directories exist
|
||||
file = f'{tweets_dir}/user/{screen_name}.json'
|
||||
file = f'{TWEETS_DIR}/user/{screen_name}.json'
|
||||
|
||||
# Check if user already exists
|
||||
if os.path.isfile(file):
|
||||
@@ -180,7 +180,7 @@ def download_users_resume_progress(api: API) -> None:
|
||||
:return: None
|
||||
"""
|
||||
# Open file and read
|
||||
meta = json.loads(read(f'{user_dir}/meta/meta.json'))
|
||||
meta = json.loads(read(f'{USER_DIR}/meta/meta.json'))
|
||||
|
||||
# Resume
|
||||
download_users_execute(api, meta['n'],
|
||||
@@ -215,7 +215,7 @@ def download_users_execute(api: API, n: float,
|
||||
print("Executing friends-chain download:")
|
||||
print(f"- n: {n}")
|
||||
print(f"- Requests per minute: 1")
|
||||
print(f"- Directory: {user_dir}")
|
||||
print(f"- Directory: {USER_DIR}")
|
||||
print(f"- Downloaded: {len(downloaded)}")
|
||||
print(f"- Current search set: {len(current_set)}")
|
||||
print(f"- Next search set: {len(next_set)}")
|
||||
@@ -241,7 +241,7 @@ def download_users_execute(api: API, n: float,
|
||||
# This user was not saved, save the user.
|
||||
if user not in downloaded:
|
||||
# Save user json
|
||||
write(f'{user_dir}/users/{user.screen_name}.json', json_stringify(user._json))
|
||||
write(f'{USER_DIR}/users/{user.screen_name}.json', json_stringify(user._json))
|
||||
|
||||
# Add to set
|
||||
downloaded.add(user.screen_name)
|
||||
@@ -281,7 +281,7 @@ def download_users_execute(api: API, n: float,
|
||||
# Update meta info so that downloading can be continued
|
||||
meta = {'downloaded': downloaded, 'done_set': done_set,
|
||||
'current_set': current_set, 'next_set': next_set, 'n': n}
|
||||
write(f'{user_dir}/meta/meta.json', json_stringify(meta))
|
||||
write(f'{USER_DIR}/meta/meta.json', json_stringify(meta))
|
||||
|
||||
debug(f'Finished saving friends of {screen_name}')
|
||||
debug(f'============= Total {len(downloaded)} saved =============')
|
||||
|
||||
Reference in New Issue
Block a user