[F] Format constants

This commit is contained in:
Hykilpikonna
2021-11-24 10:31:28 -05:00
parent df02c0ba51
commit 815eab8cf1
3 changed files with 24 additions and 23 deletions
+4 -3
View File
@@ -7,9 +7,10 @@ from utils import *
# Constants. The instructors said that we can use global constants here:
# https://piazza.com/class/ksovzjrlsye72f?cid=1664
# These directory paths should not end with "/"
data_dir = './data'
tweets_dir = f'{data_dir}/twitter/user-tweets'
user_dir = f'{data_dir}/twitter/user'
DATA_DIR = './data'
TWEETS_DIR = f'{DATA_DIR}/twitter/user-tweets'
USER_DIR = f'{DATA_DIR}/twitter/user'
if __name__ == '__main__':
# Load config and create API
+14 -14
View File
@@ -7,7 +7,7 @@ from dataclasses import dataclass
from py7zr import SevenZipFile
from main import data_dir, tweets_dir, user_dir
from main import DATA_DIR, TWEETS_DIR, USER_DIR
from utils import *
@@ -44,11 +44,11 @@ def process_users() -> None:
users = []
# Loop through all the files
for filename in os.listdir(f'{user_dir}/users'):
for filename in os.listdir(f'{USER_DIR}/users'):
# Only check json files and ignore macos dot files
if filename.endswith('.json') and not filename.startswith('.'):
# Read
user = json.loads(read(f'{user_dir}/users/{filename}'))
user = json.loads(read(f'{USER_DIR}/users/{filename}'))
# Get user language (The problem is, most people's lang field is null, so we have to
# look at the language of their latest status as well, while they might not have a
@@ -69,7 +69,7 @@ def process_users() -> None:
users.sort(key=lambda x: x.popularity, reverse=True)
# Save data
write(f'{user_dir}/processed/users.json', json_stringify(users))
write(f'{USER_DIR}/processed/users.json', json_stringify(users))
def load_users() -> list[ProcessedUser]:
@@ -78,7 +78,7 @@ def load_users() -> list[ProcessedUser]:
:return: List of processed users, sorted descending by popularity.
"""
return [ProcessedUser(*u) for u in json.loads(read(f'{user_dir}/processed/users.json'))]
return [ProcessedUser(*u) for u in json.loads(read(f'{USER_DIR}/processed/users.json'))]
def get_user_popularity_ranking(user: str) -> int:
@@ -116,7 +116,7 @@ def select_user_sample() -> None:
:return: None
"""
file = f'{user_dir}/processed/sample.json'
file = f'{USER_DIR}/processed/sample.json'
# Exists
if os.path.isfile(file):
@@ -152,7 +152,7 @@ def load_user_sample() -> Sample:
:return: Sample holding the most popular users and the random users
"""
j = json.loads(read(f'{user_dir}/processed/sample.json'))
j = json.loads(read(f'{USER_DIR}/processed/sample.json'))
return Sample([ProcessedUser(*u) for u in j['most_popular']],
[ProcessedUser(*u) for u in j['random']])
@@ -185,15 +185,15 @@ def process_tweets() -> None:
:return: None
"""
# Loop through all the files
for filename in os.listdir(f'{tweets_dir}/user'):
for filename in os.listdir(f'{TWEETS_DIR}/user'):
# Only check json files and ignore macos dot files
if filename.endswith('.json') and not filename.startswith('.'):
# Check if already processed
if os.path.isfile(f'{tweets_dir}/processed/{filename}'):
if os.path.isfile(f'{TWEETS_DIR}/processed/{filename}'):
continue
# Read
tweets = json.loads(read(f'{tweets_dir}/user/{filename}'))
tweets = json.loads(read(f'{TWEETS_DIR}/user/{filename}'))
p = [Posting(is_covid_related(t['full_text']),
t['favorite_count'] + t['retweet_count'],
'retweeted_status' in t,
@@ -201,7 +201,7 @@ def process_tweets() -> None:
for t in tweets]
# Save data
write(f'{tweets_dir}/processed/{filename}', json_stringify(p))
write(f'{TWEETS_DIR}/processed/{filename}', json_stringify(p))
debug(f'Processed: {filename}')
@@ -213,7 +213,7 @@ def load_tweets(username: str) -> list[Posting]:
:return: User's processed tweets
"""
return [Posting(*p) for p in json.loads(read(
os.path.join(tweets_dir, f'processed/{username}.json')))]
os.path.join(TWEETS_DIR, f'processed/{username}.json')))]
def is_covid_related(text: str) -> bool:
@@ -248,7 +248,7 @@ def pack_data() -> None:
:return: None
"""
packed_dir = f'{data_dir}/packed'
packed_dir = f'{DATA_DIR}/packed'
Path(packed_dir).mkdir(parents=True, exist_ok=True)
# Pack data for processed.
@@ -259,4 +259,4 @@ def pack_data() -> None:
z: SevenZipFile = z
for p in processed_dirs:
debug(f'- Packing {p}')
z.writeall(data_dir + p)
z.writeall(DATA_DIR + p)
+6 -6
View File
@@ -9,7 +9,7 @@ from typing import List
import tweepy
from tweepy import API, TooManyRequests, User, Tweet, Unauthorized
from main import tweets_dir, user_dir
from main import TWEETS_DIR, USER_DIR
from utils import *
@@ -65,7 +65,7 @@ def download_all_tweets(api: API, screen_name: str,
:return: None
"""
# Ensure directories exist
file = f'{tweets_dir}/user/{screen_name}.json'
file = f'{TWEETS_DIR}/user/{screen_name}.json'
# Check if user already exists
if os.path.isfile(file):
@@ -180,7 +180,7 @@ def download_users_resume_progress(api: API) -> None:
:return: None
"""
# Open file and read
meta = json.loads(read(f'{user_dir}/meta/meta.json'))
meta = json.loads(read(f'{USER_DIR}/meta/meta.json'))
# Resume
download_users_execute(api, meta['n'],
@@ -215,7 +215,7 @@ def download_users_execute(api: API, n: float,
print("Executing friends-chain download:")
print(f"- n: {n}")
print(f"- Requests per minute: 1")
print(f"- Directory: {user_dir}")
print(f"- Directory: {USER_DIR}")
print(f"- Downloaded: {len(downloaded)}")
print(f"- Current search set: {len(current_set)}")
print(f"- Next search set: {len(next_set)}")
@@ -241,7 +241,7 @@ def download_users_execute(api: API, n: float,
# This user was not saved, save the user.
if user not in downloaded:
# Save user json
write(f'{user_dir}/users/{user.screen_name}.json', json_stringify(user._json))
write(f'{USER_DIR}/users/{user.screen_name}.json', json_stringify(user._json))
# Add to set
downloaded.add(user.screen_name)
@@ -281,7 +281,7 @@ def download_users_execute(api: API, n: float,
# Update meta info so that downloading can be continued
meta = {'downloaded': downloaded, 'done_set': done_set,
'current_set': current_set, 'next_set': next_set, 'n': n}
write(f'{user_dir}/meta/meta.json', json_stringify(meta))
write(f'{USER_DIR}/meta/meta.json', json_stringify(meta))
debug(f'Finished saving friends of {screen_name}')
debug(f'============= Total {len(downloaded)} saved =============')