[+] Create script to pack for markus submission

This commit is contained in:
Hykilpikonna
2021-12-13 18:36:32 -05:00
parent b7e11cb45b
commit 84bfca5e62
+46 -11
View File
@@ -5,6 +5,8 @@ users, creating samples of users, filtering news channels, and processing tweets
import json
import os
import random
import sys
import zipfile
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
@@ -322,25 +324,58 @@ def is_covid_related(text: str) -> bool:
def pack_data() -> None:
"""
This function packs processed data and raw data separately.
This function packs processed data and raw data separately, and it also packs the data ready for
submission on MarkUs
:return: None
"""
packed_dir = f'{DATA_DIR}/packed'
Path(packed_dir).mkdir(parents=True, exist_ok=True)
packed_data = f'{packed_dir}/processed.7z'
packed_res = f'{packed_dir}/resources.7z'
# Pack data for processed.
debug('Packing data...')
processed_dirs = ['/twitter/user/meta', '/twitter/user/processed',
'/twitter/user-tweets/processed']
with SevenZipFile(f'{packed_dir}/processed.7z', 'w') as z:
z: SevenZipFile = z
for p in processed_dirs:
debug(f'- Packing {p}')
z.writeall(DATA_DIR + p)
# Pack processed data (Since packing this takes a long time, we decided to not overwrite it for
# every run. This is also because the processed data hasn't changed since Nov 28 when the
# project is mostly finished, and there is no need to re-pack data every time, whereas the
# resources and sources might change after every update) So, delete the packed 7z file if you
# want to repack.
if not os.path.isfile(packed_data):
debug('Packing data...')
processed_dirs = ['/twitter/user/meta', '/twitter/user/processed',
'/twitter/user-tweets/processed']
with SevenZipFile(packed_data, 'w') as z:
z: SevenZipFile = z
for p in processed_dirs:
debug(f'- Packing {p}')
z.writeall(DATA_DIR + p)
# Pack resources
debug('Packing resources...')
with SevenZipFile(f'{packed_dir}/resources.7z', 'w') as z:
with SevenZipFile(packed_res, 'w') as z:
z: SevenZipFile = z
z.writeall(RES_DIR)
# Pack MarkUs submission
# Even though 7zip has much better compression rate than zip, MarkUs only supports zip.
debug('Packing source code...')
with zipfile.ZipFile(f'{packed_dir}/markus.zip', 'w') as zf:
z: zipfile.ZipFile = zf
# Add sources
src_path = Path(os.path.realpath(__file__)).parent
for f in os.listdir(src_path):
if not os.path.isdir(f):
z.write(f)
# Add packed resource
z.write(packed_res, 'resources.7z')
# Add report tex
z.write(os.path.join(src_path, '../writing/report/project_report.tex'), 'project_report.tex')
z.write(os.path.join(src_path, '../writing/report/project_report.pdf'), 'project_report.pdf')
# Open packed location (Since there isn't a platform-independent way of doing this, we currently
# only support macOS)
if sys.platform == 'darwin':
os.system(f'open {Path(packed_dir).absolute()}')