181 lines
4.5 KiB
Python
181 lines
4.5 KiB
Python
from __future__ import annotations
|
|
|
|
import dataclasses
|
|
import datetime
|
|
import hashlib
|
|
import io
|
|
import json
|
|
import pickle
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
from typing import Any
|
|
|
|
|
|
def pickle_encode(obj: Any, protocol=None, fix_imports=True) -> bytes:
    """
    Encode object to pickle bytes

    >>> by = pickle_encode({'function': pickle_encode})
    >>> isinstance(by, bytes)
    True
    >>> decoded = pickle_decode(by)
    >>> by = decoded['function']({'meow': 565656})
    >>> pickle_decode(by)
    {'meow': 565656}

    :param obj: Object to serialize
    :param protocol: Pickle protocol number, forwarded to pickle.dumps (None = pickle's default)
    :param fix_imports: Forwarded to pickle.dumps
    :return: The pickled bytes
    """
    # pickle.dumps produces the byte string directly, so no intermediate BytesIO is needed
    return pickle.dumps(obj, protocol=protocol, fix_imports=fix_imports)
|
|
|
|
|
|
def pickle_decode(by: bytes) -> Any:
    """
    Decode pickle bytes to object

    WARNING: unpickling can execute arbitrary code, so only pass bytes that come from a
    trusted source.

    :param by: Pickled bytes
    :return: The reconstructed object
    """
    # SECURITY: pickle offers no protection against maliciously crafted input
    return pickle.loads(by)
|
|
|
|
|
|
class EnhancedJSONEncoder(json.JSONEncoder):
    """
    An improvement to the json.JSONEncoder class, which supports:
    encoding for dataclasses, SimpleNamespace, datetime/date, and sets (including frozensets)
    """

    def default(self, o: object) -> object:
        """
        Convert an object that json cannot serialize natively into one that it can.

        :param o: The object the base encoder could not handle
        :return: A serializable replacement for o
        :raises TypeError: Raised by json.JSONEncoder.default when o is not a supported type
        """
        # Support encoding dataclasses
        # https://stackoverflow.com/a/51286749/7346633
        # is_dataclass() is also True for dataclass *classes*, which asdict() rejects,
        # so only instances are converted here
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)

        # Simple namespace: encode its attributes as a json object
        if isinstance(o, SimpleNamespace):
            return vars(o)

        # Support encoding datetime and date as ISO format strings
        if isinstance(o, (datetime.datetime, datetime.date)):
            return o.isoformat()

        # Support for sets (encoded as json arrays)
        # https://stackoverflow.com/a/8230505/7346633
        if isinstance(o, (set, frozenset)):
            return list(o)

        return super().default(o)
|
|
|
|
|
|
def json_stringify(obj: object, **kwargs) -> str:
    """
    Dump an object to a json string through EnhancedJSONEncoder (so dataclasses, datetime
    and sets are supported), keeping non-ASCII characters unescaped. Keyword arguments are
    forwarded to json.dumps and take precedence over these two defaults.

    Preconditions:
      - obj is not None

    :param obj: Object to serialize
    :param kwargs: Extra options for json.dumps
    :return: Json string
    """
    # kwargs is a fresh dict on every call, so filling in defaults here is side-effect free
    kwargs.setdefault('ensure_ascii', False)
    kwargs.setdefault('cls', EnhancedJSONEncoder)
    return json.dumps(obj, **kwargs)
|
|
|
|
|
|
def jsn(s: str) -> SimpleNamespace:
    """
    Parse a json string, turning every json object into a SimpleNamespace so that its keys
    can be read as attributes.

    :param s: Json string
    :return: Parsed data, with json objects represented as SimpleNamespace instances
    """
    def to_namespace(mapping: dict) -> SimpleNamespace:
        # Called by json.loads for each decoded json object
        return SimpleNamespace(**mapping)

    return json.loads(s, object_hook=to_namespace)
|
|
|
|
|
|
def ensure_dir(path: Path | str) -> Path:
    """
    Create the directory, together with any missing parents, unless it is already there

    :param path: Directory path
    :return: The directory as a Path
    """
    directory = Path(path)
    directory.mkdir(exist_ok=True, parents=True)
    return directory
|
|
|
|
|
|
def ensure_parent(path: Path | str) -> Path:
    """
    Create the parent directory of a path if it is missing

    :param path: Path whose parent directory should exist
    :return: The given path itself (not its parent), as a Path
    """
    target = Path(path)
    ensure_dir(target.parent)
    return target
|
|
|
|
|
|
def write(fp: Path | str, data: bytes | str) -> int:
    """
    Make sure the directory exists, and then write data, either in bytes or string.

    Also forces utf-8 encoding for strings.

    :param fp: Destination file path
    :param data: Text (written as utf-8) or bytes-like binary content
    :return: The count returned by Path.write_text / Path.write_bytes
    :raises TypeError: If data is neither str nor bytes-like
    """
    # Reject unsupported types before touching the filesystem: they used to fall through
    # both branches, so nothing was written and None was returned without any error
    if not isinstance(data, (str, bytes, bytearray, memoryview)):
        raise TypeError(f'data must be str or bytes, not {type(data).__name__}')

    fp = ensure_parent(fp)

    if isinstance(data, str):
        return fp.write_text(data, 'utf-8')
    return fp.write_bytes(data)
|
|
|
|
|
|
def read(file: Path | str) -> str:
    """
    Load the whole file as text, always decoding it as utf-8

    :param file: File path
    :return: File content
    """
    with Path(file).open(encoding='utf-8') as handle:
        return handle.read()
|
|
|
|
|
|
def write_json(fp: Path | str, data: Any):
    """
    Serialize data with json_stringify and save the result to a file through write

    :param fp: Destination file path
    :param data: Object to serialize
    """
    serialized = json_stringify(data)
    write(fp, serialized)
|
|
|
|
|
|
def parse_date_time(iso: str) -> datetime.datetime:
    """
    Parse an ISO date-time string by slicing fixed character positions instead of running a
    general parser. The original author measured this as 4.03 times faster than the
    third-party dateutil.parser.isoparse() over 1,000,000 trials.

    Only the first 19 characters are read, so anything after the seconds (fractional
    seconds, UTC offset) is ignored and the result is a naive datetime.

    Preconditions:
      - iso is the output of datetime.isoformat() (In a format like "2021-10-20T23:50:14")
      - iso is a valid date (this function does not check for the validity of the input)

    :param iso: Input date
    :return: Datetime object
    """
    year, month, day = int(iso[:4]), int(iso[5:7]), int(iso[8:10])
    hour, minute, second = int(iso[11:13]), int(iso[14:16]), int(iso[17:19])
    return datetime.datetime(year, month, day, hour, minute, second)
|
|
|
|
|
|
def parse_date_only(iso: str) -> datetime.datetime:
    """
    Parse only the date part of an ISO string by slicing fixed character positions.
    Everything after the first 10 characters is ignored, so the result is midnight of
    that day.

    Preconditions:
      - iso starts with the format of "YYYY-MM-DD" (e.g. "2021-10-20" or "2021-10-20T10:04:14")
      - iso is a valid date (this function does not check for the validity of the input)

    :param iso: Input date
    :return: Datetime object with the time set to 00:00:00
    """
    year = int(iso[:4])
    month = int(iso[5:7])
    day = int(iso[8:10])
    return datetime.datetime(year, month, day)
|
|
|
|
|
|
def md5(file: Path | str) -> str:
    """
    Hash a file's content with md5, reading it in 4096-byte chunks so the whole file is
    never held in memory at once

    :param file: File path
    :return: md5 string
    """
    digest = hashlib.md5()
    with Path(file).open('rb') as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read means end of file
                break
            digest.update(block)
    return digest.hexdigest()
|