Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| dbbb45d2fe | |||
| 8b51af821f | |||
| 1b7e347de0 | |||
| 304f499fbf | |||
| 8caebbb36d | |||
| 884686a1cb | |||
| a3f475224b | |||
| 6952b160f1 | |||
| 52fcbfc205 | |||
| 27e3f92186 | |||
| cd3051a1b1 | |||
| c6bb2b5207 | |||
| 179f9ac5a6 | |||
| a732f31ae7 | |||
| b926d6253c | |||
| b9ed726caa | |||
| 46ea72641f | |||
| 62929dd48a | |||
| 25e319d898 | |||
| 6291d178d4 | |||
| 87a46fcf28 | |||
| df16f90a8f | |||
| 25aecabd34 | |||
| 332a63479e | |||
| b748a217a0 | |||
| afaef06f40 | |||
| 1948ff4a9c | |||
| 80bf1da83d | |||
| 6a60712d8c | |||
| 0530d41f42 | |||
| f6aa847368 | |||
| cb6aff290d | |||
| 1325224fd8 | |||
| 04f987cab8 | |||
| 156562f5a3 | |||
| 26d756e628 | |||
| e0b2ef63b7 | |||
| 374aedabb6 | |||
| c28ca20edc | |||
| 2480f4e690 |
@@ -116,3 +116,5 @@ dmypy.json
|
|||||||
# Custom
|
# Custom
|
||||||
.idea
|
.idea
|
||||||
HyPyUtils.iml
|
HyPyUtils.iml
|
||||||
|
.DS_Store
|
||||||
|
._*
|
||||||
|
|||||||
@@ -1,2 +1,21 @@
|
|||||||
# HyPyUtils
|
# HyPyUtils
|
||||||
HyDEV Utils for Python
|
HyDEV Utils for Python
|
||||||
|
|
||||||
|
`pip install hypy_utils`
|
||||||
|
|
||||||
|
## Modules
|
||||||
|
|
||||||
|
Some modules have extra requirements that are not installed along with hypy_utils. These are listed below:
|
||||||
|
|
||||||
|
| Module | Requirements |
|
||||||
|
|--------------------|--------------------------|
|
||||||
|
| `tqdm_utils` | tqdm |
|
||||||
|
| `downloader` | tqdm, requests |
|
||||||
|
| `scientific_utils` | numpy, numba, matplotlib |
|
||||||
|
| `git_utils` | dateutil |
|
||||||
|
|
||||||
|
## BadBlocks - HDD sector scanning for Linux
|
||||||
|
|
||||||
|
Usage (requires root): `sudo python3 -m hypy_utils.badblocks scan -d /dev/sda`
|
||||||
|
|
||||||
|

|
||||||
|
|||||||
Binary file not shown.
|
After Width: | Height: | Size: 832 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 539 KiB |
+35
-1
@@ -1,14 +1,18 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
__version__ = "1.0.13"
|
__version__ = "1.0.29"
|
||||||
|
|
||||||
import time
|
import time
|
||||||
|
import logging
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from .color_utils import *
|
from .color_utils import *
|
||||||
from .serializer import *
|
from .serializer import *
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Timer:
|
class Timer:
|
||||||
start: int
|
start: int
|
||||||
|
|
||||||
@@ -39,3 +43,33 @@ def run_time(func: Callable, *args, **kwargs):
|
|||||||
_ = [func(*args, **kwargs) for _ in range(iter)]
|
_ = [func(*args, **kwargs) for _ in range(iter)]
|
||||||
ms = (time.time_ns() - start) / 1e6
|
ms = (time.time_ns() - start) / 1e6
|
||||||
print(f'RT {name:30} {ms:6.1f} ms')
|
print(f'RT {name:30} {ms:6.1f} ms')
|
||||||
|
|
||||||
|
|
||||||
|
def safe(func: Callable, on_error: Callable[[Exception], Any] = None) -> Callable:
    """
    Wrap a function so that exceptions are handled instead of propagated.

    If the wrapped call raises, the result of ``on_error(exception)`` is returned.
    If on_error is None, the exception is logged and None is returned.

    Example Usage:
    >>> safe(lambda x: 1 / x, on_error=lambda e: -1)(0)
    -1
    >>> safe(lambda x: 1 / x)(2)
    0.5

    :param func: Function that needs safe execution
    :param on_error: Function to execute when an error occurs (receives the exception)
    :return: Wrapped function with the same name/docstring as func
    """
    import functools

    # wraps() keeps __name__/__doc__ of the wrapped function for debugging and introspection
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Compare against None so a callable with a falsy __bool__ is still used
            if on_error is not None:
                return on_error(e)
            log.exception(e)
            return None

    return wrapper
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
print('🐱')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
print('🐱')
|
||||||
|
|||||||
@@ -0,0 +1,82 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>BadBlocks Scan Result for /dev/sda</title>
  <script src="https://unpkg.com/petite-vue"></script>
  <script src="https://cdn.tailwindcss.com"></script>
</head>

<body v-scope @vue:mounted="mounted" class="p-4 relative flex flex-col gap-3">
  <div>
    <h1 class="text-2xl font-bold mb-4">BadBlocks Scan Result for /dev/sda</h1>
    <!-- first/last stay null until the log is loaded, so only render the summary once they exist -->
    <p v-if="first && last">Scan started on {{ first.timestamp }} and ended on {{ last.timestamp }}.</p>
    <p v-if="last">
      Total blocks: {{ last.end_block }} blocks |
      Block size: {{ d.block_size }} |
      Total size: <span class="text-red-500">{{ (last.end_block * d.block_size / 1_000_000_000_000).toFixed(2) }} TB</span>
      <span class="text-gray-400">= {{ (last.end_block * d.block_size / 1024 / 1024 / 1024 / 1024).toFixed(2) }} TiB</span></p>
    <p><span class="text-red-500">Red</span> blocks indicate bad blocks or blocks that take too long (8x normal time) to scan. Hover over a block to see more information.</p>
    <p>Made with ♥ by <a href="https://github.com/hykilpikonna" class="text-red-500 underline">Azalea</a> | GitHub @ <a href="https://github.com/hykilpikonna/HyPyUtils" class="text-red-500 underline">hykilpikonna/HyPyUtils</a></p>
  </div>

  <div class="flex flex-wrap gap-0.5">
    <div v-for="(log, index) in d.logs" :key="index"
         class="inline-block w-2 h-2"
         :style="{backgroundColor: getBlockColor(log)}"
         @mouseenter="showHoverInfo($event, log, index)" @mouseleave="hideHoverInfo"></div>
  </div>

  <!-- Tooltip for showing hover information -->
  <div v-if="hover"
       :style="{top: hover?.y + 'px', left: hover?.x + 'px'}"
       class="absolute bg-gray-800 text-white text-sm rounded px-2 py-1 shadow-md pointer-events-none transition-opacity duration-150">
    <p>Start: {{ hover?.l?.start_block?.toString(16) }}</p>
    <p>End: {{ hover?.l?.end_block?.toString(16) }}</p>
    <p>Duration: {{ hover?.l?.duration?.toFixed(2) }}</p>
  </div>
</body>

<script>
  // NOTE: the Python exporter injects the scan log by textually replacing the initial
  // value of `d` below and the device path in the title, so keep those literals unchanged.
  PetiteVue.createApp({
    d: { logs: [] }, // timestamp, duration, start_block, end_block, bad_blocks
    max_dur: 0, min_dur: 0, hover: null, first: null, last: null,
    onInit() {
      // Nothing to compute for an empty log (durations[q1Index] would be undefined)
      if (!this.d.logs.length) return;

      // Extract all durations and sort them ascending
      const durations = this.d.logs.map(l => l.duration).sort((a, b) => a - b);

      // First-quartile index (no interpolation; good enough for colouring)
      const q1Index = Math.floor(durations.length * 0.25);

      // Q1 is the "normal" duration; 8x that is treated as fully red
      this.min_dur = durations[q1Index];
      this.max_dur = this.min_dur * 8;

      console.log(`Q1 duration: ${this.min_dur}, Max duration: ${this.max_dur}`);

      this.first = this.d.logs[0];
      this.last = this.d.logs[this.d.logs.length - 1];
    },
    mounted() {
      if (this.d.logs.length) return this.onInit() // For injecting data from server-side
      // Development fallback: load the raw log from a local static server
      fetch('http://localhost:8080/badblocks_log__dev_sda.json').then(resp => resp.json())
        .then(data => { this.d = data; this.onInit() })
    },
    getBlockColor(log) {
      if (log.bad_blocks.length) return 'red'
      const span = this.max_dur - this.min_dur
      // Guard against a zero span (NaN colour) and clamp so the channels stay within 0..255
      const slowness = span > 0 ? (log.duration - this.min_dur) / span : 0
      const ratio = 1 - Math.min(1, Math.max(0, slowness))
      return `rgb(${Math.round(255 * (1 - ratio))}, ${Math.round(255 * ratio)}, 0)`
    },
    showHoverInfo(event, log, index) {
      const rect = event.target.getBoundingClientRect();
      // Position the tooltip slightly above and to the right of the hovered cell
      this.hover = { l: log,
        x: rect.left + window.scrollX + 10,
        y: rect.top + window.scrollY - 30
      }
    },
    hideHoverInfo() { this.hover = null }
  }).mount()
</script>

</html>
|
||||||
@@ -0,0 +1,162 @@
|
|||||||
|
import argparse
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
from shutil import which
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from hypy_utils import color
|
||||||
|
from hypy_utils.logging_utils import setup_logger
|
||||||
|
|
||||||
|
log = setup_logger()
|
||||||
|
speeds = []
|
||||||
|
|
||||||
|
|
||||||
|
def signal_handler(sig, frame):
    """
    SIGINT handler: request a graceful stop instead of aborting mid-scan.

    Sets the module-level ``pending_stop`` flag, which the scan loop checks after each chunk.

    :param sig: Signal number (unused)
    :param frame: Current stack frame (unused)
    """
    global pending_stop
    pending_stop = True

    log.error("^C received, signaling for the main process to stop...")
    for notice in (
        "Please wait for the current block to finish scanning, then the program will exit.",
        "If you want to stop immediately, press ^\\ (NOT RECOMMENDED)",
    ):
        log.warning(notice)
|
||||||
|
|
||||||
|
|
||||||
|
pending_stop = False
|
||||||
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
|
||||||
|
|
||||||
|
def to_gb(block: int):
    """
    Convert a block count (or block address) into GiB using the module-level BLOCK_SIZE.

    :param block: Number of blocks
    :return: Size in units of 1024^3 bytes
    """
    bytes_per_gib = 1024 * 1024 * 1024
    return block * BLOCK_SIZE / bytes_per_gib
|
||||||
|
|
||||||
|
|
||||||
|
def disk_info() -> tuple[int, int]:
    """
    Query the size and sector sizes of the module-level DISK using ``blockdev``.

    :return: (disk size in BLOCK_SIZE-sized blocks, logical sector size in bytes)
    :raises subprocess.CalledProcessError: If blockdev exits with an error
    """
    def query(flag: str) -> int:
        # Argument list without a shell, so the device path is never interpreted by a shell
        out = subprocess.run(["blockdev", flag, DISK], capture_output=True, text=True, check=True).stdout
        return int(out)

    # Get the disk size in blocks
    disk_size = query("--getsize64") // BLOCK_SIZE
    log.info(f"Disk size: {to_gb(disk_size):,.0f} GB, {disk_size:#x} blocks")

    # Get the logical and physical sector sizes
    lss = query("--getss")
    pss = query("--getpbsz")
    log.info(f"Logical sector size: {lss} bytes, physical sector size: {pss} bytes")

    return disk_size, lss
|
||||||
|
|
||||||
|
|
||||||
|
def run_badblocks(start_block: int, end_block: int):
    """
    Scan one range of blocks with ``badblocks``, append the result to LOG_FILE and log progress.

    Relies on module-level state set up by the script entry point:
    DISK, BLOCK_SIZE, LOG_FILE, disk_size, lss and the ``speeds`` history list.

    :param start_block: First block of the range (in BLOCK_SIZE units)
    :param end_block: Last block of the range (in BLOCK_SIZE units)
    """
    # Print block address in hex
    log.debug(f"Scanning from {start_block:#x} ({to_gb(start_block):,.0f} GB) to {end_block:#x} ({to_gb(end_block):,.0f} GB)")

    # Block addresses are expressed in BLOCK_SIZE units, so badblocks must use the same size.
    # badblocks takes the last block before the first block on its command line.
    command = ["badblocks", "-b", str(BLOCK_SIZE), "-v", DISK, str(end_block), str(start_block)]
    started = time.monotonic()
    # New session so that ^C in the terminal does not kill the scan of the current range
    result = subprocess.run(command, capture_output=True, text=True, start_new_session=True)
    duration = time.monotonic() - started

    # stdout should be a list of bad blocks (one number per line), parse it
    bad_blocks = [int(token) for token in result.stdout.split()]

    # Write the log as json
    logf = json.loads(LOG_FILE.read_text())
    logf["logs"].append({
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "duration": duration,
        "start_block": start_block,
        "end_block": end_block,
        "bad_blocks": bad_blocks,
        "stderr": result.stderr,
    })
    LOG_FILE.write_text(json.dumps(logf, indent=2))

    # Print logs
    if bad_blocks:
        log.error(f"> Bad blocks found: ")
        for block in bad_blocks:
            # Print in hex, plus the address in logical sectors and the offset in GB
            log.error(f"> {block:#x} = LDA {block * BLOCK_SIZE // lss:#x} = {to_gb(block):,.0f} GB")
    else:
        log.debug(color(f"> Clean!"))

    # Print summary (speed, progress, eta, etc.)
    # The stored speed is in blocks per second; guard against a zero-length measurement
    speed = (end_block - start_block) / max(duration, 1e-9)
    speeds.append(speed)
    avg_spd = sum(speeds) / len(speeds)
    progress = end_block / disk_size

    # Calculate ETA; whole seconds so that str() never carries a fractional part
    eta = str(datetime.timedelta(seconds=int((disk_size - end_block) / avg_spd)))

    # Convert speed to MB/s
    speed *= BLOCK_SIZE / (1024 * 1024)
    avg_spd *= BLOCK_SIZE / (1024 * 1024)

    log.info(f"> {progress * 100:.2f}% | Cur {speed:.1f} MB/s | Remain {eta} | "
             f"Avg {avg_spd:.1f} MB/s")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Take in disk and block size as optional arguments
    parser = argparse.ArgumentParser("Bad block detection utility")
    parser.add_argument("command", type=str, help="Command to run", choices=["scan", "plot"])
    parser.add_argument("--disk", "-d", type=str, help="Disk to scan")
    parser.add_argument("--block-size", "-b", type=int, default=4096, help="Block size in bytes")
    parser.add_argument("--start", "-s", type=int, help="Start block")
    parser.add_argument("--end", "-e", type=int, help="End block")
    parser.add_argument("--rescan", action="store_true", help="Rescan the whole disk")
    args = parser.parse_args()

    # These are read as globals by the helper functions above
    DISK = args.disk
    BLOCK_SIZE = args.block_size
    START = args.start
    END = args.end

    # Explicit checks instead of assert (asserts are stripped under `python -O`).
    # Lambdas keep the checks lazy: os.geteuid does not exist on Windows and must not be
    # evaluated once the platform check has failed.
    preconditions = [
        (lambda: platform.system() != "Windows", "Windows is not supported, go use DiskGenius or something"),
        (lambda: which("badblocks"), "badblocks command not found, please install e2fsprogs"),
        (lambda: which("blockdev"), "blockdev command not found, please install util-linux"),
        (lambda: DISK and Path(DISK).exists(), f"Disk {DISK} does not exist"),
        (lambda: BLOCK_SIZE > 0 and BLOCK_SIZE % 512 == 0, "Block size must be a multiple of 512"),
        (lambda: os.geteuid() == 0, "You need to run as root to access the disk"),
    ]
    for holds, message in preconditions:
        if not holds():
            log.error(message)
            raise SystemExit(1)

    LOG_FILE = Path(__file__).parent / f"badblocks_log_{DISK.replace('/', '_')}.json"

    if not LOG_FILE.exists() or (args.rescan and args.command == "scan"):
        # A rescan starts from an empty log; otherwise old and new entries would be mixed
        LOG_FILE.write_text(json.dumps({"logs": [], "block_size": BLOCK_SIZE}, indent=2))
    elif not args.rescan:
        previous = json.loads(LOG_FILE.read_text())

        # Check if the block size matches
        block_size = previous["block_size"]
        if block_size != BLOCK_SIZE:
            raise ValueError(f"Block size mismatch: {block_size} != {BLOCK_SIZE}")

        # Resume from the last run
        logs = previous["logs"]
        if logs:
            START = logs[-1]["end_block"]
            log.info(f"Resuming from {START:#x}")

    # Number of blocks in one scan chunk (about 1 GiB)
    gb_approx = 1024 * 1024 * 1024 // BLOCK_SIZE
    disk_size, lss = disk_info()

    if args.command == "scan":
        # Never scan past the requested end or past the end of the disk
        stop = min(END, disk_size) if END else disk_size
        for start in range(START or 0, stop, gb_approx):
            end = min(start + gb_approx, stop)
            run_badblocks(start, end)
            if pending_stop:
                break

    # Plot: inject the JSON log and the device name into the HTML template
    ouf = Path(f"badblocks{DISK.replace('/', '_')}.html")
    html = ((Path(__file__).parent / 'badblocks.html').read_text()
            .replace("d: { logs: [] }", f"d: {LOG_FILE.read_text()}")
            .replace("/dev/sda", DISK)
            )
    ouf.write_text(html)
    log.info(f"Results saved to {ouf}.")
    log.warning(f"You can open the html {ouf.absolute().as_uri()} in your browser. I can't open it for you because this script is running in sudo.")
|
||||||
@@ -0,0 +1,102 @@
|
|||||||
|
from hypy_utils import infer
|
||||||
|
|
||||||
|
|
||||||
|
def is_non_empty(o):
    """
    Tell whether an object should be kept when pruning empty containers.

    :param o: Any object
    :return: False only for sized objects of length 0; objects without a length count as non-empty
    """
    if hasattr(o, '__len__'):
        return len(o) > 0
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def remove_values(d: dict | list, vals: list, preserve_list: bool = False) -> dict | list:
|
||||||
|
"""
|
||||||
|
Recursively remove values from a dict
|
||||||
|
|
||||||
|
:param d: Dict
|
||||||
|
:param vals: Values to remove
|
||||||
|
:param preserve_list: Whether to ignore list elements
|
||||||
|
:return: Dict without specific values
|
||||||
|
"""
|
||||||
|
if isinstance(d, list):
|
||||||
|
d = [remove_values(i, vals, preserve_list) for i in d if preserve_list or i not in vals]
|
||||||
|
d = [i for i in d if is_non_empty(i)]
|
||||||
|
return d
|
||||||
|
|
||||||
|
if isinstance(d, dict):
|
||||||
|
d = {k: remove_values(v, vals, preserve_list) for k, v in d.items() if v not in vals}
|
||||||
|
d = {k: v for k, v in d.items() if is_non_empty(v)}
|
||||||
|
return d
|
||||||
|
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def remove_nones(d: dict | list, preserve_list: bool = False) -> dict | list:
    """
    Recursively remove nones from a dict or list

    Containers that become empty after the removal are dropped too.

    >>> remove_nones({'a': {'b': None, 'c': 1}, 'b': [None, {'a': None}], 'c': {'a': None}, 'd': [None, 1]})
    {'a': {'c': 1}, 'd': [1]}

    :param d: Dict or list
    :param preserve_list: If true, None elements inside lists are kept
    :return: Dict (or list, when a list was given) without nones
    """
    return remove_values(d, [None], preserve_list=preserve_list)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_keys(d: dict | list, keys: set) -> dict | list:
|
||||||
|
"""
|
||||||
|
Recursively remove keys
|
||||||
|
|
||||||
|
>>> remove_keys({'a': {'b': None, 'c': 1}, 'b': [None, {'a': None}], 'c': {'a': None}, 'd': [None, 1]}, {'b'})
|
||||||
|
{'a': {'c': 1}, 'c': {'a': None}, 'd': [None, 1]}
|
||||||
|
|
||||||
|
:param d: The dictionary that you want to remove keys from
|
||||||
|
:param keys: Set of keys you want to remove
|
||||||
|
:return: Dict without specific keys
|
||||||
|
"""
|
||||||
|
if isinstance(d, list):
|
||||||
|
d = [remove_keys(i, keys) for i in d]
|
||||||
|
d = [i for i in d if is_non_empty(i)]
|
||||||
|
return d
|
||||||
|
|
||||||
|
if isinstance(d, dict):
|
||||||
|
d = {k: remove_keys(v, keys) for k, v in d.items() if k not in keys}
|
||||||
|
d = {k: v for k, v in d.items() if is_non_empty(v)}
|
||||||
|
return d
|
||||||
|
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def deep_dict(o: object, exclude: set | None = None):
    """
    Recursively convert an object into plain dicts, lists and simple values

    :param o: Object
    :param exclude: Dict keys / attribute names to exclude (optional)
    :return: Deep dictionary of the object's variables
    """
    exclude = exclude or set()

    # Types known to infer() (dataclasses, sets, dates, ...). Compare against None so that
    # falsy conversions (e.g. an empty set -> []) are accepted, and recurse so that
    # `exclude` and nested objects are handled inside the converted value too.
    infer_result = infer(o)
    if infer_result is not None:
        return deep_dict(infer_result, exclude)

    # Arbitrary instances: use their attributes. Class objects are left untouched.
    if hasattr(o, '__dict__') and not isinstance(o, type):
        return deep_dict(dict(vars(o)), exclude)

    if isinstance(o, dict):
        return {k: deep_dict(v, exclude) for k, v in o.items() if k not in exclude}
    if isinstance(o, list):
        return [deep_dict(v, exclude) for v in o]
    return o
|
||||||
|
|
||||||
|
|
||||||
|
def get_rec(cd: dict, key: str):
    """
    Look up a value in nested dictionaries using a dotted key.

    >>> get_rec({'a': {'b': {'c': 1}}}, 'a.b.c')
    1

    :param cd: Dictionary
    :param key: Recursive key in the format of keya.keyb.keyc...
    :return: The value, or None if any part of the path is missing or is not a mapping
    """
    current = cd
    for part in key.split('.'):
        # Stop gracefully when the path runs into None or any value without .get()
        lookup = getattr(current, 'get', None)
        if lookup is None:
            return None
        current = lookup(part)
    return current
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(url: str, file: str | Path, progress: bool = True):
    """
    Helper method handling downloading large files from `url` to `file`.

    The data is streamed into a temporary ``.part`` file next to the target and renamed
    once complete, so an interrupted download is never mistaken for a finished one.

    https://stackoverflow.com/a/42071418/7346633

    :param url: URL to download
    :param file: Destination path. If it already exists, nothing is downloaded.
    :param progress: Whether to show a tqdm progress bar
    :return: Path of the downloaded file
    :raises requests.HTTPError: If the server answers with an error status
    """
    file = Path(file)
    if file.is_file():
        return file

    chunk_size = 64 * 1024

    try:
        term_len = os.get_terminal_size().columns
        bar_len = int(term_len * 0.4)
    except (OSError, ValueError):
        # Not attached to a terminal
        term_len = 60
        bar_len = 20

    partial = file.with_name(file.name + '.part')

    with requests.get(url, stream=True) as r:
        # Do not save an HTML error page as if it were the requested file
        r.raise_for_status()

        tqdm_args = dict()
        if 'content-length' in r.headers:
            tqdm_args['total'] = int(r.headers['content-length']) / 1024 / 1024

        pbar = None
        if progress:
            pbar = tqdm.tqdm(unit=" MB", ncols=term_len,
                             bar_format='{desc} {rate_noinv_fmt} {remaining} [{bar}] {percentage:.0f}%', ascii=' #',
                             desc=file.name[:bar_len].ljust(bar_len), **tqdm_args)

        try:
            with open(partial, 'wb') as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if not chunk:
                        continue
                    f.write(chunk)
                    # `is not None`: bool(tqdm) raises TypeError when the total is unknown
                    if pbar is not None:
                        pbar.update(len(chunk) / 1024 / 1024)
        except BaseException:
            partial.unlink(missing_ok=True)
            raise
        finally:
            if pbar is not None:
                pbar.close()

    os.replace(partial, file)
    return file
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
import base64
import re

# Substrings that are never allowed to appear in an escaped filename
FILENAME_BLACKLIST = [
    # Unix and Windows
    "/",

    # Windows only
    "<", ">", ":", '"', "\\", "|", "?", "*", "\0",
    "CON", "PRN", "AUX", "NUL",
    "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
    "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",

    # Just for extra safety
    "~"
]

# Each blacklisted substring is written as "%" + its unpadded base64 form
FILENAME_REPLACE = {c: f"%{base64.b64encode(c.encode()).decode().replace('=', '')}" for c in FILENAME_BLACKLIST}

# A literal "%" is written as "%%" so that escape sequences stay unambiguous
_ESCAPE_MAP = {"%": "%%", **FILENAME_REPLACE}
_UNESCAPE_MAP = {escaped: raw for raw, escaped in _ESCAPE_MAP.items()}

# Single-pass patterns: text produced by one replacement is never scanned again, so no
# temporary placeholder string (which could collide with real filename content) is needed.
_ESCAPE_RE = re.compile("|".join(re.escape(token) for token in _ESCAPE_MAP))
_UNESCAPE_RE = re.compile("|".join(re.escape(token) for token in sorted(_UNESCAPE_MAP, key=len, reverse=True)))


def escape_filename(fn: str) -> str:
    """
    Escape a string so it is safe to use as a file name on Unix and Windows.

    :param fn: Raw name
    :return: Escaped name, reversible with unescape_filename
    """
    return _ESCAPE_RE.sub(lambda m: _ESCAPE_MAP[m.group()], fn)


def unescape_filename(fn: str) -> str:
    """
    Reverse escape_filename.

    :param fn: Escaped name
    :return: Original name
    """
    return _UNESCAPE_RE.sub(lambda m: _UNESCAPE_MAP[m.group()], fn)
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
import datetime
|
||||||
|
import shlex
|
||||||
|
from pathlib import Path
|
||||||
|
from subprocess import check_output
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractedCommit(NamedTuple):
    """One commit as parsed from the output of ``git log`` (see git_log)."""
    sha: str  # full commit hash (%H)
    author: str  # author name (%aN)
    email: str  # author email (%aE)
    time: str  # author date as a strict ISO 8601 string (%aI)
    message: str  # subject line (%s)
    file_names: list[str]  # name-status lines with the tab replaced by "/", e.g. "A/path/to/file"

    def get_time(self) -> datetime.datetime:
        """
        :return: The commit time parsed into a datetime object
        """
        return dateutil.parser.isoparse(self.time)
|
||||||
|
|
||||||
|
|
||||||
|
def git_log(path: Path, fail_silently: bool = False) -> list[ExtractedCommit]:
    """
    Call and parse git log. This function requires that git>=2.37.1 is installed on your system.

    :param path: Path of git repository
    :param fail_silently: If true, commits that cannot be parsed are skipped. If false, raise
        the parsing exception. Failures of the git command itself are always raised.
    :return: List of commits
    """
    # Argument list instead of a quoted command string, so any repository path works
    # (including paths that contain quotes or spaces)
    cmd = [
        "git", "-c", "diff.renamelimit=0", "-c", "diff.renames=0",
        "-C", str(path.absolute()),
        "log", "--name-status", "--diff-filter=AMD",
        "--pretty=format:START_COMMIT_QwQ %H%n%aN%n%aE%n%aI%n%s%n",
    ]
    log = check_output(cmd).decode('utf-8', 'ignore')

    def extract_commit(block: str) -> ExtractedCommit | None:
        try:
            lines = block.split('\n')
            # A commit with an empty subject and no files has only four lines after strip()
            sha, author, email, date, message = lines + [""] if len(lines) == 4 else lines[:5]
            # Line 5 is the blank separator; the name-status lines follow it
            files = [f.replace('\t', '/') for f in lines[6:]]
            return ExtractedCommit(sha, author, email, date, message, files)
        except Exception as e:
            print(f'========== Commit Extract Error {e} ==========\n{block}\n==========')
            if not fail_silently:
                raise
            return None

    commits = (extract_commit(c.strip()) for c in log.split('START_COMMIT_QwQ') if c)
    # Drop the entries that failed to parse so the result matches the declared type
    return [c for c in commits if c is not None]
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logger(debug: bool = os.environ.get("DEBUG", False)):
    """
    Configure root logging (using rich for pretty output when it is installed).

    :param debug: Enable debug-level output. Defaults to the DEBUG environment variable as
        read at import time; the strings "", "0", "false", "no" and "off" count as disabled.
    :return: The logger named "a2"
    """
    # Environment variables are strings: without this, DEBUG=0 would be truthy
    if isinstance(debug, str):
        debug = debug.strip().lower() not in ("", "0", "false", "no", "off")

    # Try to use rich for pretty printing
    try:
        from rich.logging import RichHandler
        handler = RichHandler(rich_tracebacks=True)

        from rich.traceback import install
        install(show_locals=True)
    except ImportError:
        handler = logging.StreamHandler()

    # Initialize debug logger
    logging.basicConfig(
        level="NOTSET" if debug else "INFO",
        format="%(message)s",
        datefmt="[%X]",
        handlers=[handler]
    )

    return logging.getLogger("a2")
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def setup_proxy(session: requests.Session, addr: str = 'socks5://localhost:9050', verbose: bool = True):
    """
    Route a requests session through a proxy and verify that the public IP changed.

    As a safety net, module-level ``requests.get`` / ``requests.post`` are replaced with a
    function that raises, so code cannot bypass the proxied session by accident.

    :param session: Session to configure
    :param addr: Proxy address used for both http and https
    :param verbose: Whether to print the raw ip and the proxy ip
    :raises AssertionError: If the public ip is the same with and without the proxy
    """
    url = 'https://ip.me'

    # Setup proxy
    ip = session.get(url).text.strip()
    session.proxies = {
        'http': addr,
        'https': addr
    }
    proxy_ip = session.get(url).text.strip()

    # Print ip
    if verbose:
        print(f'Raw ip: {ip}')
        print(f'Proxy ip: {proxy_ip}')

    # ips shouldn't match. Raised explicitly so the check also runs under `python -O`,
    # where assert statements are removed.
    if ip == proxy_ip:
        raise AssertionError('Proxy did not start correctly.')

    # Disable default requests behavior
    def warn(*args, **kwargs):
        raise ReferenceError('Use session.get instead of requests.get')
    requests.get = warn
    requests.post = warn
|
||||||
+67
-21
@@ -1,11 +1,14 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import inspect
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import pickle
|
import pickle
|
||||||
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -36,36 +39,70 @@ def pickle_decode(by: bytes) -> Any:
|
|||||||
return pickle.load(bio)
|
return pickle.load(bio)
|
||||||
|
|
||||||
|
|
||||||
|
def infer(o: object) -> object | None:
    """
    Convert an object of a known non-JSON type into a JSON-compatible value.

    NOTE: the result may be falsy (e.g. an empty set becomes []); callers should compare
    against None to find out whether the object was recognised.

    :param o: Object to convert
    :return: Converted value, or None if the type is not supported
    """
    # Support encoding dataclasses (instances only: asdict() raises on dataclass classes)
    # https://stackoverflow.com/a/51286749/7346633
    if dataclasses.is_dataclass(o) and not isinstance(o, type):
        return dataclasses.asdict(o)

    # Simple namespace
    if isinstance(o, SimpleNamespace):
        return o.__dict__

    # Support encoding datetime
    if isinstance(o, (datetime.datetime, datetime.date)):
        return o.isoformat()

    # Support for sets
    # https://stackoverflow.com/a/8230505/7346633
    if isinstance(o, (set, frozenset)):
        return list(o)

    # Support for Path
    if isinstance(o, Path):
        return str(o)

    # Support for byte arrays (encode as base64 string)
    if isinstance(o, (bytes, bytearray)):
        return base64.b64encode(o).decode()

    # Enums
    if isinstance(o, Enum):
        return o.name

    return None
|
||||||
|
|
||||||
|
|
||||||
class EnhancedJSONEncoder(json.JSONEncoder):
    """
    An improvement to the json.JSONEncoder class, which supports every type handled by
    infer(): dataclasses, namespaces, datetime, sets, paths, bytes and enums
    """

    def default(self, o: object) -> object:
        # Compare against None: a falsy conversion (e.g. empty set -> []) is still valid
        infer_result = infer(o)
        if infer_result is not None:
            return infer_result
        return super().default(o)


class ForceJSONEcoder(EnhancedJSONEncoder):
    """
    A json encoder that can serialize almost everything (including custom classes, byte arrays)
    """

    def default(self, o: object) -> object:
        infer_result = infer(o)
        if infer_result is not None:
            return infer_result

        # Support for custom classes (get dict values); class objects themselves are skipped
        if hasattr(o, '__dict__') and not inspect.isclass(o):
            return dict(vars(o))

        return super().default(o)
|
||||||
|
|
||||||
|
|
||||||
def json_stringify(obj: object, **kwargs) -> str:
|
def json_stringify(obj: object, forced: bool = True, **kwargs) -> str:
|
||||||
"""
|
"""
|
||||||
Serialize json string with support for dataclasses and datetime and sets and with custom
|
Serialize json string with support for dataclasses and datetime and sets and with custom
|
||||||
configuration.
|
configuration.
|
||||||
@@ -74,15 +111,24 @@ def json_stringify(obj: object, **kwargs) -> str:
|
|||||||
- obj != None
|
- obj != None
|
||||||
|
|
||||||
:param obj: Objects
|
:param obj: Objects
|
||||||
|
:param forced: Whether to force the conversion of classes and byte arrays
|
||||||
:return: Json strings
|
:return: Json strings
|
||||||
"""
|
"""
|
||||||
args = dict(ensure_ascii=False, cls=EnhancedJSONEncoder)
|
args = dict(ensure_ascii=False, cls=ForceJSONEcoder if forced else EnhancedJSONEncoder)
|
||||||
args.update(kwargs)
|
args.update(kwargs)
|
||||||
return json.dumps(obj, **args)
|
return json.dumps(obj, **args)
|
||||||
|
|
||||||
|
|
||||||
class SafeNamespace(SimpleNamespace):
    """
    A SimpleNamespace whose missing attributes read as None instead of raising.
    """

    def __getattr__(self, attr):
        # __getattr__ only runs after the normal lookup has failed, so the attribute is
        # missing. Special (dunder) names must still raise: returning None for them makes
        # hasattr(obj, '__len__') true and breaks protocols such as copy and pickle.
        if attr.startswith('__') and attr.endswith('__'):
            raise AttributeError(attr)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def jsn(s: str) -> SafeNamespace:
    """
    Parse a JSON string, turning every JSON object into a SafeNamespace.

    :param s: JSON text
    :return: Parsed value with attribute-style access to object fields
    """
    def to_namespace(fields: dict) -> SafeNamespace:
        return SafeNamespace(**fields)

    return json.loads(s, object_hook=to_namespace)
|
||||||
|
|
||||||
|
|
||||||
def ensure_dir(path: Path | str) -> Path:
|
def ensure_dir(path: Path | str) -> Path:
|
||||||
@@ -131,8 +177,8 @@ def read(file: Path | str) -> str:
|
|||||||
return Path(file).read_text('utf-8')
|
return Path(file).read_text('utf-8')
|
||||||
|
|
||||||
|
|
||||||
def write_json(fp: Path | str, data: Any, **kwargs):
    """
    Serialize data with json_stringify and write it to a file.

    :param fp: Destination file path
    :param data: Object to serialize
    :param kwargs: Extra arguments forwarded to json_stringify
    """
    text = json_stringify(data, **kwargs)
    write(fp, text)
|
||||||
|
|
||||||
|
|
||||||
def parse_date_time(iso: str) -> datetime.datetime:
|
def parse_date_time(iso: str) -> datetime.datetime:
|
||||||
|
|||||||
@@ -0,0 +1,51 @@
|
|||||||
|
import pickle
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import zstandard as zstd
|
||||||
|
import orjson
|
||||||
|
|
||||||
|
from . import write
|
||||||
|
|
||||||
|
zstd_d = zstd.ZstdDecompressor()
|
||||||
|
zstd_c = zstd.ZstdCompressor(level=5, write_checksum=True, threads=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def load_json_zst(file_path: str | Path) -> dict | list:
    """
    Load a .json.zst file and return its parsed content.

    Parameters:
        file_path (str | Path): The path to the .json.zst file.

    Returns:
        dict or list: The parsed JSON content.
    """
    # Close the decompression reader together with the file instead of leaving it to the GC
    with Path(file_path).open('rb') as f, zstd_d.stream_reader(f) as reader:
        return orjson.loads(reader.read())
|
||||||
|
|
||||||
|
|
||||||
|
def write_json_zst(file_path: str | Path, data: dict | list, **kwargs):
    """
    Dump data to a .json.zst file.

    Parameters:
        file_path (str | Path): The path to the .json.zst file.
        data (dict or list): The data to dump.
        **kwargs: Extra arguments forwarded to orjson.dumps.
    """
    encoded = orjson.dumps(data, **kwargs)
    compressed = zstd_c.compress(encoded)
    write(file_path, compressed)
|
||||||
|
|
||||||
|
|
||||||
|
def load_pickle_zst(file_path: str | Path):
    """
    Load a zstd-compressed pickle file and return the unpickled object.

    WARNING: unpickling can execute arbitrary code. Only load files from trusted sources.

    :param file_path: The path to the .pickle.zst file
    :return: The unpickled object
    """
    # Close the decompression reader together with the file instead of leaving it to the GC
    with Path(file_path).open('rb') as f, zstd_d.stream_reader(f) as reader:
        return pickle.loads(reader.read())
|
||||||
|
|
||||||
|
|
||||||
|
def write_pickle_zst(file_path: str | Path, data):
    """
    Pickle an object, compress it with zstd and write it to a file.

    :param file_path: The path to the .pickle.zst file
    :param data: Any picklable object
    """
    pickled = pickle.dumps(data)
    compressed = zstd_c.compress(pickled)
    write(file_path, compressed)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Round-trip self test. Uses a temporary directory so that no test files are left
    # behind in the current working directory.
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        pickle_path = Path(tmp_dir) / 'test.pickle.zst'
        write_pickle_zst(pickle_path, {'a': 1, 'b': 2})
        assert load_pickle_zst(pickle_path) == {'a': 1, 'b': 2}

        json_path = Path(tmp_dir) / 'test.json.zst'
        write_json_zst(json_path, {'a': 1, 'b': 2})
        assert load_json_zst(json_path) == {'a': 1, 'b': 2}
|
||||||
@@ -22,17 +22,12 @@ setup(
|
|||||||
classifiers=[
|
classifiers=[
|
||||||
"License :: OSI Approved :: MIT License",
|
"License :: OSI Approved :: MIT License",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
"Programming Language :: Python :: 3.7",
|
|
||||||
"Programming Language :: Python :: 3.8",
|
|
||||||
"Programming Language :: Python :: 3.9",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
],
|
],
|
||||||
packages=find_packages(exclude=("tests",)),
|
packages=find_packages(exclude=("tests",)),
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
install_requires=[],
|
install_requires=[]
|
||||||
entry_points={
|
|
||||||
"console_scripts": [
|
|
||||||
"hypy_utils=hypy_utils.__main__:main",
|
|
||||||
]
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user