450 lines
15 KiB
Python
450 lines
15 KiB
Python
import argparse
|
|
import concurrent.futures
|
|
from collections import deque
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
import html
|
|
import os
|
|
from pathlib import Path
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import time
|
|
import tomllib
|
|
import urllib.parse
|
|
|
|
import requests
|
|
|
|
|
|
# Directory that contains this script; other paths in this module are built from it.
BASE_DIR = Path(__file__).resolve().parents[0]

# TOML configuration file expected next to this script.
CONFIG_FILE = BASE_DIR.joinpath("config.toml")

# "tt" followed by at least seven digits, on word boundaries, any letter case.
IMDB_ID_RE = re.compile(r"\btt\d{7,}\b", flags=re.IGNORECASE)

# Matches output such as "Progress: 12.5% (State: downloading)" and captures both parts.
PROGRESS_RE = re.compile(r"Progress:\s*(?P<progress>[0-9.]+%)\s*\(State:\s*(?P<state>[^)]+)\)")

# Longest message text this module sends to Telegram without truncating it first.
TELEGRAM_TEXT_LIMIT = 4096
|
|
|
|
|
|
class TelegramApiError(RuntimeError):
    """Raised when a Telegram Bot API response does not report ``ok``."""
|
|
|
|
|
|
class TelegramBotClient:
    """Small Telegram Bot API client that POSTs JSON to ``api.telegram.org``.

    A separate ``requests.Session`` is kept per thread (in a ``threading.local``),
    so the same client object can be used from the polling loop and from workers.
    """

    def __init__(self, token: str):
        """Build the API base URL from the bot *token*."""
        self.base_url = f"https://api.telegram.org/bot{token}"
        self._local = threading.local()

    def session(self) -> requests.Session:
        """Return the calling thread's session, creating it on first use."""
        if not hasattr(self._local, "session"):
            self._local.session = requests.Session()
        return self._local.session

    def _call(self, method: str, params: dict, timeout: float):
        """POST *params* as JSON to *method* and return the payload's ``result``.

        *params* is passed as a dict (not ``**kwargs``) so that an API parameter
        named ``timeout`` can never collide with the HTTP *timeout*.

        Raises:
            TelegramApiError: the response JSON has a falsy ``ok``; the message
                is the payload's ``description`` when present.
            requests.HTTPError: the HTTP status is an error.
            ValueError: the body is not JSON and the HTTP status is not an error.
        """
        response = self.session().post(f"{self.base_url}/{method}", json=params, timeout=timeout)
        try:
            payload = response.json()
        except ValueError:
            # Prefer reporting the HTTP failure; fall back to the decode error.
            response.raise_for_status()
            raise
        if not payload.get("ok"):
            raise TelegramApiError(payload.get("description", "unknown Telegram API error"))
        response.raise_for_status()
        return payload["result"]

    def request(self, method: str, **params) -> dict:
        """Call an API *method* with *params* using a 45 second HTTP timeout."""
        return self._call(method, params, timeout=45)

    def get_updates(self, offset: int | None, timeout: int = 30) -> list[dict]:
        """Long-poll ``getUpdates`` for message updates.

        *offset* is sent only when it is not ``None``. The HTTP timeout is the
        long-poll *timeout* plus ten seconds so the server can answer first.
        """
        params = {
            "timeout": timeout,
            "allowed_updates": ["message"],
        }
        if offset is not None:
            params["offset"] = offset
        return self._call("getUpdates", params, timeout=timeout + 10)

    def send_message(self, chat_id: int, text: str) -> dict:
        """Send *text* (truncated to the module limit) to *chat_id* without link previews."""
        return self.request(
            "sendMessage",
            chat_id=chat_id,
            text=truncate_telegram_text(text),
            disable_web_page_preview=True,
        )

    def edit_message(self, chat_id: int, message_id: int, text: str) -> dict:
        """Replace the text of message *message_id* in *chat_id* with *text* (truncated)."""
        return self.request(
            "editMessageText",
            chat_id=chat_id,
            message_id=message_id,
            text=truncate_telegram_text(text),
            disable_web_page_preview=True,
        )
|
|
|
|
|
|
@dataclass(frozen=True)
class BotConfig:
    """Immutable settings for the Telegram bot adapter."""

    # Bot token used to build the Telegram API base URL.
    token: str
    # Value passed to the workflow script as --dl-dir.
    dl_dir: str
    # Value passed to the workflow script as --jellyfin-dir.
    jellyfin_dir: str
    # Value passed to the workflow script as --imdb-source.
    imdb_source: str
    # When true, --ignore-existing is added to the workflow command.
    ignore_existing: bool
    # Chats allowed to use the bot; an empty set allows every chat.
    allowed_chat_ids: frozenset[int]
    # Size of the thread pool that runs download jobs.
    workers: int
    # Minimum number of seconds between non-forced progress message edits.
    progress_interval: float
|
|
|
|
|
|
def load_config(config_file: Path = CONFIG_FILE) -> BotConfig:
    """Read *config_file* (TOML, UTF-8) and build a ``BotConfig`` from it.

    Settings come from the ``[telegram]`` table. The token falls back to the
    ``TELEGRAM_BOT_TOKEN`` environment variable, and the two directories fall
    back to ``qb_download_dir`` / ``jellyfin_dir`` in the ``[paths]`` table.
    ``workers`` is clamped to at least 1 and ``progress_interval`` to at least 2.0.

    Raises:
        ValueError: no bot token is available, or ``imdb_source`` is not
            ``"imdbapi"`` or ``"mteam"``.
        KeyError: the ``[paths]`` table, or a needed key in it, is missing.
    """
    document = tomllib.loads(config_file.read_text(encoding="utf-8"))
    tg = document.get("telegram", {})

    bot_token = tg.get("bot_token") or os.getenv("TELEGRAM_BOT_TOKEN")
    if not bot_token:
        raise ValueError("Missing Telegram bot token. Set [telegram].bot_token or TELEGRAM_BOT_TOKEN.")

    path_table = document["paths"]
    chat_ids = frozenset(map(int, tg.get("allowed_chat_ids", [])))

    source = str(tg.get("imdb_source", "imdbapi"))
    if source not in ("imdbapi", "mteam"):
        raise ValueError("[telegram].imdb_source must be either 'imdbapi' or 'mteam'.")

    download_dir = str(tg.get("dl_dir") or path_table["qb_download_dir"])
    library_dir = str(tg.get("jellyfin_dir") or path_table["jellyfin_dir"])
    skip_existing = bool(tg.get("ignore_existing", False))
    worker_count = max(1, int(tg.get("workers", 2)))
    edit_interval = max(2.0, float(tg.get("progress_interval", 10.0)))

    return BotConfig(
        token=bot_token,
        dl_dir=download_dir,
        jellyfin_dir=library_dir,
        imdb_source=source,
        ignore_existing=skip_existing,
        allowed_chat_ids=chat_ids,
        workers=worker_count,
        progress_interval=edit_interval,
    )
|
|
|
|
|
|
def extract_imdb_id(text: str) -> str | None:
    """Return the first IMDb title ID found in *text*, lower-cased, or ``None``.

    If the stripped text contains no ID, it is HTML-unescaped and URL-decoded
    and searched again. This is tried at most six times and stops early once
    decoding produces a string that has already been examined.
    """
    candidate = text.strip()
    examined = set()
    attempts_left = 6

    while attempts_left > 0:
        attempts_left -= 1

        if (found := IMDB_ID_RE.search(candidate)) is not None:
            return found.group(0).lower()

        examined.add(candidate)
        unescaped = html.unescape(candidate)
        next_candidate = urllib.parse.unquote_plus(unescaped)
        # An unchanged string is already in `examined`, so one membership test covers both cases.
        if next_candidate in examined:
            return None
        candidate = next_candidate

    return None
|
|
|
|
|
|
def truncate_telegram_text(text: str) -> str:
    """Return *text* unchanged if it fits ``TELEGRAM_TEXT_LIMIT``, else a shortened copy.

    A shortened copy keeps the first ``TELEGRAM_TEXT_LIMIT - 120`` characters
    and ends with a truncation marker.
    """
    if len(text) > TELEGRAM_TEXT_LIMIT:
        kept = text[: TELEGRAM_TEXT_LIMIT - 120]
        return kept + "\n...\n[truncated]"
    return text
|
|
|
|
|
|
def tail_text(lines: deque[str], max_chars: int = 2200) -> str:
    """Join the most recent *lines* with newlines, staying within *max_chars*.

    Lines are taken from the end; each one costs its length plus one. The
    newest line is always included, even if it alone exceeds *max_chars*.
    """
    kept = deque()
    used = 0

    for entry in reversed(lines):
        cost = len(entry) + 1
        if kept and used + cost > max_chars:
            break
        # Prepending restores chronological order without a second reversal.
        kept.appendleft(entry)
        used += cost

    return "\n".join(kept)
|
|
|
|
|
|
class ProgressReporter:
    """Tracks a job's output and mirrors a status summary into one Telegram message."""

    def __init__(self, bot: TelegramBotClient, chat_id: int, message_id: int, imdb_id: str, interval: float):
        """Remember the target message and start the elapsed-time clock.

        *interval* is the minimum number of seconds between non-forced
        Telegram update attempts.
        """
        self.bot = bot
        self.chat_id = chat_id
        self.message_id = message_id
        self.imdb_id = imdb_id
        self.interval = interval
        self.started_at = datetime.now()
        self.last_edit_at = 0.0
        self.last_text = ""
        self.title = ""
        self.step = "Starting"
        self.progress = ""
        self.state = ""
        # Only the most recent output lines are kept for the status message.
        self.lines: deque[str] = deque(maxlen=14)

    def observe(self, line: str) -> None:
        """Record one line of output and update step/title/progress from it.

        Blank lines are ignored.
        """
        clean_line = line.strip()
        if not clean_line:
            return

        self.lines.append(clean_line)

        if clean_line.startswith("==="):
            self.step = clean_line.strip("= ")
        elif clean_line.startswith("Found Title:"):
            self.title = clean_line.removeprefix("Found Title:").strip()
        elif progress_match := PROGRESS_RE.search(clean_line):
            self.progress = progress_match.group("progress")
            self.state = progress_match.group("state")
        elif clean_line == "Download complete!":
            self.progress = "100.0%"
            self.state = "complete"
        elif clean_line.startswith("Finished processing"):
            self.step = clean_line

    def status_text(self, status: str, log_path: Path | None = None, return_code: int | None = None) -> str:
        """Render the current state as the multi-line text shown in the chat."""
        elapsed = int((datetime.now() - self.started_at).total_seconds())
        parts = [
            f"{status}: {self.imdb_id}",
            f"Step: {self.step}",
        ]

        if self.title:
            parts.append(f"Title: {self.title}")
        if self.progress:
            parts.append(f"Progress: {self.progress}" + (f" ({self.state})" if self.state else ""))
        if return_code is not None:
            parts.append(f"Exit code: {return_code}")
        parts.append(f"Elapsed: {elapsed}s")
        if log_path is not None:
            parts.append(f"Log: {log_path}")

        recent = tail_text(self.lines)
        if recent:
            parts.append(f"\nRecent output:\n{recent}")

        return "\n".join(parts)

    def flush(self, status: str = "Running", log_path: Path | None = None, return_code: int | None = None, force: bool = False) -> None:
        """Push the current status to Telegram; never raises.

        Non-forced calls are skipped until *interval* seconds have passed since
        the previous attempt, successful or not. If the edit is rejected by the
        API, a new message is sent instead and becomes the one edited from then
        on. Failures are printed, not raised.
        """
        now = time.monotonic()
        if not force and now - self.last_edit_at < self.interval:
            return

        text = self.status_text(status=status, log_path=log_path, return_code=return_code)
        if text == self.last_text:
            return

        # Count the attempt itself, not only a success: otherwise a failing
        # Telegram call would be retried, blocking, for every output line.
        self.last_edit_at = now

        try:
            self.bot.edit_message(self.chat_id, self.message_id, text)
        except TelegramApiError as exc:
            description = str(exc).lower()
            if "message is not modified" in description:
                self.last_text = text
                return
            if "too many requests" in description and not force:
                # Rate limited: sending an extra message would only add load.
                print(f"Telegram progress update failed: {exc}")
                return
            self._send_replacement(text)
        except Exception as exc:
            print(f"Telegram progress update failed: {exc}")
        else:
            self.last_text = text

    def _send_replacement(self, text: str) -> None:
        """Send *text* as a new message and switch later edits to that message."""
        try:
            message = self.bot.send_message(self.chat_id, text)
            self.message_id = int(message["message_id"])
        except Exception as send_exc:
            print(f"Telegram progress fallback failed: {send_exc}")
        else:
            self.last_text = text
|
|
|
|
|
|
def workflow_command(imdb_id: str, config: BotConfig) -> list[str]:
    """Build the argv that runs ``workflow.py`` for *imdb_id*.

    The script is run with the current interpreter in unbuffered mode (``-u``);
    ``--ignore-existing`` is appended only when enabled in *config*.
    """
    script_path = str(BASE_DIR / "workflow.py")
    options = [
        "--dl-dir",
        config.dl_dir,
        "--jellyfin-dir",
        config.jellyfin_dir,
        "--imdb-source",
        config.imdb_source,
    ]
    optional = ["--ignore-existing"] if config.ignore_existing else []
    return [sys.executable, "-u", script_path, imdb_id, *options, *optional]
|
|
|
|
|
|
def _stop_process(process: subprocess.Popen | None) -> None:
    """Kill *process* if it is still running, reap it, and close its stdout pipe."""
    if process is None:
        return
    if process.poll() is None:
        process.kill()
        process.wait()
    if process.stdout is not None:
        process.stdout.close()


def run_download_job(bot: TelegramBotClient, chat_id: int, message_id: int, imdb_id: str, config: BotConfig, active_jobs: set[str], active_lock: threading.Lock) -> None:
    """Run the workflow for *imdb_id*, logging its output and reporting progress.

    Output (stdout and stderr combined) is written line by line to
    ``logs/telegram_<imdb_id>_<timestamp>.log`` under ``BASE_DIR`` and fed to a
    ``ProgressReporter`` that edits message *message_id* in *chat_id*. On a
    non-zero exit code the log is moved to ``errors/``. Any exception raised
    while setting up or running the job is reported to the chat as a failure,
    and *imdb_id* is always removed from *active_jobs* (under *active_lock*).
    """
    logs_dir = BASE_DIR / "logs"
    errors_dir = BASE_DIR / "errors"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = logs_dir / f"telegram_{imdb_id}_{timestamp}.log"
    reporter = ProgressReporter(bot, chat_id, message_id, imdb_id, config.progress_interval)

    return_code = 1
    final_log_path = log_path
    process = None

    try:
        # Directory creation is inside the try so a failure here is reported
        # and still releases the job in the finally clause.
        logs_dir.mkdir(exist_ok=True)
        errors_dir.mkdir(exist_ok=True)
        reporter.flush(status="Starting", log_path=log_path, force=True)

        env = os.environ.copy()
        env["PYTHONUNBUFFERED"] = "1"

        with log_path.open("w", encoding="utf-8") as log_file:
            process = subprocess.Popen(
                workflow_command(imdb_id, config),
                cwd=BASE_DIR,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,
                env=env,
            )

            assert process.stdout is not None
            for line in process.stdout:
                log_file.write(line)
                log_file.flush()
                reporter.observe(line)
                reporter.flush(status="Running", log_path=log_path)

            return_code = process.wait()

        if return_code != 0:
            moved_path = errors_dir / log_path.name
            log_path.rename(moved_path)
            # Only report the new location once the move has actually happened.
            final_log_path = moved_path
            reporter.flush(status="Failed", log_path=final_log_path, return_code=return_code, force=True)
        else:
            reporter.flush(status="Completed", log_path=final_log_path, return_code=return_code, force=True)
    except Exception as exc:
        reporter.lines.append(f"Bot adapter error: {exc}")
        reporter.flush(status="Failed", log_path=final_log_path, return_code=return_code, force=True)
    finally:
        try:
            # No-op after a normal run; after an adapter error it prevents an
            # orphaned child writing into a pipe nobody reads.
            _stop_process(process)
        finally:
            with active_lock:
                active_jobs.discard(imdb_id)
|
|
|
|
|
|
def is_chat_allowed(chat_id: int, config: BotConfig) -> bool:
    """Return whether *chat_id* may use the bot; an empty allow-list permits every chat."""
    allow_list = config.allowed_chat_ids
    if not allow_list:
        return True
    return chat_id in allow_list
|
|
|
|
|
|
def help_text() -> str:
    """Return the three-line usage message sent for /start, /help and unknown commands."""
    usage_lines = (
        "Send /download tt0903747, or /download with any URL/text that contains one after URL decoding.",
        "If group privacy mode is disabled, plain tt0903747 messages also work.",
        "The bot will start the existing MTfin workflow and keep this chat updated with progress.",
    )
    return "\n".join(usage_lines)
|
|
|
|
|
|
def handle_message(
    bot: TelegramBotClient,
    message: dict,
    config: BotConfig,
    executor: concurrent.futures.ThreadPoolExecutor,
    active_jobs: set[str],
    active_lock: threading.Lock,
) -> None:
    """Handle one incoming Telegram message.

    ``/chatid`` is answered for every chat. All other handling requires the
    chat to be allowed: ``/start`` and ``/help`` send the usage text,
    ``/status`` lists active jobs, and ``/download``, ``/add`` or plain text
    are searched for an IMDb ID, which is queued on *executor* unless a job
    for it is already active. Any other command gets the usage text.

    Exceptions from sending messages or submitting the job propagate to the
    caller; if queuing fails, the ID is removed from *active_jobs* again.
    """
    chat = message.get("chat") or {}
    chat_id = int(chat.get("id"))
    text = (message.get("text") or message.get("caption") or "").strip()
    # "/download@SomeBot args" -> "/download"; non-command text yields "".
    command = text.split(maxsplit=1)[0].split("@", 1)[0].lower() if text.startswith("/") else ""

    if command == "/chatid":
        bot.send_message(chat_id, f"Chat ID: {chat_id}")
        return

    if not is_chat_allowed(chat_id, config):
        bot.send_message(chat_id, "This chat is not allowed to start downloads.")
        return

    if command in {"/start", "/help"}:
        bot.send_message(chat_id, help_text())
        return

    if command == "/status":
        with active_lock:
            running = sorted(active_jobs)
        bot.send_message(chat_id, "Active jobs: " + (", ".join(running) if running else "none"))
        return

    if command and command not in {"/download", "/add"}:
        bot.send_message(chat_id, help_text())
        return

    imdb_id = extract_imdb_id(text)
    if imdb_id is None:
        bot.send_message(chat_id, "I could not find an IMDb title ID. Send something like tt0903747 or an IMDb URL.")
        return

    # Check and reserve atomically, but reply only after releasing the lock:
    # a slow network call must not block workers that need the lock to
    # unregister their finished job.
    with active_lock:
        already_active = imdb_id in active_jobs
        if not already_active:
            active_jobs.add(imdb_id)

    if already_active:
        bot.send_message(chat_id, f"{imdb_id} is already queued or running.")
        return

    try:
        status_message = bot.send_message(chat_id, f"Queued: {imdb_id}")
        executor.submit(
            run_download_job,
            bot,
            chat_id,
            int(status_message["message_id"]),
            imdb_id,
            config,
            active_jobs,
            active_lock,
        )
    except Exception:
        # The job never started, so release the reservation before re-raising.
        with active_lock:
            active_jobs.discard(imdb_id)
        raise
|
|
|
|
|
|
def run_bot(config: BotConfig) -> None:
    """Poll Telegram forever and dispatch each incoming message.

    Polling errors are printed and retried after five seconds. Errors raised
    while handling a message are printed and, when the chat ID is known,
    reported back to that chat.
    """
    bot = TelegramBotClient(config.token)
    active_jobs: set[str] = set()
    active_lock = threading.Lock()
    poll_offset = None

    print(f"Telegram bot is polling. Workers: {config.workers}")
    if not config.allowed_chat_ids:
        print("Warning: no [telegram].allowed_chat_ids configured; any chat can start downloads.")

    with concurrent.futures.ThreadPoolExecutor(max_workers=config.workers) as pool:
        while True:
            try:
                batch = bot.get_updates(poll_offset)
            except Exception as poll_exc:
                print(f"Polling error: {poll_exc}")
                time.sleep(5)
                continue

            for update in batch:
                # Advance the offset first so a failing update is never fetched again.
                poll_offset = int(update["update_id"]) + 1
                message = update.get("message")
                if message:
                    try:
                        handle_message(bot, message, config, pool, active_jobs, active_lock)
                    except Exception as exc:
                        origin_chat_id = (message.get("chat") or {}).get("id")
                        print(f"Message handling error: {exc}")
                        if origin_chat_id is None:
                            continue
                        try:
                            bot.send_message(int(origin_chat_id), f"Bot adapter error: {exc}")
                        except Exception as send_exc:
                            print(f"Failed to report message handling error: {send_exc}")
|
|
|
|
|
|
def main() -> None:
    """Parse the command line (no options beyond --help), load the config and run the bot."""
    cli = argparse.ArgumentParser(description="Telegram bot adapter for starting MTfin downloads.")
    cli.parse_args()

    run_bot(load_config())


if __name__ == "__main__":
    main()
|