From 18cf024b6e0d2f7d047d5af33b7275c42a76dab7 Mon Sep 17 00:00:00 2001 From: Azalea Date: Wed, 6 May 2026 17:42:20 +0000 Subject: [PATCH] [+] Retry --- Cargo.lock | 39 +++++++++ Cargo.toml | 1 + README.md | 12 +++ src/main.rs | 34 ++++++++ src/sync.rs | 232 +++++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 308 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8414551..3e072b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "1.0.0" @@ -359,6 +368,7 @@ dependencies = [ "console", "dialoguer", "directories", + "regex", "reqwest", "serde", "serde_json", @@ -892,6 +902,35 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "reqwest" version = "0.12.28" diff --git a/Cargo.toml b/Cargo.toml index 599f95a..72c36b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ console = "0.16" dialoguer = "0.12" directories = "5.0" reqwest = { 
version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] } +regex = "1.11" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" tempfile = "3.13" diff --git a/README.md b/README.md index 43eeaf9..e5aef9d 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,18 @@ Preview commands without writing to Git remotes: git-sync sync --dry-run ``` +Sync only repositories whose names match a regex: + +```sh +git-sync sync --repo-pattern '^(foo|bar)-' +``` + +Retry only repositories that failed during the previous non-dry-run sync: + +```sh +git-sync sync --retry-failed +``` + Use cron or another scheduler for automatic execution: ```cron diff --git a/src/main.rs b/src/main.rs index 9b8ce11..9f3f2a1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -105,6 +105,10 @@ struct SyncCommand { no_create: bool, #[arg(long)] force: bool, + #[arg(long, value_name = "REGEX")] + repo_pattern: Option<String>, + #[arg(long)] + retry_failed: bool, #[arg(long, value_name = "PATH")] work_dir: Option<PathBuf>, } @@ -147,6 +151,8 @@ fn main() -> Result<()> { dry_run: command.dry_run, create_missing_override: command.no_create.then_some(false), force_override: command.force.then_some(true), + repo_pattern: command.repo_pattern, + retry_failed: command.retry_failed, work_dir: command.work_dir, }, ) @@ -349,6 +355,34 @@ mod tests { )); } + #[test] + fn cli_accepts_sync_repo_pattern() { + let cli = Cli::try_parse_from([ + "git-sync", + "sync", + "--repo-pattern", + "^(foo|bar)-", + "--dry-run", + ]) + .unwrap(); + + let Command::Sync(args) = cli.command else { + panic!("parsed unexpected command"); + }; + assert_eq!(args.repo_pattern, Some("^(foo|bar)-".to_string())); + assert!(args.dry_run); + } + + #[test] + fn cli_accepts_sync_retry_failed() { + let cli = Cli::try_parse_from(["git-sync", "sync", "--retry-failed"]).unwrap(); + + let Command::Sync(args) = cli.command else { + panic!("parsed unexpected command"); + }; + assert!(args.retry_failed); + } + #[test] fn
endpoint_parser_supports_aliases_and_rejects_bad_kinds() { let endpoint = parse_endpoint("github:organization:MewoLab").unwrap(); diff --git a/src/sync.rs b/src/sync.rs index 7d29641..46d148b 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -1,20 +1,26 @@ -use std::collections::{BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::fs; use std::path::{Path, PathBuf}; use anyhow::{Context, Result, bail}; use console::style; +use regex::Regex; +use serde::{Deserialize, Serialize}; use crate::config::{Config, EndpointConfig, MirrorConfig, default_work_dir, validate_config}; use crate::git::{GitMirror, Redactor, RemoteSpec, is_disabled_repository_error, safe_remote_name}; use crate::provider::{EndpointRepo, ProviderClient, repos_by_name}; +const FAILURE_STATE_FILE: &str = "failed-repos.toml"; + #[derive(Clone, Debug, Default)] pub struct SyncOptions { pub group: Option<String>, pub dry_run: bool, pub create_missing_override: Option<bool>, pub force_override: Option<bool>, + pub repo_pattern: Option<String>, + pub retry_failed: bool, pub work_dir: Option<PathBuf>, } @@ -44,19 +50,42 @@ pub fn sync_all(config: &Config, options: SyncOptions) -> Result<()> { .map(|site| site.token()) .collect::<Result<Vec<_>>>()?; let redactor = Redactor::new(tokens); + let repo_pattern = options + .repo_pattern + .as_deref() + .map(Regex::new) + .transpose() + .with_context(|| "invalid --repo-pattern regex")?; + let retry_failed_repos = if options.retry_failed { + Some(load_failure_state(&work_dir)?.repos_by_group()) + } else { + None + }; let mut failures = Vec::new(); for mirror in mirrors { - match sync_group(config, mirror, &options, &work_dir, redactor.clone()) { + match sync_group( + config, + mirror, + &options, + &work_dir, + redactor.clone(), + repo_pattern.as_ref(), + retry_failed_repos.as_ref(), + ) { Ok(mut group_failures) => failures.append(&mut group_failures), Err(error) => { let scope = format!("mirror group {}", mirror.name); print_failure(&scope, &error); - failures.push(SyncFailure::new(scope, 
error)); + failures.push(SyncFailure::group(scope, error)); } } } + if !options.dry_run { + save_failure_state(&work_dir, &FailureState::from_failures(&failures))?; + } + if !failures.is_empty() { print_failure_summary(&failures); bail!("sync completed with {} failure(s)", failures.len()); @@ -69,15 +98,91 @@ pub fn sync_all(config: &Config, options: SyncOptions) -> Result<()> { struct SyncFailure { scope: String, error: String, + retry: Option<FailedRepo>, } impl SyncFailure { - fn new(scope: String, error: anyhow::Error) -> Self { + fn group(scope: String, error: anyhow::Error) -> Self { Self { scope, error: format_error(&error), + retry: None, } } + + fn repo(group: String, repo: String, error: anyhow::Error) -> Self { + Self { + scope: format!("{group}/{repo}"), + error: format_error(&error), + retry: Some(FailedRepo { group, repo }), + } + } +} + +#[derive(Clone, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize)] +struct FailedRepo { + group: String, + repo: String, +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +struct FailureState { + #[serde(default)] + repos: Vec<FailedRepo>, +} + +impl FailureState { + fn from_failures(failures: &[SyncFailure]) -> Self { + let repos = failures + .iter() + .filter_map(|failure| failure.retry.clone()) + .collect::<BTreeSet<_>>() + .into_iter() + .collect(); + Self { repos } + } + + fn repos_by_group(&self) -> BTreeMap<String, BTreeSet<String>> { + let mut output = BTreeMap::<String, BTreeSet<String>>::new(); + for failure in &self.repos { + output + .entry(failure.group.clone()) + .or_default() + .insert(failure.repo.clone()); + } + output + } +} + +fn load_failure_state(work_dir: &Path) -> Result<FailureState> { + let path = failure_state_path(work_dir); + if !path.exists() { + return Ok(FailureState::default()); + } + let contents = + fs::read_to_string(&path).with_context(|| format!("failed to read {}", path.display()))?; + toml::from_str(&contents).with_context(|| format!("failed to parse {}", path.display())) +} + +fn save_failure_state(work_dir: &Path, state: &FailureState) -> Result<()> 
{ + let path = failure_state_path(work_dir); + if state.repos.is_empty() { + if path.exists() { + fs::remove_file(&path) + .with_context(|| format!("failed to remove {}", path.display()))?; + } + return Ok(()); + } + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .with_context(|| format!("failed to create {}", parent.display()))?; + } + let contents = toml::to_string_pretty(state)?; + fs::write(&path, contents).with_context(|| format!("failed to write {}", path.display())) +} + +fn failure_state_path(work_dir: &Path) -> PathBuf { + work_dir.join(FAILURE_STATE_FILE) } fn print_failure(scope: &str, error: &anyhow::Error) { @@ -122,6 +227,8 @@ fn sync_group( options: &SyncOptions, work_dir: &Path, redactor: Redactor, + repo_pattern: Option<&Regex>, + retry_failed_repos: Option<&BTreeMap<String, BTreeSet<String>>>, ) -> Result<Vec<SyncFailure>> { println!(); println!( @@ -155,14 +262,60 @@ fn sync_group( } let mut repos = repos_by_name(all_endpoint_repos); - let repo_names = repos.keys().cloned().collect::<Vec<_>>(); + let all_repo_count = repos.len(); + let retry_repo_names = retry_failed_repos.and_then(|repos| repos.get(&mirror.name)); + let repo_names = repos + .keys() + .filter(|name| { + repo_pattern.is_none_or(|pattern| pattern.is_match(name)) + && retry_repo_names.is_none_or(|repos| repos.contains(name.as_str())) + }) + .cloned() + .collect::<Vec<_>>(); if repo_names.is_empty() { - println!( - " {} mirror group has no repositories", - style("skip").yellow().bold() - ); + if let Some(retry_repo_names) = retry_repo_names { + println!( + " {} no previously failed repositories were found in this group ({} saved)", + style("skip").yellow().bold(), + retry_repo_names.len() + ); + } else if retry_failed_repos.is_some() { + println!( + " {} no previous failures for this group", + style("skip").yellow().bold() + ); + } else if let Some(pattern) = repo_pattern { + println!( + " {} no repositories match {} ({} discovered)", + style("skip").yellow().bold(), + style(pattern.as_str()).cyan(), + all_repo_count 
+ ); + } else { + println!( + " {} mirror group has no repositories", + style("skip").yellow().bold() + ); + } return Ok(Vec::new()); } + if let Some(pattern) = repo_pattern { + println!( + " {} {} of {} repositories match {}", + style("filter").cyan().bold(), + repo_names.len(), + all_repo_count, + style(pattern.as_str()).cyan() + ); + } + if let Some(retry_repo_names) = retry_repo_names { + println!( + " {} retrying {} of {} previously failed repositories", + style("retry").cyan().bold(), + repo_names.len(), + retry_repo_names.len() + ); + } let mut failures = Vec::new(); for repo_name in repo_names { @@ -180,7 +333,7 @@ fn sync_group( { let scope = format!("{}/{}", mirror.name, repo_name); print_failure(&scope, &error); - failures.push(SyncFailure::new(scope, error)); + failures.push(SyncFailure::repo(mirror.name.clone(), repo_name, error)); } } @@ -493,3 +646,62 @@ fn print_tag_decisions(tags: &[crate::git::TagDecision]) { fn short_sha(sha: &str) -> &str { sha.get(..12).unwrap_or(sha) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn failure_state_persists_repo_failures_by_group() { + let temp = tempfile::TempDir::new().unwrap(); + let failures = vec![ + SyncFailure::repo( + "sync-1".to_string(), + "repo-a".to_string(), + anyhow::anyhow!("a"), + ), + SyncFailure::repo( + "sync-1".to_string(), + "repo-a".to_string(), + anyhow::anyhow!("a again"), + ), + SyncFailure::repo( + "sync-2".to_string(), + "repo-b".to_string(), + anyhow::anyhow!("b"), + ), + SyncFailure::group( + "mirror group sync-3".to_string(), + anyhow::anyhow!("list failed"), + ), + ]; + let state = FailureState::from_failures(&failures); + + save_failure_state(temp.path(), &state).unwrap(); + let loaded = load_failure_state(temp.path()).unwrap(); + let by_group = loaded.repos_by_group(); + + assert_eq!(by_group["sync-1"].len(), 1); + assert!(by_group["sync-1"].contains("repo-a")); + assert_eq!(by_group["sync-2"].len(), 1); + assert!(by_group["sync-2"].contains("repo-b")); + 
assert!(!by_group.contains_key("sync-3")); + } + + #[test] + fn empty_failure_state_removes_retry_file() { + let temp = tempfile::TempDir::new().unwrap(); + let state = FailureState { + repos: vec![FailedRepo { + group: "sync-1".to_string(), + repo: "repo-a".to_string(), + }], + }; + save_failure_state(temp.path(), &state).unwrap(); + assert!(failure_state_path(temp.path()).exists()); + + save_failure_state(temp.path(), &FailureState::default()).unwrap(); + + assert!(!failure_state_path(temp.path()).exists()); + } +}