diff --git a/src/git.rs b/src/git.rs index c6af95b..deff8d7 100644 --- a/src/git.rs +++ b/src/git.rs @@ -816,6 +816,13 @@ pub fn is_disabled_repository_error(error: &anyhow::Error) -> bool { .any(|error| is_disabled_repository_stderr(error.stderr())) } +pub fn is_missing_repository_error(error: &anyhow::Error) -> bool { + error + .chain() + .filter_map(|cause| cause.downcast_ref::()) + .any(|error| is_missing_repository_stderr(error.stderr())) +} + fn missing_remotes(all_remote_names: &[String], source_remotes: &[String]) -> Vec { all_remote_names .iter() @@ -832,6 +839,14 @@ fn is_disabled_repository_stderr(stderr: &str) -> bool { || stderr.contains("dmca takedown") } +fn is_missing_repository_stderr(stderr: &str) -> bool { + let stderr = stderr.to_ascii_lowercase(); + (stderr.contains("repository") && stderr.contains("not found")) + || stderr.contains("project you were looking for could not be found") + || stderr.contains("does not appear to be a git repository") + || stderr.contains("the requested url returned error: 404") +} + impl Redactor { pub fn new(secrets: Vec) -> Self { let secrets = secrets diff --git a/src/sync.rs b/src/sync.rs index 9f1d0fa..9745293 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -15,7 +15,7 @@ use crate::config::{ }; use crate::git::{ BranchConflict, BranchDeletion, BranchUpdate, GitMirror, Redactor, RefBackup, RemoteSpec, - is_disabled_repository_error, ls_remote_refs, safe_remote_name, + is_disabled_repository_error, is_missing_repository_error, ls_remote_refs, safe_remote_name, }; use crate::logging; use crate::provider::{ @@ -140,6 +140,83 @@ pub fn sync_all(config: &Config, options: SyncOptions) -> Result<()> { Ok(()) } +pub fn sync_webhook_repo( + config: &Config, + group: &str, + repo_name: &str, + work_dir: Option, + jobs: usize, +) -> Result<()> { + validate_config(config)?; + if jobs == 0 { + bail!("jobs must be at least 1"); + } + let work_dir = work_dir.unwrap_or_else(default_work_dir); + fs::create_dir_all(&work_dir) + .with_context(|| format!("failed to create {}", work_dir.display()))?; + let mirror = config + .mirrors + .iter() + .find(|mirror| mirror.name == group) + .with_context(|| format!("no mirror group matched '{group}'"))?; + let repo_filter = mirror.repo_filter()?; + if !repo_filter.matches(repo_name) { + crate::logln!( + " {} {} does not match configured repository filters", + style("skip").yellow().bold(), + style(repo_name).cyan() + ); + return Ok(()); + } + + let tokens = config + .sites + .iter() + .map(|site| site.token()) + .collect::>>()?; + let redactor = Redactor::new(tokens); + let mut ref_state = load_ref_state(&work_dir)?; + crate::logln!(); + crate::logln!( + "{} {}", + style("Mirror group").cyan().bold(), + style(&mirror.name).bold() + ); + let mut repos = targeted_endpoint_repos(config, mirror, repo_name)?; + let context = RepoSyncContext { + config, + mirror, + work_dir: &work_dir, + redactor, + dry_run: false, + jobs, + }; + let outcome = sync_assumed_repo( + &context, + repo_name, + &mut repos, + mirror.create_missing, + &ref_state, + )?; + if !outcome.created_repos.is_empty() { + webhook::ensure_configured_webhooks( + config, + mirror, + &outcome.created_repos, + &work_dir, + jobs, + )?; + } + if let Some(update) = outcome.state_update { + match update { + RepoStateUpdate::Set(refs) => ref_state.set_repo(&mirror.name, repo_name, refs), + RepoStateUpdate::Remove => ref_state.remove_repo(&mirror.name, repo_name), + } + save_ref_state(&work_dir, &ref_state)?; + } + Ok(()) +} + struct GroupSyncContext<'a> { config: &'a Config, options: &'a SyncOptions, @@ -413,7 +490,7 @@ fn ensure_missing_repos( repo_name: &str, existing: &mut Vec, create_missing: bool, -) -> Result<()> { +) -> Result> { let present = existing .iter() .map(|repo| repo.endpoint.clone()) @@ -447,7 +524,7 @@ fn ensure_missing_repos( style(format!("on {}", endpoint.label())).dim() ); } - return Ok(()); + return Ok(Vec::new()); } let description = template.and_then(|repo| repo.description); @@ -488,9 +565,13 @@ fn ensure_missing_repos( )) })?; created.sort_by_key(|(index, _)| *index); - existing.extend(created.into_iter().map(|(_, repo)| repo)); + let created = created + .into_iter() + .map(|(_, repo)| repo) + .collect::>(); + existing.extend(created.clone()); - Ok(()) + Ok(created) } fn visibility_for_created_repo(mirror: &MirrorConfig, template: Option<&RemoteRepo>) -> Visibility { @@ -517,6 +598,7 @@ struct RepoSyncContext<'a> { #[derive(Default)] struct RepoSyncOutcome { state_update: Option, + created_repos: Vec, } enum RepoStateUpdate { @@ -531,6 +613,46 @@ fn mirror_repo_path(context: &RepoSyncContext<'_>, repo_name: &str) -> PathBuf { .join(format!("{}.git", safe_remote_name(repo_name))) } +fn targeted_endpoint_repos( + config: &Config, + mirror: &MirrorConfig, + repo_name: &str, +) -> Result> { + mirror + .endpoints + .iter() + .map(|endpoint| { + let site = config.site(&endpoint.site).unwrap(); + Ok(EndpointRepo { + endpoint: endpoint.clone(), + repo: RemoteRepo { + name: repo_name.to_string(), + clone_url: endpoint_clone_url(site, endpoint, repo_name)?, + private: matches!(mirror.visibility, Visibility::Private), + description: None, + }, + }) + }) + .collect() +} + +fn endpoint_clone_url( + site: &crate::config::SiteConfig, + endpoint: &EndpointConfig, + repo_name: &str, +) -> Result { + let mut url = url::Url::parse(&site.base_url) + .with_context(|| format!("invalid base URL for site '{}'", site.name))?; + let base_path = url.path().trim_end_matches('/'); + let repo_path = format!("{}/{}.git", endpoint.namespace.trim_matches('/'), repo_name); + if base_path.is_empty() { + url.set_path(&repo_path); + } else { + url.set_path(&format!("{base_path}/{repo_path}")); + } + Ok(url.to_string()) +} + fn sync_repo( context: &RepoSyncContext<'_>, repo_name: &str, @@ -608,7 +730,7 @@ fn sync_repo( } } - ensure_missing_repos(context, repo_name, repos, create_missing)?; + let created_repos = ensure_missing_repos(context, repo_name, repos, create_missing)?; if repos.len() < 2 { crate::logln!( @@ -664,9 +786,143 @@ fn sync_repo( }; return Ok(RepoSyncOutcome { state_update: Some(RepoStateUpdate::Set(refs)), + created_repos, }); } - Ok(RepoSyncOutcome::default()) + Ok(RepoSyncOutcome { + created_repos, + ..RepoSyncOutcome::default() + }) +} + +fn sync_assumed_repo( + context: &RepoSyncContext<'_>, + repo_name: &str, + repos: &mut [EndpointRepo], + create_missing: bool, + ref_state: &RefState, +) -> Result { + crate::logln!(); + crate::logln!( + "{} {}", + style("Repo").magenta().bold(), + style(repo_name).bold() + ); + let previous_repo_refs = ref_state.repo(&context.mirror.name, repo_name); + let all_remotes = remote_specs(context, repos)?; + let Some(initial_ref_check) = check_assumed_remote_refs(context, repo_name, &all_remotes)? + else { + return Ok(RepoSyncOutcome::default()); + }; + if initial_ref_check.refs.is_empty() { + crate::logln!( + " {} {}", + style("skip").yellow().bold(), + style("repository not found on any endpoint").dim() + ); + return Ok(RepoSyncOutcome::default()); + } + + let existing_remote_names = initial_ref_check + .refs + .keys() + .cloned() + .collect::>(); + let mut existing_repos = repos + .iter() + .filter(|repo| existing_remote_names.contains(&remote_name_for_endpoint_repo(repo))) + .cloned() + .collect::>(); + let existing_remotes = all_remotes + .iter() + .filter(|remote| existing_remote_names.contains(&remote.name)) + .cloned() + .collect::>(); + + let path = mirror_repo_path(context, repo_name); + let mirror_repo = GitMirror::open(path, context.redactor.clone(), context.dry_run)?; + mirror_repo.configure_remotes(&all_remotes)?; + let cached_ref_state = cached_ref_state(&mirror_repo, &existing_remotes)?; + backup_branches_deleted_everywhere( + context, + &mirror_repo, + repo_name, + detailed_repo_ref_state(previous_repo_refs).or(cached_ref_state.as_ref()), + &initial_ref_check.refs, + )?; + + for remote in &existing_remotes { + if let Err(error) = mirror_repo.fetch_remote(remote) { + if is_disabled_repository_error(&error) { + crate::logln!( + " {} {} {}", + style("skip").yellow().bold(), + style(repo_name).cyan(), + style(format!("provider blocked access on {}", remote.display)).dim() + ); + return Ok(RepoSyncOutcome::default()); + } + if is_missing_repository_error(&error) { + crate::logln!( + " {} {} {}", + style("missing").yellow().bold(), + style(repo_name).cyan(), + style(format!("on {}", remote.display)).dim() + ); + existing_repos.retain(|repo| remote_name_for_endpoint_repo(repo) != remote.name); + continue; + } + return Err(error).with_context(|| format!("failed to fetch {}", remote.display)); + } + } + + let created_repos = + ensure_missing_repos(context, repo_name, &mut existing_repos, create_missing)?; + if existing_repos.len() < 2 { + crate::logln!( + " {} {} {}", + style("skip").yellow().bold(), + style(repo_name).cyan(), + style("fewer than two endpoints have this repository").dim() + ); + return Ok(RepoSyncOutcome { + created_repos, + ..RepoSyncOutcome::default() + }); + } + + let remotes = remote_specs(context, &existing_repos)?; + mirror_repo.configure_remotes(&remotes)?; + let result = push_repo_refs( + context, + &mirror_repo, + repo_name, + &remotes, + &existing_repos, + detailed_repo_ref_state(previous_repo_refs).or(cached_ref_state.as_ref()), + &initial_ref_check.refs, + )?; + if !context.dry_run && !result.had_conflicts { + let refs = if result.pushed { + let Some(refs) = check_remote_refs(context, repo_name, &remotes)? else { + return Ok(RepoSyncOutcome { + created_repos, + ..RepoSyncOutcome::default() + }); + }; + refs + } else { + initial_ref_check.refs + }; + return Ok(RepoSyncOutcome { + state_update: Some(RepoStateUpdate::Set(refs)), + created_repos, + }); + } + Ok(RepoSyncOutcome { + created_repos, + ..RepoSyncOutcome::default() + }) } fn handle_repo_deletion( @@ -698,6 +954,7 @@ fn handle_repo_deletion( backup_deleted_repo(context, repo_name, repos, previous_refs, current_refs)?; Ok(Some(RepoSyncOutcome { state_update: (!context.dry_run).then_some(RepoStateUpdate::Remove), + ..RepoSyncOutcome::default() })) } RepoDeletionDecision::Propagate { @@ -715,6 +972,7 @@ fn handle_repo_deletion( delete_repos(context, repo_name, repos, &target_remotes)?; Ok(Some(RepoSyncOutcome { state_update: (!context.dry_run).then_some(RepoStateUpdate::Remove), + ..RepoSyncOutcome::default() })) } RepoDeletionDecision::Conflict { @@ -920,6 +1178,69 @@ fn check_remote_refs( Ok(Some(refs)) } +struct AssumedRemoteRefState { + refs: BTreeMap, +} + +fn check_assumed_remote_refs( + context: &RepoSyncContext<'_>, + repo_name: &str, + remotes: &[RemoteSpec], +) -> Result> { + enum RemoteRefCheck { + Found(String, RemoteRefState), + Missing(String), + Blocked, + } + + let ref_jobs = remotes.to_vec(); + let results = crate::parallel::map(ref_jobs, context.jobs, |remote| { + crate::logln!( + " {} {}", + style("probe refs").cyan().bold(), + style(&remote.display).dim() + ); + match ls_remote_refs(&remote, &context.redactor) { + Ok(snapshot) => Ok(RemoteRefCheck::Found(remote.name, snapshot.into())), + Err(error) if is_missing_repository_error(&error) => { + crate::logln!( + " {} {} {}", + style("missing").yellow().bold(), + style(repo_name).cyan(), + style(format!("on {}", remote.display)).dim() + ); + Ok(RemoteRefCheck::Missing(remote.name)) + } + Err(error) if is_disabled_repository_error(&error) => { + crate::logln!( + " {} {} {}", + style("skip").yellow().bold(), + style(repo_name).cyan(), + style(format!("provider blocked access on {}", remote.display)).dim() + ); + Ok(RemoteRefCheck::Blocked) + } + Err(error) => { + Err(error).with_context(|| format!("failed to check refs for {}", remote.display)) + } + } + })?; + + let mut refs = BTreeMap::new(); + for result in results { + match result { + RemoteRefCheck::Found(remote, refs_for_remote) => { + refs.insert(remote, refs_for_remote); + } + RemoteRefCheck::Missing(remote) => { + let _ = remote; + } + RemoteRefCheck::Blocked => return Ok(None), + } + } + Ok(Some(AssumedRemoteRefState { refs })) +} + fn remote_specs(context: &RepoSyncContext<'_>, repos: &[EndpointRepo]) -> Result> { let endpoint_map = context .mirror diff --git a/src/webhook.rs b/src/webhook.rs index 6d6e087..cd20b2f 100644 --- a/src/webhook.rs +++ b/src/webhook.rs @@ -8,7 +8,6 @@ use std::time::Duration; use anyhow::{Context, Result, bail}; use console::style; use hmac::{Hmac, KeyInit, Mac}; -use regex::escape; use serde::{Deserialize, Serialize}; use serde_json::Value; use sha2::Sha256; @@ -22,7 +21,7 @@ use crate::provider::{ EndpointRepo, ProviderClient, RemoteRepo, WebhookInstallOutcome, list_mirror_repos, }; use crate::state::{load_toml_or_default, save_toml}; -use crate::sync::{SyncOptions, sync_all}; +use crate::sync::{SyncOptions, sync_all, sync_webhook_repo}; type HmacSha256 = Hmac; const WEBHOOK_STATE_FILE: &str = "webhook-state.toml"; @@ -153,6 +152,7 @@ fn full_sync_timer_loop( &config, SyncOptions { work_dir: work_dir.clone(), + jobs: config.jobs, ..SyncOptions::default() }, ) { @@ -377,15 +377,12 @@ fn worker_loop( let _sync_guard = sync_lock .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); - let result = sync_all( + let result = sync_webhook_repo( &config, - SyncOptions { - group: Some(job.group.clone()), - repo_pattern: Some(format!("^{}$", escape(&job.repo))), - work_dir: work_dir.clone(), - jobs: 1, - ..SyncOptions::default() - }, + &job.group, + &job.repo, + work_dir.clone(), + config.jobs, ); match result { Ok(()) => crate::logln!( diff --git a/tests/unit/git.rs b/tests/unit/git.rs index b28b698..ced51ac 100644 --- a/tests/unit/git.rs +++ b/tests/unit/git.rs @@ -41,6 +41,19 @@ fn detects_provider_disabled_repository_errors() { assert!(!is_disabled_repository_error(&generic_forbidden)); } +#[test] +fn detects_missing_repository_errors() { + let error: anyhow::Error = GitCommandError::new( + "git ls-remote", + "", + "remote: Repository not found.\nfatal: repository 'https://github.com/alice/missing.git/' not found", + ) + .into(); + + assert!(is_missing_repository_error(&error)); + assert!(!is_disabled_repository_error(&error)); +} + #[test] fn ls_remote_snapshot_changes_when_remote_refs_change() { let fixture = GitFixture::new(); diff --git a/tests/unit/sync.rs b/tests/unit/sync.rs index ce7c114..082b0f6 100644 --- a/tests/unit/sync.rs +++ b/tests/unit/sync.rs @@ -448,6 +448,46 @@ fn endpoint_remote_names_do_not_slug_collide() { ); } +#[test] +fn targeted_endpoint_repos_synthesize_clone_urls_without_listing() { + let mirror = MirrorConfig { + name: "sync-1".to_string(), + endpoints: vec![EndpointConfig { + site: "gitlab".to_string(), + kind: crate::config::NamespaceKind::Group, + namespace: "parent/child".to_string(), + }], + sync_visibility: crate::config::SyncVisibility::All, + repo_whitelist: None, + repo_blacklist: None, + create_missing: true, + delete_missing: true, + visibility: crate::config::Visibility::Private, + conflict_resolution: ConflictResolutionStrategy::Fail, + }; + let config = Config { + jobs: crate::config::DEFAULT_JOBS, + sites: vec![crate::config::SiteConfig { + name: "gitlab".to_string(), + provider: crate::config::ProviderKind::Gitlab, + base_url: "https://gitlab.example.test/root".to_string(), + api_url: None, + token: crate::config::TokenConfig::Value("token".to_string()), + git_username: None, + }], + mirrors: vec![mirror.clone()], + webhook: None, + }; + + let repos = targeted_endpoint_repos(&config, &mirror, "repo").unwrap(); + + assert_eq!(repos.len(), 1); + assert_eq!( + repos[0].repo.clone_url, + "https://gitlab.example.test/root/parent/child/repo.git" + ); +} + #[test] fn created_repo_visibility_follows_existing_public_repo() { let mirror = test_mirror();