From 9c5212de0588403268c720fa83ceb002381f49e9 Mon Sep 17 00:00:00 2001 From: Ganonmaster Date: Sat, 2 May 2026 02:48:24 +0200 Subject: [PATCH] feat: add emote import from legacy source with dry-run mode Add POST /manage/import (auth-protected) that fetches emotes from a legacy JSON endpoint, downloads each image, uploads it to S3, and inserts it into the DB with the original timestamps preserved. - Skip emotes whose name already exists (best-effort duplicate detection across SQLite and PostgreSQL via error code + message fallback) - Validate source_url against a configurable host allowlist ([import] allowed_hosts in config, default ["smutba.se"]) - dry_run: true previews the import without writing to S3 or DB; result statuses are "would_import" / "would_skip" instead of "imported" / "skipped" - Add db.name_exists() for efficient per-name existence checks used by dry-run - Add reqwest (rustls-tls + json) and url dependencies - Integration tests: auth guard, allowlist rejection, mirror + skip-duplicates, dry-run no-persist --- Cargo.lock | 179 ++++++++++++++++++++++++ Cargo.toml | 2 + README.md | 55 ++++++++ config.example.toml | 5 + src/config.rs | 20 +++ src/db.rs | 59 ++++++++ src/lib.rs | 1 + src/models.rs | 38 +++++ src/routes/manage.rs | 323 ++++++++++++++++++++++++++++++++++++++++++- tests/routes.rs | 237 ++++++++++++++++++++++++++++++- 10 files changed, 915 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 15426f6..1a2041d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -703,6 +703,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.44" @@ -1214,8 +1220,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -1225,9 +1233,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi 5.3.0", "wasip2", + "wasm-bindgen", ] [[package]] @@ -1520,6 +1530,7 @@ dependencies = [ "tokio", "tokio-rustls 0.26.4", "tower-service", + "webpki-roots", ] [[package]] @@ -1695,6 +1706,16 @@ version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" +[[package]] +name = "iri-string" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "itoa" version = "1.0.17" @@ -1812,6 +1833,12 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + [[package]] name = "matchers" version = "0.2.0" @@ -1856,6 +1883,7 @@ dependencies = [ "chrono", "config", "mime_guess", + "reqwest", "serde", "serde_json", "sqlx", @@ -1866,6 +1894,7 @@ dependencies = [ "tower-http", "tracing", "tracing-subscriber", + "url", "uuid", ] @@ -2220,6 +2249,61 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.37", + "socket2 0.6.3", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls 0.23.37", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.6.3", + "tracing", + "windows-sys 0.52.0", +] + [[package]] name = "quote" version = "1.0.45" @@ -2353,6 +2437,44 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.37", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", +] + [[package]] name = "rfc6979" version = "0.3.1" @@ -2420,6 +2542,12 @@ dependencies = [ "ordered-multimap", ] +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + [[package]] name = "rustc_version" version = "0.4.1" @@ -2449,6 +2577,7 @@ checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "aws-lc-rs", "once_cell", + "ring", "rustls-pki-types", "rustls-webpki 0.103.9", "subtle", @@ -2473,6 +2602,7 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ + "web-time", "zeroize", ] @@ -3019,6 +3149,9 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -3281,9 +3414,12 @@ checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags", "bytes", + "futures-util", "http 1.4.0", "http-body 1.0.1", + "iri-string", "pin-project-lite", + "tower", "tower-layer", "tower-service", "tracing", @@ -3544,6 +3680,20 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" +dependencies = [ + "cfg-if", + "futures-util", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.114" @@ -3610,6 +3760,35 @@ dependencies = [ "semver", ] +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "whoami" version = "1.6.1" diff --git a/Cargo.toml b/Cargo.toml index efd7203..68b760a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ mime_guess = "2" tokio-util = { version = "0.7", features = ["io"] } bytes = "1" base64 = "0.22" +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] } +url = "2" [dev-dependencies] tower = { version = "0.5", features = ["util"] } diff --git a/README.md b/README.md index 7872cc5..ca4b313 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,57 @@ # mikebase A Rust-based emote database and API. + +## Importing legacy emotes + +Use the protected management endpoint to mirror emotes from a legacy JSON feed. + +### Endpoint + +POST /manage/import + +- Auth: HTTP Basic (same credentials used for other protected routes) +- Content-Type: application/json +- Body: + +```json +{ + "source_url": "https://smutba.se/emoji/json/" +} +``` + +### Behavior + +- Fetches source JSON in the format `{"emotes": [{name, url, created, modified}, ...]}` +- Downloads each image URL and uploads bytes to this app's configured S3 bucket +- Inserts emote rows preserving source `created` and `modified` timestamps +- Skips entries where `name` already exists locally +- Continues processing after per-item failures and returns a batch summary + +### Example response + +```json +{ + "source_url": "https://smutba.se/emoji/json/", + "total": 2, + "imported": 1, + "skipped": 1, + "failed": 0, + "results": [ + {"name": "legacy_new", "status": "imported", "reason": null}, + {"name": "legacy_duplicate", "status": "skipped", "reason": "Name already exists"} + ] +} +``` + +### Allowlisted hosts + +Import is restricted to hosts in configuration: + +```toml +[import] +allowed_hosts = ["smutba.se"] +``` + +Environment override example: + +- `APP__IMPORT__ALLOWED_HOSTS=["smutba.se","legacy.example.org"]` diff --git a/config.example.toml b/config.example.toml index 57a2a75..dd0d49c 100644 --- a/config.example.toml +++ b/config.example.toml @@ -19,6 +19,11 @@ port = 3000 username = "admin" password = "changeme" +[import] +# Hosts allowed as migration sources for POST /manage/import. +# The request is rejected unless the source URL host matches one of these. +allowed_hosts = ["smutba.se"] + [s3] endpoint = "https://s3.eu-central-1.wasabisys.com" region = "eu-central-1" diff --git a/src/config.rs b/src/config.rs index 7c1f6e6..40541c3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -43,6 +43,16 @@ pub struct AuthConfig { pub password: String, } +#[derive(Debug, Deserialize, Clone)] +pub struct ImportConfig { + #[serde(default = "default_allowed_hosts")] + pub allowed_hosts: Vec, +} + +fn default_allowed_hosts() -> Vec { + vec!["smutba.se".to_string()] +} + #[derive(Debug, Deserialize, Clone)] pub struct AppConfig { pub s3: S3Config, @@ -50,6 +60,8 @@ pub struct AppConfig { #[serde(default)] pub server: ServerConfig, pub auth: Option, + #[serde(default)] + pub import: ImportConfig, } impl Default for ServerConfig { @@ -61,6 +73,14 @@ impl Default for ServerConfig { } } +impl Default for ImportConfig { + fn default() -> Self { + Self { + allowed_hosts: default_allowed_hosts(), + } + } +} + impl AppConfig { pub fn load() -> Result { let cfg = Config::builder() diff --git a/src/db.rs b/src/db.rs index 23fb489..c8436e7 100644 --- a/src/db.rs +++ b/src/db.rs @@ -40,6 +40,14 @@ impl Database { Ok(row) } + pub async fn name_exists(&self, name: &str) -> Result { + let row: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM emotes WHERE name = $1") + .bind(name) + .fetch_one(&self.pool) + .await?; + Ok(row.0 > 0) + } + pub async fn create_emote( &self, uuid: &str, @@ -70,6 +78,37 @@ impl Database { }) } + pub async fn create_emote_with_timestamps( + &self, + uuid: &str, + name: &str, + alias: Option<&str>, + image_key: &str, + created: &str, + modified: &str, + ) -> Result { + sqlx::query( + "INSERT INTO emotes (uuid, name, alias, image_key, created, modified) VALUES ($1, $2, $3, $4, $5, $6)", + ) + .bind(uuid) + .bind(name) + .bind(alias) + .bind(image_key) + .bind(created) + .bind(modified) + .execute(&self.pool) + .await?; + + Ok(EmoteRow { + uuid: uuid.to_string(), + name: name.to_string(), + alias: alias.map(|s| s.to_string()), + image_key: image_key.to_string(), + created: created.to_string(), + modified: modified.to_string(), + }) + } + pub async fn update_emote( &self, uuid: &str, @@ -180,6 +219,26 @@ mod tests { assert_eq!(updated.image_key, "emoji/x.png"); // unchanged } + #[tokio::test] + async fn create_with_preserved_timestamps() { + let db = test_db().await; + let id = new_uuid(); + let created = "2020-01-01T10:00:00+00:00"; + let modified = "2021-02-03T11:30:00+00:00"; + + let row = db + .create_emote_with_timestamps(&id, "legacy", None, "emoji/legacy.png", created, modified) + .await + .unwrap(); + + assert_eq!(row.created, created); + assert_eq!(row.modified, modified); + + let fetched = db.get_emote_by_id(&id).await.unwrap().unwrap(); + assert_eq!(fetched.created, created); + assert_eq!(fetched.modified, modified); + } + #[tokio::test] async fn update_unspecified_fields_are_kept() { let db = test_db().await; diff --git a/src/lib.rs b/src/lib.rs index bf86e4a..c32fd8d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,7 @@ pub fn build_router(state: AppState) -> Router { let protected = Router::new() .route("/manage", get(routes::manage::manage_root)) .route("/manage/emotes", get(routes::manage::list_admin_emotes)) + .route("/manage/import", post(routes::manage::import_emotes)) .route("/emotes", post(routes::emotes::create_emote)) .route("/emotes/{uuid}", put(routes::emotes::update_emote)) .route("/emotes/{uuid}", delete(routes::emotes::delete_emote)) diff --git a/src/models.rs b/src/models.rs index cd269b0..06183c8 100644 --- a/src/models.rs +++ b/src/models.rs @@ -51,6 +51,44 @@ pub struct AdminEmoteResponse { pub modified: DateTime, } +#[derive(Debug, Deserialize)] +pub struct ImportEmotesRequest { + pub source_url: String, + #[serde(default)] + pub dry_run: bool, +} + +#[derive(Debug, Deserialize)] +pub struct LegacyEmotesPayload { + pub emotes: Vec, +} + +#[derive(Debug, Deserialize)] +pub struct LegacyEmote { + pub name: String, + pub url: String, + pub created: DateTime, + pub modified: DateTime, +} + +#[derive(Debug, Serialize)] +pub struct ImportEmotesResponse { + pub source_url: String, + pub dry_run: bool, + pub total: usize, + pub imported: usize, + pub skipped: usize, + pub failed: usize, + pub results: Vec, +} + +#[derive(Debug, Serialize)] +pub struct ImportEmoteResult { + pub name: String, + pub status: String, + pub reason: Option, +} + /// Payload for updating an existing emote. /// /// `alias` uses a double-Option to distinguish three states: diff --git a/src/routes/manage.rs b/src/routes/manage.rs index b3d9a92..c43a2a0 100644 --- a/src/routes/manage.rs +++ b/src/routes/manage.rs @@ -2,10 +2,20 @@ use axum::{ extract::State, http::StatusCode, response::{Html, IntoResponse, Json}, + Json as AxumJson, }; +use reqwest::header::CONTENT_TYPE; use serde_json::json; +use std::time::Duration; +use url::Url; -use crate::{models::AdminEmoteResponse, AppState}; +use crate::{ + models::{ + new_uuid, AdminEmoteResponse, ImportEmoteResult, ImportEmotesRequest, ImportEmotesResponse, + LegacyEmotesPayload, + }, + AppState, +}; /// GET /manage /// Serves the emote management HTML page. @@ -41,3 +51,314 @@ pub async fn list_admin_emotes(State(state): State) -> impl IntoRespon } } } + +/// POST /manage/import +/// Import emotes from a legacy JSON endpoint and mirror image bytes to local storage. +/// When `dry_run` is true, validates and previews the import without writing anything. +pub async fn import_emotes( + State(state): State, + AxumJson(payload): AxumJson, +) -> impl IntoResponse { + let dry_run = payload.dry_run; + let source_url = payload.source_url.trim(); + if source_url.is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({"error": "source_url is required"})), + ) + .into_response(); + } + + let parsed_source = match Url::parse(source_url) { + Ok(url) => url, + Err(_) => { + return ( + StatusCode::BAD_REQUEST, + Json(json!({"error": "Invalid source_url"})), + ) + .into_response(); + } + }; + + if !is_host_allowed(parsed_source.host_str(), &state.cfg.import.allowed_hosts) { + return ( + StatusCode::BAD_REQUEST, + Json(json!({"error": "source_url host is not allowlisted"})), + ) + .into_response(); + } + + let client = match reqwest::Client::builder() + .connect_timeout(Duration::from_secs(10)) + .timeout(Duration::from_secs(30)) + .build() + { + Ok(c) => c, + Err(e) => { + tracing::error!("Failed to build HTTP client: {e}"); + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"error": "Failed to initialize import client"})), + ) + .into_response(); + } + }; + + let source_resp = match client.get(parsed_source.clone()).send().await { + Ok(resp) => resp, + Err(e) => { + tracing::error!("Failed to fetch source payload: {e}"); + return ( + StatusCode::BAD_GATEWAY, + Json(json!({"error": "Failed to fetch source payload"})), + ) + .into_response(); + } + }; + + if !source_resp.status().is_success() { + return ( + StatusCode::BAD_GATEWAY, + Json(json!({ + "error": "Source payload request failed", + "status": source_resp.status().as_u16() + })), + ) + .into_response(); + } + + let payload: LegacyEmotesPayload = match source_resp.json().await { + Ok(p) => p, + Err(e) => { + tracing::error!("Failed to parse source payload JSON: {e}"); + return ( + StatusCode::BAD_REQUEST, + Json(json!({"error": "Invalid source payload schema"})), + ) + .into_response(); + } + }; + + let total = payload.emotes.len(); + let mut imported = 0usize; + let mut skipped = 0usize; + let mut failed = 0usize; + let mut results = Vec::with_capacity(total); + + for legacy in payload.emotes { + let name = legacy.name.trim().to_string(); + if name.is_empty() { + failed += 1; + results.push(ImportEmoteResult { + name: legacy.name, + status: "failed".to_string(), + reason: Some("Missing emote name".to_string()), + }); + continue; + } + + let image_url = match Url::parse(&legacy.url) { + Ok(url) => url, + Err(_) => { + failed += 1; + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some("Invalid image URL".to_string()), + }); + continue; + } + }; + + let image_resp = match client.get(image_url.clone()).send().await { + Ok(resp) => resp, + Err(e) => { + failed += 1; + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some(format!("Image download failed: {e}")), + }); + continue; + } + }; + + if !image_resp.status().is_success() { + failed += 1; + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some(format!("Image download failed with status {}", image_resp.status())), + }); + continue; + } + + let content_type = image_resp + .headers() + .get(CONTENT_TYPE) + .and_then(|h| h.to_str().ok()) + .map(|ct| ct.split(';').next().unwrap_or(ct).trim().to_string()) + .unwrap_or_else(|| { + mime_guess::from_path(image_url.path()) + .first_or_octet_stream() + .to_string() + }); + + let data = match image_resp.bytes().await { + Ok(b) => b, + Err(e) => { + failed += 1; + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some(format!("Failed reading image bytes: {e}")), + }); + continue; + } + }; + + if dry_run { + // Check for a name collision without writing anything. + let exists = match state.db.name_exists(&name).await { + Ok(v) => v, + Err(e) => { + failed += 1; + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some(format!("DB check failed: {e}")), + }); + continue; + } + }; + if exists { + skipped += 1; + results.push(ImportEmoteResult { + name, + status: "would_skip".to_string(), + reason: Some("Name already exists".to_string()), + }); + } else { + imported += 1; + results.push(ImportEmoteResult { + name, + status: "would_import".to_string(), + reason: None, + }); + } + continue; + } + + let ext = infer_extension(&image_url, &content_type); + let id = new_uuid(); + let key = format!("emoji/{id}.{ext}"); + + if let Err(e) = state.storage.upload(&key, data, &content_type).await { + failed += 1; + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some(format!("Storage upload failed: {e}")), + }); + continue; + } + + let created = legacy.created.to_rfc3339(); + let modified = legacy.modified.to_rfc3339(); + match state + .db + .create_emote_with_timestamps(&id, &name, None, &key, &created, &modified) + .await + { + Ok(_) => { + imported += 1; + results.push(ImportEmoteResult { + name, + status: "imported".to_string(), + reason: None, + }); + } + Err(e) if is_unique_name_violation(&e) => { + skipped += 1; + if let Err(del_err) = state.storage.delete(&key).await { + tracing::warn!("Failed to delete skipped upload key {key}: {del_err}"); + } + results.push(ImportEmoteResult { + name, + status: "skipped".to_string(), + reason: Some("Name already exists".to_string()), + }); + } + Err(e) => { + failed += 1; + if let Err(del_err) = state.storage.delete(&key).await { + tracing::warn!("Failed to cleanup failed upload key {key}: {del_err}"); + } + results.push(ImportEmoteResult { + name, + status: "failed".to_string(), + reason: Some(format!("Database insert failed: {e}")), + }); + } + } + } + + let response = ImportEmotesResponse { + source_url: parsed_source.to_string(), + dry_run, + total, + imported, + skipped, + failed, + results, + }; + + (StatusCode::OK, Json(json!(response))).into_response() +} + +fn is_host_allowed(host: Option<&str>, allowed_hosts: &[String]) -> bool { + let Some(host) = host else { + return false; + }; + let host = host.to_ascii_lowercase(); + allowed_hosts + .iter() + .any(|allowed| allowed.eq_ignore_ascii_case(&host)) +} + +fn infer_extension(url: &Url, content_type: &str) -> String { + if let Some(ext) = std::path::Path::new(url.path()) + .extension() + .and_then(|e| e.to_str()) + .filter(|e| !e.is_empty()) + { + let lower = ext.to_ascii_lowercase(); + if lower.chars().all(|c| c.is_ascii_alphanumeric()) { + return lower; + } + } + + if let Some(mime) = mime_guess::get_mime_extensions_str(content_type) + .and_then(|exts| exts.first()) + { + return (*mime).to_string(); + } + + "bin".to_string() +} + +fn is_unique_name_violation(err: &sqlx::Error) -> bool { + match err { + sqlx::Error::Database(db_err) => { + if let Some(code) = db_err.code() { + if code == "23505" || code == "2067" || code == "1555" { + return true; + } + } + + let msg = db_err.message().to_ascii_lowercase(); + msg.contains("unique") || msg.contains("duplicate") + } + _ => false, + } +} diff --git a/tests/routes.rs b/tests/routes.rs index c1bccff..c37dffd 100644 --- a/tests/routes.rs +++ b/tests/routes.rs @@ -3,14 +3,20 @@ use std::sync::Arc; use axum::{ body::Body, http::{Request, StatusCode}, + routing::get, + Json, + Router, }; use base64::{engine::general_purpose::STANDARD, Engine}; +use chrono::{DateTime, Utc}; +use serde_json::json; use sqlx::any::install_default_drivers; use tower::ServiceExt; +use tokio::net::TcpListener; use mikebase::{ build_router, - config::{AppConfig, AuthConfig, DatabaseConfig, S3Config, ServerConfig}, + config::{AppConfig, AuthConfig, DatabaseConfig, ImportConfig, S3Config, ServerConfig}, db::Database, models::new_uuid, storage::S3Storage, @@ -20,6 +26,14 @@ use mikebase::{ // ── Helpers ─────────────────────────────────────────────────────────────────── async fn test_state() -> AppState { + test_state_with_s3( + "http://localhost:19999".to_string(), + "http://localhost:19999/test-bucket".to_string(), + ) + .await +} + +async fn test_state_with_s3(s3_endpoint: String, s3_public_url: String) -> AppState { install_default_drivers(); let pool = sqlx::pool::PoolOptions::::new() .max_connections(1) @@ -31,12 +45,12 @@ async fn test_state() -> AppState { let cfg = Arc::new(AppConfig { s3: S3Config { - endpoint: "http://localhost:19999".to_string(), + endpoint: s3_endpoint, region: "us-east-1".to_string(), bucket: "test-bucket".to_string(), access_key: "test".to_string(), secret_key: "test".to_string(), - public_url: "http://localhost:19999/test-bucket".to_string(), + public_url: s3_public_url, }, database: DatabaseConfig { url: "sqlite::memory:".to_string() }, server: ServerConfig::default(), @@ -44,12 +58,84 @@ async fn test_state() -> AppState { username: "admin".to_string(), password: "secret".to_string(), }), + import: ImportConfig { + allowed_hosts: vec!["smutba.se".to_string(), "localhost".to_string(), "127.0.0.1".to_string()], + }, }); let storage = S3Storage::new(&cfg); AppState { db, storage, cfg } } +async fn spawn_mock_s3_server() -> (String, tokio::task::JoinHandle<()>) { + async fn ok() -> StatusCode { + StatusCode::OK + } + + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + let base = format!("http://{}", addr); + + let app = Router::new() + .route("/", get(ok)) + .route("/{*path}", axum::routing::any(ok)); + + let handle = tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + + (base, handle) +} + +async fn spawn_legacy_source_server() -> (String, tokio::task::JoinHandle<()>) { + async fn image_new() -> ([(&'static str, &'static str); 1], &'static [u8]) { + ([ ("content-type", "image/png") ], b"PNGDATA") + } + + async fn image_dup() -> ([(&'static str, &'static str); 1], &'static [u8]) { + ([ ("content-type", "image/png") ], b"PNGDATA2") + } + + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + let base = format!("http://{}", addr); + + let payload_base = base.clone(); + let app = Router::new() + .route( + "/emoji/json/", + get(move || { + let payload_base = payload_base.clone(); + async move { + Json(json!({ + "emotes": [ + { + "name": "legacy_new", + "url": format!("{}/images/new.png", payload_base), + "created": "2020-01-01T00:00:00+00:00", + "modified": "2020-02-02T00:00:00+00:00" + }, + { + "name": "legacy_duplicate", + "url": format!("{}/images/duplicate.png", payload_base), + "created": "2021-01-01T00:00:00+00:00", + "modified": "2021-01-02T00:00:00+00:00" + } + ] + })) + } + }), + ) + .route("/images/new.png", get(image_new)) + .route("/images/duplicate.png", get(image_dup)); + + let handle = tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + + (base, handle) +} + fn auth_header() -> String { format!("Basic {}", STANDARD.encode("admin:secret")) } @@ -204,6 +290,151 @@ async fn manage_emotes_requires_auth() { assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); } +#[tokio::test] +async fn manage_import_requires_auth() { + let app = build_router(test_state().await); + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/manage/import") + .header("content-type", "application/json") + .body(Body::from(r#"{"source_url":"https://smutba.se/emoji/json/"}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); +} + +#[tokio::test] +async fn manage_import_rejects_non_allowlisted_host() { + let app = build_router(test_state().await); + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/manage/import") + .header("authorization", auth_header()) + .header("content-type", "application/json") + .body(Body::from(r#"{"source_url":"https://example.com/emoji/json/"}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn manage_import_mirrors_and_skips_duplicates() { + let (legacy_base, server_handle) = spawn_legacy_source_server().await; + let (s3_base, s3_handle) = spawn_mock_s3_server().await; + + let state = test_state_with_s3(s3_base.clone(), format!("{}/test-bucket", s3_base)).await; + // Pre-seed one duplicate emote name. + let existing_id = new_uuid(); + state + .db + .create_emote(&existing_id, "legacy_duplicate", None, "emoji/existing.png") + .await + .unwrap(); + + let app = build_router(state.clone()); + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/manage/import") + .header("authorization", auth_header()) + .header("content-type", "application/json") + .body(Body::from(format!( + "{{\"source_url\":\"{}/emoji/json/\"}}", + legacy_base + ))) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + let json = response_json(resp.into_body()).await; + assert_eq!(json["total"], 2); + assert_eq!(json["imported"], 1); + assert_eq!(json["skipped"], 1); + assert_eq!(json["failed"], 0); + + let rows = state.db.list_emotes().await.unwrap(); + let imported = rows.iter().find(|r| r.name == "legacy_new").unwrap(); + let created = DateTime::parse_from_rfc3339(&imported.created) + .unwrap() + .with_timezone(&Utc) + .to_rfc3339(); + let modified = DateTime::parse_from_rfc3339(&imported.modified) + .unwrap() + .with_timezone(&Utc) + .to_rfc3339(); + assert_eq!(created, "2020-01-01T00:00:00+00:00"); + assert_eq!(modified, "2020-02-02T00:00:00+00:00"); + + server_handle.abort(); + s3_handle.abort(); +} + +#[tokio::test] +async fn manage_import_dry_run_does_not_persist() { + let (legacy_base, server_handle) = spawn_legacy_source_server().await; + let (s3_base, s3_handle) = spawn_mock_s3_server().await; + + let state = test_state_with_s3(s3_base.clone(), format!("{}/test-bucket", s3_base)).await; + // Pre-seed one duplicate so we can verify would_skip detection. + let existing_id = new_uuid(); + state + .db + .create_emote(&existing_id, "legacy_duplicate", None, "emoji/existing.png") + .await + .unwrap(); + + let app = build_router(state.clone()); + let body = format!( + "{{\"source_url\":\"{}/emoji/json/\",\"dry_run\":true}}", + legacy_base + ); + let resp = app + .oneshot( + Request::builder() + .method("POST") + .uri("/manage/import") + .header("authorization", auth_header()) + .header("content-type", "application/json") + .body(Body::from(body)) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::OK); + let json = response_json(resp.into_body()).await; + assert_eq!(json["dry_run"], true); + assert_eq!(json["total"], 2); + assert_eq!(json["imported"], 1); + assert_eq!(json["skipped"], 1); + assert_eq!(json["failed"], 0); + + let results = json["results"].as_array().unwrap(); + let new_result = results.iter().find(|r| r["name"] == "legacy_new").unwrap(); + assert_eq!(new_result["status"], "would_import"); + let dup_result = results.iter().find(|r| r["name"] == "legacy_duplicate").unwrap(); + assert_eq!(dup_result["status"], "would_skip"); + + // Nothing new should have been written to the DB. + let rows = state.db.list_emotes().await.unwrap(); + assert_eq!(rows.len(), 1, "dry-run must not insert any rows"); + assert_eq!(rows[0].name, "legacy_duplicate"); + + server_handle.abort(); + s3_handle.abort(); +} + // ── POST /emotes input validation ───────────────────────────────────────────── #[tokio::test]