From 80a233d6b727ded1c85911d248b0ac2353e04b0e Mon Sep 17 00:00:00 2001 From: Reiner Herrmann Date: Fri, 9 Feb 2024 17:23:08 +0100 Subject: [PATCH] Preview URLs via upstream PR by Reiner Herrmann: !347 (4b54a8b6) Signed-off-by: rooot --- Cargo.lock | 225 ++++++++++++++++++++++++--- Cargo.toml | 2 + conduwuit-example.toml | 2 +- debian/postinst | 2 +- src/api/client_server/media.rs | 265 +++++++++++++++++++++++++++++++- src/config/mod.rs | 4 + src/database/key_value/media.rs | 105 ++++++++++++- src/database/mod.rs | 2 + src/main.rs | 1 + src/service/globals/mod.rs | 4 + src/service/media/data.rs | 17 ++ src/service/media/mod.rs | 60 +++++++- src/service/mod.rs | 7 +- 13 files changed, 670 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f653a2d0..79f57e23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,7 +102,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -234,7 +234,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.48", ] [[package]] @@ -367,7 +367,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -438,6 +438,7 @@ dependencies = [ "tracing-opentelemetry", "tracing-subscriber", "trust-dns-resolver", + "webpage", ] [[package]] @@ -536,7 +537,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -645,7 +646,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -720,6 +721,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version 
= "0.3.30" @@ -760,7 +771,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -935,6 +946,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "http" version = "0.2.11" @@ -1324,12 +1349,44 @@ dependencies = [ "libc", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -1390,6 +1447,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "nix" version = "0.27.1" @@ -1681,7 
+1744,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -1706,6 +1769,44 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.4" @@ -1723,7 +1824,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -1785,6 +1886,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "prettyplease" version = "0.2.16" @@ -1792,7 +1899,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.48", ] [[package]] @@ -1822,7 +1929,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", "version_check", "yansi", ] @@ -2154,7 +2261,7 @@ dependencies = [ "quote", "ruma-identifiers-validation", "serde", - "syn", + "syn 2.0.48", "toml", ] @@ -2367,7 +2474,7 @@ checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -2522,6 +2629,12 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "slab" version = "0.4.9" @@ -2563,6 +2676,32 @@ dependencies = [ "der", ] +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "subslice" version = "0.2.3" @@ -2578,6 +2717,17 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "syn" +version = "1.0.109" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.48" @@ -2616,6 +2766,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.56" @@ -2633,7 +2794,7 @@ checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -2769,7 +2930,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -2923,7 +3084,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -3126,6 +3287,12 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "uuid" version = "1.7.0" @@ -3189,7 +3356,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.48", "wasm-bindgen-shared", ] @@ -3223,7 +3390,7 @@ checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3254,6 +3421,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpage" +version 
= "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8598785beeb5af95abe95e7bb20c7e747d1188347080d6811d5a56d2b9a5f368" +dependencies = [ + "html5ever", + "markup5ever_rcdom", + "serde_json", +] + [[package]] name = "weezl" version = "0.1.8" @@ -3445,6 +3623,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "xml5ever" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" +dependencies = [ + "log", + "mac", + "markup5ever", +] + [[package]] name = "yansi" version = "1.0.0-rc.1" @@ -3468,7 +3657,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 41a5a8b5..8fc01f21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,6 +103,8 @@ ipaddress = "0.1.3" sd-notify = { version = "0.4.1", optional = true } +webpage = { version = "1.6", default-features = false } + [target.'cfg(unix)'.dependencies] nix = { version = "0.27.1", features = ["resource"] } diff --git a/conduwuit-example.toml b/conduwuit-example.toml index 2381ddb1..2653ea29 100644 --- a/conduwuit-example.toml +++ b/conduwuit-example.toml @@ -155,7 +155,7 @@ allow_public_room_directory_without_auth = false # If federation is disabled entirely (`allow_federation`), this is inherently false. For privacy, this is best disabled. allow_device_name_federation = false - +url_preview_allowlist = [] ### Misc diff --git a/debian/postinst b/debian/postinst index 630e4432..aa3a8cec 100644 --- a/debian/postinst +++ b/debian/postinst @@ -192,7 +192,7 @@ allow_public_room_directory_without_auth = false # If federation is disabled entirely (`allow_federation`), this is inherently false. For privacy, this is best disabled. 
allow_device_name_federation = false - +url_preview_allowlist = [] ### Misc diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs index e36d700d..4c408ffc 100644 --- a/src/api/client_server/media.rs +++ b/src/api/client_server/media.rs @@ -1,13 +1,21 @@ use std::time::Duration; -use crate::{service::media::FileMeta, services, utils, Error, Result, Ruma}; +use crate::{service::media::{FileMeta, UrlPreviewData}, services, utils, Error, Result, Ruma}; use ruma::api::client::{ error::ErrorKind, media::{ create_content, get_content, get_content_as_filename, get_content_thumbnail, - get_media_config, + get_media_config, get_media_preview }, }; + +use { + webpage::HTML, + reqwest::Url, + std::{io::Cursor, net::IpAddr, sync::Arc}, + image::io::Reader as ImgReader, +}; + use tracing::info; /// generated MXC ID (`media-id`) length @@ -24,6 +32,259 @@ pub async fn get_media_config_route( }) } +async fn download_image( + client: &reqwest::Client, + url: &str, +) -> Result { + let image = client.get(url).send().await?.bytes().await?; + let mxc = format!( + "mxc://{}/{}", + services().globals.server_name(), + utils::random_string(MXC_LENGTH) + ); + services().media + .create(mxc.clone(), None, None, &image) + .await?; + + let (width, height) = match ImgReader::new(Cursor::new(&image)).with_guessed_format() { + Err(_) => (None, None), + Ok(reader) => match reader.into_dimensions() { + Err(_) => (None, None), + Ok((width, height)) => (Some(width), Some(height)), + }, + }; + + Ok(UrlPreviewData { + image: Some(mxc), + image_size: Some(image.len()), + image_width: width, + image_height: height, + ..Default::default() + }) +} + +async fn download_html( + client: &reqwest::Client, + url: &str, +) -> Result { + let max_download_size = 300_000; + + let mut response = client.get(url).send().await?; + + let mut bytes: Vec = Vec::new(); + while let Some(chunk) = response.chunk().await? 
{ + bytes.extend_from_slice(&chunk); + if bytes.len() > max_download_size { + break; + } + } + let body = String::from_utf8_lossy(&bytes); + let html = match HTML::from_string(body.to_string(), Some(url.to_owned())) { + Ok(html) => html, + Err(_) => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Failed to parse HTML", + )) + } + }; + + let mut data = match html.opengraph.images.first() { + None => UrlPreviewData::default(), + Some(obj) => download_image(client, &obj.url).await?, + }; + + let props = html.opengraph.properties; + /* use OpenGraph title/description, but fall back to HTML if not available */ + data.title = props.get("title").cloned().or(html.title); + data.description = props.get("description").cloned().or(html.description); + Ok(data) +} + +fn url_request_allowed(addr: &IpAddr) -> bool { + // could be implemented with reqwest when it supports IP filtering: + // https://github.com/seanmonstar/reqwest/issues/1515 + + // These checks have been taken from the Rust core/net/ipaddr.rs crate, + // IpAddr::V4.is_global() and IpAddr::V6.is_global(), as .is_global is not + // yet stabilized. TODO: Once this is stable, this match can be simplified. + match addr { + IpAddr::V4(ip4) => { + !(ip4.octets()[0] == 0 // "This network" + || ip4.is_private() + || (ip4.octets()[0] == 100 && (ip4.octets()[1] & 0b1100_0000 == 0b0100_0000)) // is_shared() + || ip4.is_loopback() + || ip4.is_link_local() + // addresses reserved for future protocols (`192.0.0.0/24`) + || (ip4.octets()[0] == 192 && ip4.octets()[1] == 0 && ip4.octets()[2] == 0) + || ip4.is_documentation() + || (ip4.octets()[0] == 198 && (ip4.octets()[1] & 0xfe) == 18) // is_benchmarking() + || (ip4.octets()[0] & 240 == 240 && !ip4.is_broadcast()) // is_reserved() + || ip4.is_broadcast()) + } + IpAddr::V6(ip6) => { + !(ip6.is_unspecified() + || ip6.is_loopback() + // IPv4-mapped Address (`::ffff:0:0/96`) + || matches!(ip6.segments(), [0, 0, 0, 0, 0, 0xffff, _, _]) + // IPv4-IPv6 Translat. 
(`64:ff9b:1::/48`) + || matches!(ip6.segments(), [0x64, 0xff9b, 1, _, _, _, _, _]) + // Discard-Only Address Block (`100::/64`) + || matches!(ip6.segments(), [0x100, 0, 0, 0, _, _, _, _]) + // IETF Protocol Assignments (`2001::/23`) + || (matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if b < 0x200) + && !( + // Port Control Protocol Anycast (`2001:1::1`) + u128::from_be_bytes(ip6.octets()) == 0x2001_0001_0000_0000_0000_0000_0000_0001 + // Traversal Using Relays around NAT Anycast (`2001:1::2`) + || u128::from_be_bytes(ip6.octets()) == 0x2001_0001_0000_0000_0000_0000_0000_0002 + // AMT (`2001:3::/32`) + || matches!(ip6.segments(), [0x2001, 3, _, _, _, _, _, _]) + // AS112-v6 (`2001:4:112::/48`) + || matches!(ip6.segments(), [0x2001, 4, 0x112, _, _, _, _, _]) + // ORCHIDv2 (`2001:20::/28`) + || matches!(ip6.segments(), [0x2001, b, _, _, _, _, _, _] if b >= 0x20 && b <= 0x2F) + )) + || ((ip6.segments()[0] == 0x2001) && (ip6.segments()[1] == 0xdb8)) // is_documentation() + || ((ip6.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local() + || ((ip6.segments()[0] & 0xffc0) == 0xfe80)) // is_unicast_link_local + } + } +} + +async fn request_url_preview(url: &str) -> Result { + let client = services().globals.default_client(); + let response = client.head(url).send().await?; + + if !response + .remote_addr() + .map_or(false, |a| url_request_allowed(&a.ip())) + { + return Err(Error::BadRequest( + ErrorKind::Forbidden, + "Requesting from this address forbidden", + )); + } + + let content_type = match response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|x| x.to_str().ok()) + { + Some(ct) => ct, + None => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + "Unknown Content-Type", + )) + } + }; + let data = match content_type { + html if html.starts_with("text/html") => download_html(&client, url).await?, + img if img.starts_with("image/") => download_image(&client, url).await?, + _ => { + return Err(Error::BadRequest( + ErrorKind::Unknown, + 
"Unsupported Content-Type", + )) + } + }; + + services().media.set_url_preview(url, &data).await?; + + Ok(data) +} + +async fn get_url_preview(url: &str) -> Result { + if let Some(preview) = services().media.get_url_preview(url).await { + return Ok(preview); + } + + // ensure that only one request is made per URL + let mutex_request = Arc::clone( + services() + .media + .url_preview_mutex + .write() + .unwrap() + .entry(url.to_owned()) + .or_default(), + ); + let _request_lock = mutex_request.lock().await; + + match services().media.get_url_preview(url).await { + Some(preview) => Ok(preview), + None => request_url_preview(url).await + } +} + +fn url_preview_allowed(url_str: &str) -> bool { + const DEFAULT_ALLOWLIST: &[&str] = &[ + "matrix.org", + "mastodon.social", + "youtube.com", + "wikipedia.org", + ]; + + let url = match Url::parse(url_str) { + Ok(u) => u, + Err(_) => return false, + }; + if ["http", "https"].iter().all(|&scheme| scheme != url.scheme().to_lowercase()) { + return false; + } + let mut host = match url.host_str() { + None => return false, + Some(h) => h.to_lowercase(), + }; + + let allowlist = services().globals.url_preview_allowlist(); + if allowlist.contains(&"*".to_owned()) { + return true; + } + while !host.is_empty() { + if allowlist.contains(&host) { + return true; + } + if allowlist.contains(&"default".to_owned()) && DEFAULT_ALLOWLIST.contains(&host.as_str()) { + return true; + } + /* also check higher level domains, so that e.g. `en.m.wikipedia.org` is matched by `wikipedia.org` on allowlist. */ + host = match host.split_once('.') { + None => return false, + Some((_, domain)) => domain.to_owned(), + } + } + false +} + +/// # `GET /_matrix/media/r0/preview_url` +/// +/// Returns URL preview. 
+pub async fn get_media_preview_route( + body: Ruma, +) -> Result { + let url = &body.url; + if !url_preview_allowed(url) { + return Err(Error::BadRequest( + ErrorKind::Forbidden, + "Previewing URL not allowed", + )); + } + + if let Ok(preview) = get_url_preview(url).await { + let res = serde_json::value::to_raw_value(&preview).expect("Converting to JSON failed"); + return Ok(get_media_preview::v3::Response::from_raw_value(res)); + } + + Err(Error::BadRequest( + ErrorKind::LimitExceeded { + retry_after_ms: Some(Duration::from_secs(5)), + }, + "Retry later", + )) +} + /// # `POST /_matrix/media/v3/upload` /// /// Permanently save media in the server. diff --git a/src/config/mod.rs b/src/config/mod.rs index d5cecfe2..2e476d42 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -132,6 +132,9 @@ pub struct Config { #[serde(default = "default_ip_range_denylist")] pub ip_range_denylist: Vec, + #[serde(default = "Vec::new")] + pub url_preview_allowlist: Vec, + #[serde(flatten)] pub catchall: BTreeMap, } @@ -319,6 +322,7 @@ impl fmt::Display for Config { } &lst.join(", ") }), + ("URL preview allowlist", &self.url_preview_allowlist.join(", ")), ]; let mut msg: String = "Active config values:\n\n".to_owned(); diff --git a/src/database/key_value/media.rs b/src/database/key_value/media.rs index 6abe5ba5..6d05a9f4 100644 --- a/src/database/key_value/media.rs +++ b/src/database/key_value/media.rs @@ -1,6 +1,6 @@ use ruma::api::client::error::ErrorKind; -use crate::{database::KeyValueDatabase, service, utils, Error, Result}; +use crate::{database::KeyValueDatabase, service::{self, media::UrlPreviewData}, utils, Error, Result}; impl service::media::Data for KeyValueDatabase { fn create_file_metadata( @@ -79,4 +79,107 @@ impl service::media::Data for KeyValueDatabase { }; Ok((content_disposition, content_type, key)) } + + fn remove_url_preview(&self, url: &str) -> Result<()> { + self.url_previews.remove(url.as_bytes()) + } + + fn set_url_preview(&self, url: &str, data: 
&UrlPreviewData, timestamp: std::time::Duration) -> Result<()> { + let mut value = Vec::<u8>::new(); + value.extend_from_slice(&timestamp.as_secs().to_be_bytes()); + value.push(0xff); + value.extend_from_slice( + data.title + .as_ref() + .map(|t| t.as_bytes()) + .unwrap_or_default(), + ); + value.push(0xff); + value.extend_from_slice( + data.description + .as_ref() + .map(|d| d.as_bytes()) + .unwrap_or_default(), + ); + value.push(0xff); + value.extend_from_slice( + data.image + .as_ref() + .map(|i| i.as_bytes()) + .unwrap_or_default(), + ); + value.push(0xff); + value.extend_from_slice(&data.image_size.unwrap_or(0).to_be_bytes()); + value.push(0xff); + value.extend_from_slice(&data.image_width.unwrap_or(0).to_be_bytes()); + value.push(0xff); + value.extend_from_slice(&data.image_height.unwrap_or(0).to_be_bytes()); + + self.url_previews.insert(url.as_bytes(), &value) + } + + fn get_url_preview(&self, url: &str) -> Option<UrlPreviewData> { + let values = self.url_previews.get(url.as_bytes()).ok()??; + + let mut values = values.split(|&b| b == 0xff); + + let _ts = match values + .next() + .map(|b| u64::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + let title = match values + .next() + .and_then(|b| String::from_utf8(b.to_vec()).ok()) + { + Some(s) if s.is_empty() => None, + x => x, + }; + let description = match values + .next() + .and_then(|b| String::from_utf8(b.to_vec()).ok()) + { + Some(s) if s.is_empty() => None, + x => x, + }; + let image = match values + .next() + .and_then(|b| String::from_utf8(b.to_vec()).ok()) + { + Some(s) if s.is_empty() => None, + x => x, + }; + let image_size = match values + .next() + .map(|b| usize::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + let image_width = match values + .next() + .map(|b| u32::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + let image_height = match values + .next() + .map(|b|
u32::from_be_bytes(b.try_into().expect("valid BE array"))) + { + Some(0) => None, + x => x, + }; + + Some(UrlPreviewData { + title, + description, + image, + image_size, + image_width, + image_height, + }) + } } diff --git a/src/database/mod.rs b/src/database/mod.rs index 739f5b20..c0d64b93 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -145,6 +145,7 @@ pub struct KeyValueDatabase { //pub media: media::Media, pub(super) mediaid_file: Arc, // MediaId = MXC + WidthHeight + ContentDisposition + ContentType + pub(super) url_previews: Arc, //pub key_backups: key_backups::KeyBackups, pub(super) backupid_algorithm: Arc, // BackupId = UserId + Version(Count) pub(super) backupid_etag: Arc, // BackupId = UserId + Version(Count) @@ -348,6 +349,7 @@ impl KeyValueDatabase { roomuserdataid_accountdata: builder.open_tree("roomuserdataid_accountdata")?, roomusertype_roomuserdataid: builder.open_tree("roomusertype_roomuserdataid")?, mediaid_file: builder.open_tree("mediaid_file")?, + url_previews: builder.open_tree("url_previews")?, backupid_algorithm: builder.open_tree("backupid_algorithm")?, backupid_etag: builder.open_tree("backupid_etag")?, backupkeyid_backup: builder.open_tree("backupkeyid_backup")?, diff --git a/src/main.rs b/src/main.rs index 38339206..a30e7eb9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -461,6 +461,7 @@ fn routes() -> Router { .ruma_route(client_server::turn_server_route) .ruma_route(client_server::send_event_to_device_route) .ruma_route(client_server::get_media_config_route) + .ruma_route(client_server::get_media_preview_route) .ruma_route(client_server::create_content_route) .ruma_route(client_server::get_content_route) .ruma_route(client_server::get_content_as_filename_route) diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs index 44a22166..3f35c4e9 100644 --- a/src/service/globals/mod.rs +++ b/src/service/globals/mod.rs @@ -329,6 +329,10 @@ impl Service<'_> { self.config.allow_device_name_federation } + pub fn 
url_preview_allowlist(&self) -> &Vec { + &self.config.url_preview_allowlist + } + pub fn allow_room_creation(&self) -> bool { self.config.allow_room_creation } diff --git a/src/service/media/data.rs b/src/service/media/data.rs index 75a682cb..6e13cad0 100644 --- a/src/service/media/data.rs +++ b/src/service/media/data.rs @@ -17,4 +17,21 @@ pub trait Data: Send + Sync { width: u32, height: u32, ) -> Result<(Option, Option, Vec)>; + + fn remove_url_preview( + &self, + url: &str + ) -> Result<()>; + + fn set_url_preview( + &self, + url: &str, + data: &super::UrlPreviewData, + timestamp: std::time::Duration, + ) -> Result<()>; + + fn get_url_preview( + &self, + url: &str + ) -> Option; } diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index 4a016bda..6612f36c 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -1,5 +1,10 @@ mod data; -use std::io::Cursor; +use std::{ + io::Cursor, + collections::HashMap, + sync::{Arc, RwLock}, + time::SystemTime, +}; pub(crate) use data::Data; @@ -9,7 +14,9 @@ use image::imageops::FilterType; use tokio::{ fs::File, io::{AsyncReadExt, AsyncWriteExt, BufReader}, + sync::Mutex, }; +use serde::Serialize; pub struct FileMeta { pub content_disposition: Option, @@ -17,8 +24,43 @@ pub struct FileMeta { pub file: Vec, } +#[derive(Serialize, Default)] +pub struct UrlPreviewData { + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:title") + )] + pub title: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:description") + )] + pub description: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:image") + )] + pub image: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "matrix:image:size") + )] + pub image_size: Option, + #[serde( + skip_serializing_if = "Option::is_none", + rename(serialize = "og:image:width") + )] + pub image_width: Option, + #[serde( + skip_serializing_if = 
"Option::is_none", + rename(serialize = "og:image:height") + )] + pub image_height: Option, +} + pub struct Service { pub db: &'static dyn Data, + pub url_preview_mutex: RwLock>>>, } impl Service { @@ -260,6 +302,22 @@ impl Service { Ok(None) } } + + pub async fn get_url_preview(&self, url: &str) -> Option { + self.db.get_url_preview(url) + } + + pub async fn remove_url_preview(&self, url: &str) -> Result<()> { + // TODO: also remove the downloaded image + self.db.remove_url_preview(url) + } + + pub async fn set_url_preview(&self, url: &str, data: &UrlPreviewData) -> Result<()> { + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("valid system time"); + self.db.set_url_preview(url, data, now) + } } #[cfg(test)] diff --git a/src/service/mod.rs b/src/service/mod.rs index 0597c211..1902fa8c 100644 --- a/src/service/mod.rs +++ b/src/service/mod.rs @@ -1,6 +1,6 @@ use std::{ collections::{BTreeMap, HashMap}, - sync::{Arc, Mutex}, + sync::{Arc, Mutex, RwLock}, }; use lru_cache::LruCache; @@ -114,7 +114,10 @@ impl Services<'_> { account_data: account_data::Service { db }, admin: admin::Service::build(), key_backups: key_backups::Service { db }, - media: media::Service { db }, + media: media::Service { + db, + url_preview_mutex: RwLock::new(HashMap::new()), + }, sending: sending::Service::build(db, &config), globals: globals::Service::load(db, config)?,