Compare commits

...

11 commits

Author SHA1 Message Date
Jacob Taylor
676bfffd1d more funny settings (part 3 of 12)
Some checks failed
Checks / Prefligit / prefligit (push) Failing after 2s
Release Docker Image / define-variables (push) Failing after 3s
Release Docker Image / build-image (linux/amd64, release, linux-amd64, base) (push) Has been skipped
Release Docker Image / build-image (linux/arm64, release, linux-arm64, base) (push) Has been skipped
Release Docker Image / merge (push) Has been skipped
Checks / Rust / Format (push) Failing after 2s
Checks / Rust / Clippy (push) Failing after 38s
Checks / Rust / Cargo Test (push) Failing after 40s
2025-07-03 13:14:22 -07:00
Jacob Taylor
2b5bbbb638 sender_workers scaling. this time, with feeling! 2025-07-03 13:14:22 -07:00
Jacob Taylor
2e4d4d09c8 vehicle loan documentation now available at window 7
also print event id
2025-07-03 13:14:22 -07:00
Jacob Taylor
1186eace4d lock the getter instead ??? c/o M 2025-07-03 13:14:22 -07:00
Jacob Taylor
9093ee2485 make fetching key room events less smart 2025-07-03 13:14:22 -07:00
Jacob Taylor
7923e05982 change rocksdb stats level to 3
scale rocksdb background jobs and subcompactions

change rocksdb default error level to info from error

delete unused num_threads function

fix warns from cargo
2025-07-03 13:14:22 -07:00
nexy7574
879a59a1e4 modify more log strings so they're more useful than not 2025-07-03 13:14:22 -07:00
nexy7574
864704a536 When in doubt, log all the things 2025-07-03 13:14:22 -07:00
nexy7574
17fb69e3c2 log which room struggled to get mainline depth 2025-07-03 13:13:32 -07:00
nexy7574
feeb418e81 more logs 2025-07-03 13:13:32 -07:00
nexy7574
6e7390a9b7 Unsafe, untested, and potentially overeager PDU sanity checks 2025-07-03 13:13:32 -07:00
10 changed files with 3951 additions and 119 deletions

@@ -1001,7 +1001,7 @@
# 3 to 5 = Statistics with possible performance impact.
# 6 = All statistics.
#
-#rocksdb_stats_level = 1
+#rocksdb_stats_level = 3
# This is a password that can be configured that will let you login to the
# server bot account (currently `@conduit`) for emergency troubleshooting
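
For readers tuning this option: the 0-6 integer presumably maps onto the rocksdb crate's `statistics::StatsLevel` when the database options are built (the type is imported in the db_opts diff further down). A hypothetical sketch of such a mapping, following the tiers the comment describes; the real mapping is not part of this compare and may differ:

use rocksdb::statistics::StatsLevel;

// Hypothetical mapping from the 0-6 rocksdb_stats_level setting to RocksDB's
// statistics granularity; the per-tier variant choices are assumptions.
fn stats_level_from_config(level: u8) -> StatsLevel {
    match level {
        0 => StatsLevel::DisableAll,
        1 | 2 => StatsLevel::ExceptDetailedTimers,
        3 | 4 | 5 => StatsLevel::ExceptTimeForMutex,
        _ => StatsLevel::All,
    }
}

Raising the default from 1 to 3 therefore lands in the "3 to 5 = Statistics with possible performance impact" tier: more counters in exchange for some measurable overhead.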

src/api/client/membership.rs (new file, 2765 lines)

File diff suppressed because it is too large.

@@ -1171,7 +1171,7 @@ pub struct Config {
    /// 3 to 5 = Statistics with possible performance impact.
    /// 6 = All statistics.
    ///
-   /// default: 1
+   /// default: 3
    #[serde(default = "default_rocksdb_stats_level")]
    pub rocksdb_stats_level: u8,
@@ -1839,9 +1839,9 @@ pub struct Config {
    pub stream_amplification: usize,

    /// Number of sender task workers; determines sender parallelism. Default is
-   /// '4'. Override by setting a different value. Values clamped 1 to core count.
+   /// core count. Override by setting a different value.
    ///
-   /// default: 4
+   /// default: core count
    #[serde(default = "default_sender_workers")]
    pub sender_workers: usize,
@@ -2075,40 +2075,41 @@ fn default_db_write_buffer_capacity_mb() -> f64 { 48.0 + parallelism_scaled_f64(
fn default_db_cache_capacity_mb() -> f64 { 512.0 + parallelism_scaled_f64(512.0) }

-fn default_pdu_cache_capacity() -> u32 { parallelism_scaled_u32(50_000).saturating_add(500_000) }
+fn default_pdu_cache_capacity() -> u32 { parallelism_scaled_u32(50_000).saturating_add(100_000) }

fn default_cache_capacity_modifier() -> f64 { 1.0 }

fn default_auth_chain_cache_capacity() -> u32 {
-    parallelism_scaled_u32(50_000).saturating_add(500_000)
+    parallelism_scaled_u32(50_000).saturating_add(100_000)
}

fn default_shorteventid_cache_capacity() -> u32 {
-    parallelism_scaled_u32(100_000).saturating_add(500_000)
+    parallelism_scaled_u32(100_000).saturating_add(100_000)
}

fn default_eventidshort_cache_capacity() -> u32 {
-    parallelism_scaled_u32(100_000).saturating_add(500_000)
+    parallelism_scaled_u32(50_000).saturating_add(100_000)
}

fn default_eventid_pdu_cache_capacity() -> u32 {
-    parallelism_scaled_u32(50_000).saturating_add(500_000)
+    parallelism_scaled_u32(50_000).saturating_add(100_000)
}

fn default_shortstatekey_cache_capacity() -> u32 {
-    parallelism_scaled_u32(50_000).saturating_add(500_000)
+    parallelism_scaled_u32(50_000).saturating_add(100_000)
}

fn default_statekeyshort_cache_capacity() -> u32 {
-    parallelism_scaled_u32(50_000).saturating_add(500_000)
+    parallelism_scaled_u32(50_000).saturating_add(100_000)
}

fn default_servernameevent_data_cache_capacity() -> u32 {
-    parallelism_scaled_u32(200_000).saturating_add(500_000)
+    parallelism_scaled_u32(100_000).saturating_add(100_000)
}

fn default_stateinfo_cache_capacity() -> u32 {
-    parallelism_scaled_u32(500).clamp(100, 12000) }
+    parallelism_scaled_u32(500).clamp(100, 12000)
+}

fn default_roomid_spacehierarchy_cache_capacity() -> u32 {
    parallelism_scaled_u32(500).clamp(100, 12000) }
@@ -2215,7 +2216,7 @@ fn default_typing_client_timeout_max_s() -> u64 { 45 }
fn default_rocksdb_recovery_mode() -> u8 { 1 }

-fn default_rocksdb_log_level() -> String { "error".to_owned() }
+fn default_rocksdb_log_level() -> String { "info".to_owned() }

fn default_rocksdb_log_time_to_roll() -> usize { 0 }
@@ -2247,7 +2248,7 @@ fn default_rocksdb_compression_level() -> i32 { 32767 }
#[allow(clippy::doc_markdown)]
fn default_rocksdb_bottommost_compression_level() -> i32 { 32767 }

-fn default_rocksdb_stats_level() -> u8 { 1 }
+fn default_rocksdb_stats_level() -> u8 { 3 }

// I know, it's a great name
#[must_use]
@@ -2328,7 +2329,7 @@ fn default_stream_width_scale() -> f32 { 1.0 }
fn default_stream_amplification() -> usize { 1024 }

-fn default_sender_workers() -> usize { 4 }
+fn default_sender_workers() -> usize { parallelism_scaled(1) }

fn default_client_receive_timeout() -> u64 { 75 }
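
The new defaults above all funnel through the `parallelism_scaled*` helpers, whose definitions are not part of this compare. A minimal sketch of what they presumably do, assuming they multiply a base value by the detected core count (the bodies below, and the fallback to 1, are assumptions):

use std::num::NonZeroUsize;

fn available_parallelism() -> usize {
    // Falls back to 1 if the parallelism cannot be queried.
    std::thread::available_parallelism()
        .map(NonZeroUsize::get)
        .unwrap_or(1)
}

fn parallelism_scaled(base: usize) -> usize {
    base.saturating_mul(available_parallelism())
}

fn parallelism_scaled_u32(base: u32) -> u32 {
    base.saturating_mul(u32::try_from(available_parallelism()).unwrap_or(1))
}

fn parallelism_scaled_f64(base: f64) -> f64 {
    base * available_parallelism() as f64
}

Under that assumption, on a 16-core host `parallelism_scaled_u32(50_000).saturating_add(100_000)` comes to 900_000 cache entries, where the old `saturating_add(500_000)` tail would have produced 1_300_000: the diff keeps the per-core scaling but shrinks the flat floor.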

@@ -13,6 +13,7 @@ use ruma::{
        power_levels::RoomPowerLevelsEventContent,
        third_party_invite::RoomThirdPartyInviteEventContent,
    },
+   EventId,
    int,
    serde::{Base64, Raw},
};
@@ -21,7 +22,6 @@ use serde::{
    de::{Error as _, IgnoredAny},
};
use serde_json::{from_str as from_json_str, value::RawValue as RawJsonValue};
-
use super::{
    Error, Event, Result, StateEventType, StateKey, TimelineEventType,
    power_levels::{
@@ -242,16 +242,27 @@ where
    }
    */

-   let (room_create_event, power_levels_event, sender_member_event) = join3(
-       fetch_state(&StateEventType::RoomCreate, ""),
-       fetch_state(&StateEventType::RoomPowerLevels, ""),
-       fetch_state(&StateEventType::RoomMember, sender.as_str()),
-   )
-   .await;
+   // let (room_create_event, power_levels_event, sender_member_event) = join3(
+   //     fetch_state(&StateEventType::RoomCreate, ""),
+   //     fetch_state(&StateEventType::RoomPowerLevels, ""),
+   //     fetch_state(&StateEventType::RoomMember, sender.as_str()),
+   // )
+   // .await;
+   let room_create_event = fetch_state(&StateEventType::RoomCreate, "").await;
+   let power_levels_event = fetch_state(&StateEventType::RoomPowerLevels, "").await;
+   let sender_member_event = fetch_state(&StateEventType::RoomMember, sender.as_str()).await;

    let room_create_event = match room_create_event {
        | None => {
-           error!("no m.room.create event found for {}!", incoming_event.event_id());
+           error!(
+               create_event = room_create_event.as_ref().map(Event::event_id).unwrap_or(<&EventId>::try_from("$unknown").unwrap()).as_str(),
+               power_levels = power_levels_event.as_ref().map(Event::event_id).unwrap_or(<&EventId>::try_from("$unknown").unwrap()).as_str(),
+               member_event = sender_member_event.as_ref().map(Event::event_id).unwrap_or(<&EventId>::try_from("$unknown").unwrap()).as_str(),
+               "no m.room.create event found for {} ({})!",
+               incoming_event.event_id().as_str(),
+               incoming_event.room_id().as_str()
+           );
            return Ok(false);
        },
        | Some(e) => e,
@@ -264,9 +275,11 @@ where
    if room_id_server_name != room_create_event.sender().server_name() {
        warn!(
-           "servername of room ID origin ({}) does not match servername of m.room.create sender ({})",
+           "servername of room ID origin ({}) does not match servername of m.room.create \
+            sender ({})",
            room_id_server_name,
-           room_create_event.sender().server_name());
+           room_create_event.sender().server_name()
+       );
        return Ok(false);
    }
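
The most consequential change in this file is the fetch strategy: the `join3` call is commented out in favor of three sequential awaits. A self-contained sketch of the two shapes, with a stub `fetch_state` standing in for the real state-lookup closure:

use futures::future::join3;

// Stub standing in for the real state-lookup closure.
async fn fetch_state(state_key: &str) -> Option<String> {
    let _ = state_key;
    None
}

// Before: one await point; the three lookups may interleave their I/O.
async fn fetch_concurrently() -> (Option<String>, Option<String>, Option<String>) {
    join3(
        fetch_state("m.room.create"),
        fetch_state("m.room.power_levels"),
        fetch_state("m.room.member"),
    )
    .await
}

// After: three await points; each lookup completes before the next starts.
async fn fetch_sequentially() -> (Option<String>, Option<String>, Option<String>) {
    let create = fetch_state("m.room.create").await;
    let power_levels = fetch_state("m.room.power_levels").await;
    let member = fetch_state("m.room.member").await;
    (create, power_levels, member)
}

Sequencing the fetches serializes whatever backend latency each lookup carries, so this reads as a debugging aid (each result is pinned down before the next request fires) rather than a performance change.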

@@ -616,7 +616,7 @@ where
            .map(ToOwned::to_owned),
        )
    };

+   debug!("running auth check on {:?}", event.event_id());
    let auth_result =
        auth_check(room_version, &event, current_third_party, fetch_state).await;
@@ -733,8 +733,12 @@ where
    Fut: Future<Output = Option<E>> + Send,
    E: Event + Send + Sync,
{
+   let mut room_id = None;
    while let Some(sort_ev) = event {
-       debug!(event_id = sort_ev.event_id().as_str(), "mainline");
+       trace!(event_id = sort_ev.event_id().as_str(), "mainline");
+       if room_id.is_none() {
+           room_id = Some(sort_ev.room_id().to_owned());
+       }

        let id = sort_ev.event_id();
        if let Some(depth) = mainline_map.get(id) {
@@ -753,7 +757,7 @@
            }
        }
    }
-   warn!("could not find a power event in the mainline map, defaulting to zero depth");
+   warn!("could not find a power event in the mainline map for {room_id:?}, defaulting to zero depth");
    Ok(0)
}

@@ -1,8 +1,6 @@
-use std::{cmp, convert::TryFrom};
-
-use conduwuit::{Config, Result, utils};
+use conduwuit::{Config, Result};
use rocksdb::{Cache, DBRecoveryMode, Env, LogLevel, Options, statistics::StatsLevel};
+use conduwuit::config::{parallelism_scaled_i32, parallelism_scaled_u32};

use super::{cf_opts::cache_size_f64, logger::handle as handle_log};

/// Create database-wide options suitable for opening the database. This also
@@ -23,8 +21,8 @@ pub(crate) fn db_options(config: &Config, env: &Env, row_cache: &Cache) -> Resul
    set_logging_defaults(&mut opts, config);

    // Processing
-   opts.set_max_background_jobs(num_threads::<i32>(config)?);
-   opts.set_max_subcompactions(num_threads::<u32>(config)?);
+   opts.set_max_background_jobs(parallelism_scaled_i32(1));
+   opts.set_max_subcompactions(parallelism_scaled_u32(1));
    opts.set_avoid_unnecessary_blocking_io(true);
    opts.set_max_file_opening_threads(0);
@@ -126,15 +124,3 @@ fn set_logging_defaults(opts: &mut Options, config: &Config) {
        opts.set_callback_logger(rocksdb_log_level, &handle_log);
    }
}
-
-fn num_threads<T: TryFrom<usize>>(config: &Config) -> Result<T> {
-   const MIN_PARALLELISM: usize = 2;
-
-   let requested = if config.rocksdb_parallelism_threads != 0 {
-       config.rocksdb_parallelism_threads
-   } else {
-       utils::available_parallelism()
-   };
-
-   utils::math::try_into::<T, usize>(cmp::max(MIN_PARALLELISM, requested))
-}
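
Worth noting what the deleted helper did that its replacement does not: `num_threads` honored the `rocksdb_parallelism_threads` config override (0 meaning "auto") and enforced a floor of two threads, while `parallelism_scaled_i32(1)` / `parallelism_scaled_u32(1)` derive the count purely from detected parallelism. A standalone recast of the removed logic, with the `Config` parameter reduced to the one field it read:

use std::{cmp, num::NonZeroUsize};

// Recast of the removed num_threads(); `configured` is the old
// config.rocksdb_parallelism_threads value, where 0 means "auto".
fn num_threads(configured: usize) -> usize {
    const MIN_PARALLELISM: usize = 2;

    let requested = if configured != 0 {
        configured
    } else {
        std::thread::available_parallelism()
            .map(NonZeroUsize::get)
            .unwrap_or(1)
    };

    cmp::max(MIN_PARALLELISM, requested)
}

After this change the `rocksdb_parallelism_threads` knob no longer influences background jobs or subcompactions, and single-core hosts drop from the old floor of 2 to 1.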

@@ -80,7 +80,7 @@
    // 5. Reject "due to auth events" if can't get all the auth events or some of
    //    the auth events are also rejected "due to auth events"
    // NOTE: Step 5 is not applied anymore because it failed too often
-   debug!("Fetching auth events");
+   debug!("Fetching auth events for {}", incoming_pdu.event_id);
    Box::pin(self.fetch_and_handle_outliers(
        origin,
        pdu_event.auth_events(),
@@ -92,12 +92,12 @@
    // 6. Reject "due to auth events" if the event doesn't pass auth based on the
    //    auth events
-   debug!("Checking based on auth events");
+   debug!("Checking {} based on auth events", incoming_pdu.event_id);

    // Build map of auth events
    let mut auth_events = HashMap::with_capacity(pdu_event.auth_events().count());
    for id in pdu_event.auth_events() {
        let Ok(auth_event) = self.services.timeline.get_pdu(id).await else {
-           warn!("Could not find auth event {id}");
+           warn!("Could not find auth event {id} for {}", incoming_pdu.event_id);
            continue;
        };
@@ -131,6 +131,7 @@
        ready(auth_events.get(&key).map(ToOwned::to_owned))
    };

+   debug!("running auth check to handle outlier pdu {:?}", incoming_pdu.event_id);
    let auth_check = state_res::event_auth::auth_check(
        &to_room_version(&room_version_id),
        &pdu_event,

@@ -1,12 +1,6 @@
use std::{borrow::Borrow, collections::BTreeMap, iter::once, sync::Arc, time::Instant};

-use conduwuit::{
-   Err, Result, debug, debug_info, err, implement, is_equal_to,
-   matrix::{Event, EventTypeExt, PduEvent, StateKey, state_res},
-   trace,
-   utils::stream::{BroadbandExt, ReadyExt},
-   warn,
-};
+use conduwuit::{Err, Result, debug, debug_info, err, implement, matrix::{EventTypeExt, PduEvent, StateKey, state_res}, trace, utils::stream::{BroadbandExt, ReadyExt}, warn, info};

use futures::{FutureExt, StreamExt, future::ready};
use ruma::{CanonicalJsonValue, RoomId, ServerName, events::StateEventType};
@@ -17,22 +11,19 @@ use crate::rooms::{
};

#[implement(super::Service)]
-pub(super) async fn upgrade_outlier_to_timeline_pdu<Pdu>(
+pub(super) async fn upgrade_outlier_to_timeline_pdu(
    &self,
    incoming_pdu: PduEvent,
    val: BTreeMap<String, CanonicalJsonValue>,
-   create_event: &Pdu,
+   create_event: &PduEvent,
    origin: &ServerName,
    room_id: &RoomId,
-) -> Result<Option<RawPduId>>
-where
-   Pdu: Event + Send + Sync,
-{
+) -> Result<Option<RawPduId>> {
    // Skip the PDU if we already have it as a timeline event
    if let Ok(pduid) = self
        .services
        .timeline
-       .get_pdu_id(incoming_pdu.event_id())
+       .get_pdu_id(&incoming_pdu.event_id)
        .await
    {
        return Ok(Some(pduid));
@@ -41,13 +32,13 @@
    if self
        .services
        .pdu_metadata
-       .is_event_soft_failed(incoming_pdu.event_id())
+       .is_event_soft_failed(&incoming_pdu.event_id)
        .await
    {
        return Err!(Request(InvalidParam("Event has been soft failed")));
    }

-   debug!("Upgrading to timeline pdu");
+   debug!("Upgrading pdu {} from outlier to timeline pdu", incoming_pdu.event_id);
    let timer = Instant::now();
    let room_version_id = get_room_version_id(create_event)?;
@@ -55,8 +46,8 @@
    // backwards extremities doing all the checks in this list starting at 1.
    // These are not timeline events.

-   debug!("Resolving state at event");
-   let mut state_at_incoming_event = if incoming_pdu.prev_events().count() == 1 {
+   debug!("Resolving state at event {}", incoming_pdu.event_id);
+   let mut state_at_incoming_event = if incoming_pdu.prev_events.len() == 1 {
        self.state_at_incoming_degree_one(&incoming_pdu).await?
    } else {
        self.state_at_incoming_resolved(&incoming_pdu, room_id, &room_version_id)
@@ -65,16 +56,15 @@
    if state_at_incoming_event.is_none() {
        state_at_incoming_event = self
-           .fetch_state(origin, create_event, room_id, incoming_pdu.event_id())
+           .fetch_state(origin, create_event, room_id, &incoming_pdu.event_id)
            .await?;
    }

    let state_at_incoming_event =
        state_at_incoming_event.expect("we always set this to some above");
    let room_version = to_room_version(&room_version_id);

-   debug!("Performing auth check");
+   debug!("Performing auth check to upgrade {}", incoming_pdu.event_id);
    // 11. Check the auth of the event passes based on the state of the event
    let state_fetch_state = &state_at_incoming_event;
    let state_fetch = |k: StateEventType, s: StateKey| async move {
@@ -84,6 +74,7 @@
        self.services.timeline.get_pdu(event_id).await.ok()
    };

+   debug!("running auth check on {}", incoming_pdu.event_id);
    let auth_check = state_res::event_auth::auth_check(
        &room_version,
        &incoming_pdu,
@@ -97,24 +88,25 @@
        return Err!(Request(Forbidden("Event has failed auth check with state at the event.")));
    }

-   debug!("Gathering auth events");
+   debug!("Gathering auth events for {}", incoming_pdu.event_id);
    let auth_events = self
        .services
        .state
        .get_auth_events(
            room_id,
-           incoming_pdu.kind(),
-           incoming_pdu.sender(),
-           incoming_pdu.state_key(),
-           incoming_pdu.content(),
+           &incoming_pdu.kind,
+           &incoming_pdu.sender,
+           incoming_pdu.state_key.as_deref(),
+           &incoming_pdu.content,
        )
        .await?;

    let state_fetch = |k: &StateEventType, s: &str| {
        let key = k.with_state_key(s);
-       ready(auth_events.get(&key).map(ToOwned::to_owned))
+       ready(auth_events.get(&key).cloned())
    };

+   debug!("running auth check on {} with claimed state auth", incoming_pdu.event_id);
    let auth_check = state_res::event_auth::auth_check(
        &room_version,
        &incoming_pdu,
@@ -125,7 +117,7 @@
    .map_err(|e| err!(Request(Forbidden("Auth check failed: {e:?}"))))?;

    // Soft fail check before doing state res
-   debug!("Performing soft-fail check");
+   debug!("Performing soft-fail check on {}", incoming_pdu.event_id);
    let soft_fail = match (auth_check, incoming_pdu.redacts_id(&room_version_id)) {
        | (false, _) => true,
        | (true, None) => false,
@@ -133,7 +125,7 @@
            !self
                .services
                .state_accessor
-               .user_can_redact(&redact_id, incoming_pdu.sender(), incoming_pdu.room_id(), true)
+               .user_can_redact(&redact_id, &incoming_pdu.sender, &incoming_pdu.room_id, true)
                .await?,
    };
@@ -149,11 +141,11 @@
    let extremities: Vec<_> = self
        .services
        .state
-       .get_forward_extremities(room_id)
+       .get_forward_extremities(room_id, &state_lock)
        .map(ToOwned::to_owned)
        .ready_filter(|event_id| {
            // Remove any that are referenced by this incoming event's prev_events
-           !incoming_pdu.prev_events().any(is_equal_to!(event_id))
+           !incoming_pdu.prev_events.contains(event_id)
        })
        .broad_filter_map(|event_id| async move {
            // Only keep those extremities were not referenced yet
@@ -167,10 +159,12 @@
        .collect()
        .await;

+   if extremities.len() == 0 { info!("Retained zero extremities when upgrading outlier PDU to timeline PDU with {} previous events, event id: {}", incoming_pdu.prev_events.len(), incoming_pdu.event_id) }
+
    debug!(
        "Retained {} extremities checked against {} prev_events",
        extremities.len(),
-       incoming_pdu.prev_events().count()
+       incoming_pdu.prev_events.len()
    );

    let state_ids_compressed: Arc<CompressedState> = self
@@ -185,20 +179,20 @@
        .map(Arc::new)
        .await;

-   if incoming_pdu.state_key().is_some() {
+   if incoming_pdu.state_key.is_some() {
        debug!("Event is a state-event. Deriving new room state");

        // We also add state after incoming event to the fork states
        let mut state_after = state_at_incoming_event.clone();
-       if let Some(state_key) = incoming_pdu.state_key() {
+       if let Some(state_key) = &incoming_pdu.state_key {
            let shortstatekey = self
                .services
                .short
-               .get_or_create_shortstatekey(&incoming_pdu.kind().to_string().into(), state_key)
+               .get_or_create_shortstatekey(&incoming_pdu.kind.to_string().into(), state_key)
                .await;

-           let event_id = incoming_pdu.event_id();
-           state_after.insert(shortstatekey, event_id.to_owned());
+           let event_id = &incoming_pdu.event_id;
+           state_after.insert(shortstatekey, event_id.clone());
        }

        let new_room_state = self
@@ -222,7 +216,7 @@
    // 14. Check if the event passes auth based on the "current state" of the room,
    //     if not soft fail it
    if soft_fail {
-       debug!("Soft failing event");
+       info!("Soft failing event {}", incoming_pdu.event_id);
        let extremities = extremities.iter().map(Borrow::borrow);

        self.services
@@ -240,9 +234,9 @@
        // Soft fail, we keep the event as an outlier but don't add it to the timeline
        self.services
            .pdu_metadata
-           .mark_event_soft_failed(incoming_pdu.event_id());
+           .mark_event_soft_failed(&incoming_pdu.event_id);

-       warn!("Event was soft failed: {:?}", incoming_pdu.event_id());
+       warn!("Event was soft failed: {incoming_pdu:?}");
        return Err!(Request(InvalidParam("Event has been soft failed")));
    }
@@ -253,7 +247,7 @@
    let extremities = extremities
        .iter()
        .map(Borrow::borrow)
-       .chain(once(incoming_pdu.event_id()));
+       .chain(once(incoming_pdu.event_id.borrow()));

    let pdu_id = self
        .services
@@ -388,6 +388,7 @@ impl Service {
    pub fn get_forward_extremities<'a>(
        &'a self,
        room_id: &'a RoomId,
+       _state_lock: &'a RoomMutexGuard,
    ) -> impl Stream<Item = &EventId> + Send + '_ {
        let prefix = (room_id, Interfix);
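
The added `_state_lock` argument is a lock token: it is never read, but a caller cannot produce one without actually holding the room's state mutex, so the locking requirement is enforced at compile time (this pairs with the `get_forward_extremities(room_id, &state_lock)` call site in the earlier hunk). A minimal sketch of the pattern, using std types as stand-ins for conduwuit's RoomMutexGuard:

use std::sync::{Mutex, MutexGuard};

// Stand-ins for the per-room state mutex and its guard.
struct RoomState(Mutex<()>);
type RoomMutexGuard<'a> = MutexGuard<'a, ()>;

// The guard is unused at runtime; its presence in the signature forces
// callers to hold the room lock for the duration of the call.
fn get_forward_extremities(room_id: &str, _state_lock: &RoomMutexGuard<'_>) -> Vec<String> {
    let _ = room_id;
    Vec::new()
}

fn caller(room: &RoomState) {
    let state_lock = room.0.lock().expect("room state mutex poisoned");
    let _extremities = get_forward_extremities("!room:example.org", &state_lock);
    // Lock released when state_lock drops.
}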

File diff suppressed because it is too large.