continuwuity/src/service/rooms/state_compressor/mod.rs
Jason Volk 946ca364e0 Database Refactor
combine service/users data w/ mod unit

split sliding sync related out of service/users

instrument database entry points

remove increment crap from database interface

de-wrap all database get() calls

de-wrap all database insert() calls

de-wrap all database remove() calls

refactor database interface for async streaming

add query key serializer for database

implement Debug for result handle

add query deserializer for database

add deserialization trait for option handle

start a stream utils suite

de-wrap/asyncify/type-query count_one_time_keys()

de-wrap/asyncify users count

add admin query users command suite

de-wrap/asyncify users exists

de-wrap/partially asyncify user filter related

asyncify/de-wrap users device/keys related

asyncify/de-wrap user auth/misc related

asyncify/de-wrap users blurhash

asyncify/de-wrap account_data get; merge Data into Service

partial asyncify/de-wrap uiaa; merge Data into Service

partially asyncify/de-wrap transaction_ids get; merge Data into Service

partially asyncify/de-wrap key_backups; merge Data into Service

asyncify/de-wrap pusher service getters; merge Data into Service

asyncify/de-wrap rooms alias getters/some iterators

asyncify/de-wrap rooms directory getters/iterator

partially asyncify/de-wrap rooms lazy-loading

partially asyncify/de-wrap rooms metadata

asyncify/dewrap rooms outlier

asyncify/dewrap rooms pdu_metadata

dewrap/partially asyncify rooms read receipt

de-wrap rooms search service

de-wrap/partially asyncify rooms user service

partial de-wrap rooms state_compressor

de-wrap rooms state_cache

de-wrap room state et al

de-wrap rooms timeline service

additional users device/keys related

de-wrap/asyncify sender

asyncify services

refactor database to TryFuture/TryStream

refactor services for TryFuture/TryStream

asyncify api handlers

additional asyncification for admin module

abstract stream related; support reverse streams

additional stream conversions

asyncify state-res related

Signed-off-by: Jason Volk <jason@zemos.net>
2024-10-25 00:32:30 -04:00

359 lines
10 KiB
Rust

mod data;
use std::{
collections::HashSet,
fmt::Write,
mem::size_of,
sync::{Arc, Mutex as StdMutex, Mutex},
};
use conduit::{checked, utils, utils::math::usize_from_f64, Result};
use data::Data;
use lru_cache::LruCache;
use ruma::{EventId, RoomId};
use self::data::StateDiff;
use crate::{rooms, Dep};
type StateInfoLruCache = Mutex<
LruCache<
u64,
Vec<(
u64, // sstatehash
Arc<HashSet<CompressedStateEvent>>, // full state
Arc<HashSet<CompressedStateEvent>>, // added
Arc<HashSet<CompressedStateEvent>>, // removed
)>,
>,
>;
type ShortStateInfoResult = Vec<(
u64, // sstatehash
Arc<HashSet<CompressedStateEvent>>, // full state
Arc<HashSet<CompressedStateEvent>>, // added
Arc<HashSet<CompressedStateEvent>>, // removed
)>;
type ParentStatesVec = Vec<(
u64, // sstatehash
Arc<HashSet<CompressedStateEvent>>, // full state
Arc<HashSet<CompressedStateEvent>>, // added
Arc<HashSet<CompressedStateEvent>>, // removed
)>;
type HashSetCompressStateEvent = (u64, Arc<HashSet<CompressedStateEvent>>, Arc<HashSet<CompressedStateEvent>>);
pub type CompressedStateEvent = [u8; 2 * size_of::<u64>()];
pub struct Service {
db: Data,
services: Services,
pub stateinfo_cache: StateInfoLruCache,
}
struct Services {
short: Dep<rooms::short::Service>,
state: Dep<rooms::state::Service>,
}
impl crate::Service for Service {
fn build(args: crate::Args<'_>) -> Result<Arc<Self>> {
let config = &args.server.config;
let cache_capacity = f64::from(config.stateinfo_cache_capacity) * config.cache_capacity_modifier;
Ok(Arc::new(Self {
db: Data::new(args.db),
services: Services {
short: args.depend::<rooms::short::Service>("rooms::short"),
state: args.depend::<rooms::state::Service>("rooms::state"),
},
stateinfo_cache: StdMutex::new(LruCache::new(usize_from_f64(cache_capacity)?)),
}))
}
fn memory_usage(&self, out: &mut dyn Write) -> Result<()> {
let stateinfo_cache = self.stateinfo_cache.lock().expect("locked").len();
writeln!(out, "stateinfo_cache: {stateinfo_cache}")?;
Ok(())
}
fn clear_cache(&self) { self.stateinfo_cache.lock().expect("locked").clear(); }
fn name(&self) -> &str { crate::service::make_name(std::module_path!()) }
}
impl Service {
/// Returns a stack with info on shortstatehash, full state, added diff and
/// removed diff for the selected shortstatehash and each parent layer.
pub async fn load_shortstatehash_info(&self, shortstatehash: u64) -> Result<ShortStateInfoResult> {
if let Some(r) = self
.stateinfo_cache
.lock()
.expect("locked")
.get_mut(&shortstatehash)
{
return Ok(r.clone());
}
let StateDiff {
parent,
added,
removed,
} = self.db.get_statediff(shortstatehash).await?;
if let Some(parent) = parent {
let mut response = Box::pin(self.load_shortstatehash_info(parent)).await?;
let mut state = (*response.last().expect("at least one response").1).clone();
state.extend(added.iter().copied());
let removed = (*removed).clone();
for r in &removed {
state.remove(r);
}
response.push((shortstatehash, Arc::new(state), added, Arc::new(removed)));
self.stateinfo_cache
.lock()
.expect("locked")
.insert(shortstatehash, response.clone());
Ok(response)
} else {
let response = vec![(shortstatehash, added.clone(), added, removed)];
self.stateinfo_cache
.lock()
.expect("locked")
.insert(shortstatehash, response.clone());
Ok(response)
}
}
pub async fn compress_state_event(&self, shortstatekey: u64, event_id: &EventId) -> CompressedStateEvent {
let mut v = shortstatekey.to_be_bytes().to_vec();
v.extend_from_slice(
&self
.services
.short
.get_or_create_shorteventid(event_id)
.await
.to_be_bytes(),
);
v.try_into().expect("we checked the size above")
}
/// Returns shortstatekey, event id
#[inline]
pub async fn parse_compressed_state_event(
&self, compressed_event: &CompressedStateEvent,
) -> Result<(u64, Arc<EventId>)> {
use utils::u64_from_u8;
let shortstatekey = u64_from_u8(&compressed_event[0..size_of::<u64>()]);
let event_id = self
.services
.short
.get_eventid_from_short(u64_from_u8(&compressed_event[size_of::<u64>()..]))
.await?;
Ok((shortstatekey, event_id))
}
/// Creates a new shortstatehash that often is just a diff to an already
/// existing shortstatehash and therefore very efficient.
///
/// There are multiple layers of diffs. The bottom layer 0 always contains
/// the full state. Layer 1 contains diffs to states of layer 0, layer 2
/// diffs to layer 1 and so on. If layer n > 0 grows too big, it will be
/// combined with layer n-1 to create a new diff on layer n-1 that's
/// based on layer n-2. If that layer is also too big, it will recursively
/// fix above layers too.
///
/// * `shortstatehash` - Shortstatehash of this state
/// * `statediffnew` - Added to base. Each vec is shortstatekey+shorteventid
/// * `statediffremoved` - Removed from base. Each vec is
/// shortstatekey+shorteventid
/// * `diff_to_sibling` - Approximately how much the diff grows each time
/// for this layer
/// * `parent_states` - A stack with info on shortstatehash, full state,
/// added diff and removed diff for each parent layer
#[tracing::instrument(skip(self, statediffnew, statediffremoved, diff_to_sibling, parent_states), level = "debug")]
pub fn save_state_from_diff(
&self, shortstatehash: u64, statediffnew: Arc<HashSet<CompressedStateEvent>>,
statediffremoved: Arc<HashSet<CompressedStateEvent>>, diff_to_sibling: usize,
mut parent_states: ParentStatesVec,
) -> Result<()> {
let statediffnew_len = statediffnew.len();
let statediffremoved_len = statediffremoved.len();
let diffsum = checked!(statediffnew_len + statediffremoved_len)?;
if parent_states.len() > 3 {
// Number of layers
// To many layers, we have to go deeper
let parent = parent_states.pop().expect("parent must have a state");
let mut parent_new = (*parent.2).clone();
let mut parent_removed = (*parent.3).clone();
for removed in statediffremoved.iter() {
if !parent_new.remove(removed) {
// It was not added in the parent and we removed it
parent_removed.insert(*removed);
}
// Else it was added in the parent and we removed it again. We
// can forget this change
}
for new in statediffnew.iter() {
if !parent_removed.remove(new) {
// It was not touched in the parent and we added it
parent_new.insert(*new);
}
// Else it was removed in the parent and we added it again. We
// can forget this change
}
self.save_state_from_diff(
shortstatehash,
Arc::new(parent_new),
Arc::new(parent_removed),
diffsum,
parent_states,
)?;
return Ok(());
}
if parent_states.is_empty() {
// There is no parent layer, create a new state
self.db.save_statediff(
shortstatehash,
&StateDiff {
parent: None,
added: statediffnew,
removed: statediffremoved,
},
);
return Ok(());
};
// Else we have two options.
// 1. We add the current diff on top of the parent layer.
// 2. We replace a layer above
let parent = parent_states.pop().expect("parent must have a state");
let parent_2_len = parent.2.len();
let parent_3_len = parent.3.len();
let parent_diff = checked!(parent_2_len + parent_3_len)?;
if checked!(diffsum * diffsum)? >= checked!(2 * diff_to_sibling * parent_diff)? {
// Diff too big, we replace above layer(s)
let mut parent_new = (*parent.2).clone();
let mut parent_removed = (*parent.3).clone();
for removed in statediffremoved.iter() {
if !parent_new.remove(removed) {
// It was not added in the parent and we removed it
parent_removed.insert(*removed);
}
// Else it was added in the parent and we removed it again. We
// can forget this change
}
for new in statediffnew.iter() {
if !parent_removed.remove(new) {
// It was not touched in the parent and we added it
parent_new.insert(*new);
}
// Else it was removed in the parent and we added it again. We
// can forget this change
}
self.save_state_from_diff(
shortstatehash,
Arc::new(parent_new),
Arc::new(parent_removed),
diffsum,
parent_states,
)?;
} else {
// Diff small enough, we add diff as layer on top of parent
self.db.save_statediff(
shortstatehash,
&StateDiff {
parent: Some(parent.0),
added: statediffnew,
removed: statediffremoved,
},
);
}
Ok(())
}
/// Returns the new shortstatehash, and the state diff from the previous
/// room state
pub async fn save_state(
&self, room_id: &RoomId, new_state_ids_compressed: Arc<HashSet<CompressedStateEvent>>,
) -> Result<HashSetCompressStateEvent> {
let previous_shortstatehash = self
.services
.state
.get_room_shortstatehash(room_id)
.await
.ok();
let state_hash = utils::calculate_hash(
&new_state_ids_compressed
.iter()
.map(|bytes| &bytes[..])
.collect::<Vec<_>>(),
);
let (new_shortstatehash, already_existed) = self
.services
.short
.get_or_create_shortstatehash(&state_hash)
.await;
if Some(new_shortstatehash) == previous_shortstatehash {
return Ok((new_shortstatehash, Arc::new(HashSet::new()), Arc::new(HashSet::new())));
}
let states_parents = if let Some(p) = previous_shortstatehash {
self.load_shortstatehash_info(p).await.unwrap_or_default()
} else {
ShortStateInfoResult::new()
};
let (statediffnew, statediffremoved) = if let Some(parent_stateinfo) = states_parents.last() {
let statediffnew: HashSet<_> = new_state_ids_compressed
.difference(&parent_stateinfo.1)
.copied()
.collect();
let statediffremoved: HashSet<_> = parent_stateinfo
.1
.difference(&new_state_ids_compressed)
.copied()
.collect();
(Arc::new(statediffnew), Arc::new(statediffremoved))
} else {
(new_state_ids_compressed, Arc::new(HashSet::new()))
};
if !already_existed {
self.save_state_from_diff(
new_shortstatehash,
statediffnew.clone(),
statediffremoved.clone(),
2, // every state change is 2 event changes on average
states_parents,
)?;
};
Ok((new_shortstatehash, statediffnew, statediffremoved))
}
}