Fix spans in tokio::spawn-ed tasks

tokio::spawn is a span boundary, the spawned future has no parent span.

For short futures, we simply inherit the current span with `.in_current_span()`.

For long running futures containing a sleeping infinite loop, we don't actually want a span on the entire task or even
the entire loop body, both would result in very long spans. Instead, we put the outermost span (created using
 #[tracing::instrument] or .instrument()) around the actual work happening after the sleep, which results in a new root
span being created after every sleep.
This commit is contained in:
Xiretza 2024-05-06 17:20:58 +00:00
parent 69081a49aa
commit 56edd5b037
5 changed files with 104 additions and 97 deletions

View file

@ -96,7 +96,7 @@ pub(crate) async fn sync_events_route(
v.insert((body.since.clone(), rx.clone())); v.insert((body.since.clone(), rx.clone()));
tokio::spawn(sync_helper_wrapper(sender_user.clone(), sender_device.clone(), body, tx)); tokio::spawn(sync_helper_wrapper(sender_user.clone(), sender_device.clone(), body, tx).in_current_span());
rx rx
}, },
@ -108,7 +108,9 @@ pub(crate) async fn sync_events_route(
debug!("Sync started for {sender_user}"); debug!("Sync started for {sender_user}");
tokio::spawn(sync_helper_wrapper(sender_user.clone(), sender_device.clone(), body, tx)); tokio::spawn(
sync_helper_wrapper(sender_user.clone(), sender_device.clone(), body, tx).in_current_span(),
);
rx rx
} else { } else {

View file

@ -37,7 +37,7 @@ use serde::Deserialize;
#[cfg(unix)] #[cfg(unix)]
use tokio::signal::unix::{signal, SignalKind}; use tokio::signal::unix::{signal, SignalKind};
use tokio::time::{interval, Instant}; use tokio::time::{interval, Instant};
use tracing::{debug, error, warn}; use tracing::{debug, error, info_span, warn, Instrument as _};
use crate::{ use crate::{
database::migrations::migrations, service::rooms::timeline::PduCount, services, Config, Error, database::migrations::migrations, service::rooms::timeline::PduCount, services, Config, Error,
@ -436,6 +436,7 @@ impl KeyValueDatabase {
}); });
} }
#[tracing::instrument]
async fn try_handle_updates() -> Result<()> { async fn try_handle_updates() -> Result<()> {
let response = services() let response = services()
.globals .globals
@ -487,29 +488,35 @@ impl KeyValueDatabase {
loop { loop {
#[cfg(unix)] #[cfg(unix)]
tokio::select! { let msg = tokio::select! {
_ = i.tick() => { _ = i.tick() => || {
debug!(target: "database-cleanup", "Timer ticked"); debug!("Timer ticked");
} },
_ = hangup.recv() => { _ = hangup.recv() => || {
debug!(target: "database-cleanup","Received SIGHUP"); debug!("Received SIGHUP");
} },
_ = ctrl_c.recv() => { _ = ctrl_c.recv() => || {
debug!(target: "database-cleanup", "Received Ctrl+C"); debug!("Received Ctrl+C");
} },
_ = terminate.recv() => { _ = terminate.recv() => || {
debug!(target: "database-cleanup","Received SIGTERM"); debug!("Received SIGTERM");
} },
} };
#[cfg(not(unix))] #[cfg(not(unix))]
{ let msg = {
i.tick().await; i.tick().await;
debug!(target: "database-cleanup", "Timer ticked") || debug!("Timer ticked")
} };
async {
msg();
Self::perform_cleanup(); Self::perform_cleanup();
} }
.instrument(info_span!("database_cleanup"))
.await;
}
}); });
} }

View file

@ -24,7 +24,7 @@ use ruma::{
}; };
use serde_json::value::to_raw_value; use serde_json::value::to_raw_value;
use tokio::sync::{Mutex, MutexGuard}; use tokio::sync::{Mutex, MutexGuard};
use tracing::{error, warn}; use tracing::{error, info_span, warn, Instrument};
use self::{fsck::FsckCommand, tester::TesterCommands}; use self::{fsck::FsckCommand, tester::TesterCommands};
use super::pdu::PduBuilder; use super::pdu::PduBuilder;
@ -116,10 +116,32 @@ impl Service {
pub(crate) fn start_handler(self: &Arc<Self>) { pub(crate) fn start_handler(self: &Arc<Self>) {
let self2 = Arc::clone(self); let self2 = Arc::clone(self);
tokio::spawn(async move { tokio::spawn(async move {
self2 let receiver = self2.receiver.lock().await;
.handler() let Ok(Some(admin_room)) = Self::get_admin_room().await else {
.await return;
.expect("Failed to initialize admin room handler"); };
let server_name = services().globals.server_name();
let server_user = UserId::parse(format!("@conduit:{server_name}")).expect("server's username is valid");
loop {
debug_assert!(!receiver.is_closed(), "channel closed");
let event = receiver.recv_async().await;
async {
let ret = match event {
Ok(event) => self2.handle_event(event, &admin_room, &server_user).await,
Err(e) => {
error!("Failed to receive admin room event from channel: {e}");
return;
},
};
if let Err(e) = ret {
error!("Failed to handle admin room event: {e}");
}
}
.instrument(info_span!("admin_event_received"))
.await;
}
}); });
} }
@ -139,25 +161,6 @@ impl Service {
self.sender.send(message).expect("message sent"); self.sender.send(message).expect("message sent");
} }
async fn handler(&self) -> Result<()> {
let receiver = self.receiver.lock().await;
let Ok(Some(admin_room)) = Self::get_admin_room().await else {
return Ok(());
};
let server_name = services().globals.server_name();
let server_user = UserId::parse(format!("@conduit:{server_name}")).expect("server's username is valid");
loop {
debug_assert!(!receiver.is_closed(), "channel closed");
tokio::select! {
event = receiver.recv_async() => match event {
Ok(event) => self.handle_event(event, &admin_room, &server_user).await?,
Err(e) => error!("Failed to receive admin room event from channel: {e}"),
}
}
}
}
async fn handle_event(&self, event: AdminRoomEvent, admin_room: &RoomId, server_user: &UserId) -> Result<()> { async fn handle_event(&self, event: AdminRoomEvent, admin_room: &RoomId, server_user: &UserId) -> Result<()> {
let (mut message_content, reply) = match event { let (mut message_content, reply) = match event {
AdminRoomEvent::SendMessage(content) => (content, None), AdminRoomEvent::SendMessage(content) => (content, None),

View file

@ -11,7 +11,7 @@ use ruma::{
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::{sync::Mutex, time::sleep}; use tokio::{sync::Mutex, time::sleep};
use tracing::{debug, error}; use tracing::{debug, error, info_span, Instrument as _};
use crate::{ use crate::{
services, services,
@ -99,10 +99,34 @@ impl Service {
pub(crate) fn start_handler(self: &Arc<Self>) { pub(crate) fn start_handler(self: &Arc<Self>) {
let self_ = Arc::clone(self); let self_ = Arc::clone(self);
tokio::spawn(async move { tokio::spawn(async move {
self_ let mut presence_timers = FuturesUnordered::new();
.handler() let receiver = self_.timer_receiver.lock().await;
.await loop {
.expect("Failed to start presence handler"); tokio::select! {
event = receiver.recv_async() => async {
match event {
Ok((user_id, timeout)) => {
debug!("Adding timer {}: {user_id} timeout:{timeout:?}", presence_timers.len());
presence_timers.push(presence_timer(user_id, timeout));
}
Err(e) => {
// generally shouldn't happen
error!("Failed to receive presence timer through channel: {e}");
}
}
}
.instrument(info_span!("presence_event_received"))
.await,
Some(user_id) = presence_timers.next() => async {
if let Err(e) = process_presence_timer(&user_id) {
error!(?user_id, "Failed to process presence timer: {e}");
}
}
.instrument(info_span!("presence_timer_expired"))
.await,
}
}
}); });
} }
@ -186,32 +210,6 @@ impl Service {
pub(crate) fn presence_since(&self, since: u64) -> Box<dyn Iterator<Item = (OwnedUserId, u64, Vec<u8>)>> { pub(crate) fn presence_since(&self, since: u64) -> Box<dyn Iterator<Item = (OwnedUserId, u64, Vec<u8>)>> {
self.db.presence_since(since) self.db.presence_since(since)
} }
async fn handler(&self) -> Result<()> {
let mut presence_timers = FuturesUnordered::new();
let receiver = self.timer_receiver.lock().await;
loop {
tokio::select! {
event = receiver.recv_async() => {
match event {
Ok((user_id, timeout)) => {
debug!("Adding timer {}: {user_id} timeout:{timeout:?}", presence_timers.len());
presence_timers.push(presence_timer(user_id, timeout));
}
Err(e) => {
// generally shouldn't happen
error!("Failed to receive presence timer through channel: {e}");
}
}
}
Some(user_id) = presence_timers.next() => {
process_presence_timer(&user_id)?;
}
}
}
}
} }
async fn presence_timer(user_id: OwnedUserId, timeout: Duration) -> OwnedUserId { async fn presence_timer(user_id: OwnedUserId, timeout: Duration) -> OwnedUserId {

View file

@ -50,30 +50,25 @@ impl Service {
pub(crate) fn start_handler(self: &Arc<Self>) { pub(crate) fn start_handler(self: &Arc<Self>) {
let self2 = Arc::clone(self); let self2 = Arc::clone(self);
tokio::spawn(async move { tokio::spawn(async move {
self2.handler().await; let receiver = self2.receiver.lock().await;
});
}
#[tracing::instrument(skip_all, name = "sender")]
async fn handler(&self) {
let receiver = self.receiver.lock().await;
debug_assert!(!receiver.is_closed(), "channel error"); debug_assert!(!receiver.is_closed(), "channel error");
let mut futures: SendingFutures<'_> = FuturesUnordered::new(); let mut futures: SendingFutures<'_> = FuturesUnordered::new();
let mut statuses: CurTransactionStatus = CurTransactionStatus::new(); let mut statuses: CurTransactionStatus = CurTransactionStatus::new();
self.initial_transactions(&mut futures, &mut statuses); self2.initial_transactions(&mut futures, &mut statuses);
loop { loop {
tokio::select! { tokio::select! {
Ok(request) = receiver.recv_async() => { Ok(request) = receiver.recv_async() => {
self.handle_request(request, &mut futures, &mut statuses); self2.handle_request(request, &mut futures, &mut statuses);
}, },
Some(response) = futures.next() => { Some(response) = futures.next() => {
self.handle_response(response, &mut futures, &mut statuses); self2.handle_response(response, &mut futures, &mut statuses);
}, },
} }
} }
});
} }
#[tracing::instrument(skip(self, futures, statuses))]
fn handle_response( fn handle_response(
&self, response: SendingResult, futures: &mut SendingFutures<'_>, statuses: &mut CurTransactionStatus, &self, response: SendingResult, futures: &mut SendingFutures<'_>, statuses: &mut CurTransactionStatus,
) { ) {
@ -124,6 +119,7 @@ impl Service {
} }
} }
#[tracing::instrument(skip(self, futures, statuses))]
fn handle_request(&self, msg: Msg, futures: &mut SendingFutures<'_>, statuses: &mut CurTransactionStatus) { fn handle_request(&self, msg: Msg, futures: &mut SendingFutures<'_>, statuses: &mut CurTransactionStatus) {
let iv = vec![(msg.event, msg.queue_id)]; let iv = vec![(msg.event, msg.queue_id)];
if let Ok(Some(events)) = self.select_events(&msg.dest, iv, statuses) { if let Ok(Some(events)) = self.select_events(&msg.dest, iv, statuses) {
@ -135,6 +131,7 @@ impl Service {
} }
} }
#[tracing::instrument(skip(self, futures, statuses))]
fn initial_transactions(&self, futures: &mut SendingFutures<'_>, statuses: &mut CurTransactionStatus) { fn initial_transactions(&self, futures: &mut SendingFutures<'_>, statuses: &mut CurTransactionStatus) {
let keep = usize::try_from(self.startup_netburst_keep).unwrap_or(usize::MAX); let keep = usize::try_from(self.startup_netburst_keep).unwrap_or(usize::MAX);
let mut txns = HashMap::<Destination, Vec<SendingEvent>>::new(); let mut txns = HashMap::<Destination, Vec<SendingEvent>>::new();