Skip to content

Commit

Permalink
feat(prover): export prover traces through OTLP (matter-labs#1427)
Browse files Browse the repository at this point in the history
## What ❔

This PR adds an optional `tracing` layer to `vlog` that exports all
spans to a configurable OTEL collector.

This layer is then used in most prover components to export spans with
structured information about the block number that is currently being
processed.

## Why ❔

Better observability for the prover, plus the ability to trace logs that
relate to a specific block number.

## Checklist

<!-- Check your PR fulfills the following items. -->
<!-- For draft PRs check the boxes as you complete them. -->

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [ ] Tests for the changes have been added / updated.
- [ ] Documentation comments have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
- [x] Spellcheck has been run via `zk spellcheck`.
- [x] Linkcheck has been run via `zk linkcheck`.
  • Loading branch information
itegulov authored Mar 25, 2024
1 parent 0d78122 commit 16dce75
Show file tree
Hide file tree
Showing 28 changed files with 842 additions and 42 deletions.
236 changes: 221 additions & 15 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ num = "0.4.0"
num_cpus = "1.13"
num_enum = "0.7.2"
once_cell = "1"
opentelemetry = "0.20.0"
opentelemetry-otlp = "0.13.0"
opentelemetry-semantic-conventions = "0.12.0"
pin-project-lite = "0.2.13"
pretty_assertions = "1"
prost = "0.12.1"
Expand Down Expand Up @@ -145,6 +148,7 @@ tower = "0.4.13"
tower-http = "0.4.1"
tracing = "0.1"
tracing-subscriber = "0.3"
tracing-opentelemetry = "0.21.0"
url = "2"
web3 = "0.19.0"

Expand Down
1 change: 1 addition & 0 deletions checks-config/era.dic
Original file line number Diff line number Diff line change
Expand Up @@ -921,3 +921,4 @@ p2p
StorageProcessor
StorageMarker
SIGINT
opentelemetry
1 change: 1 addition & 0 deletions core/bin/contract-verifier/src/verifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ impl JobProcessor for ContractVerifier {
#[allow(clippy::async_yields_async)]
async fn process_job(
&self,
_job_id: &Self::JobId,
job: VerificationRequest,
started_at: Instant,
) -> tokio::task::JoinHandle<anyhow::Result<()>> {
Expand Down
1 change: 1 addition & 0 deletions core/bin/external_node/src/config/observability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,6 @@ pub fn observability_config_from_env() -> anyhow::Result<ObservabilityConfig> {
sentry_url,
sentry_environment,
log_format,
opentelemetry: None,
})
}
2 changes: 1 addition & 1 deletion core/lib/config/src/configs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub use self::{
fri_witness_generator::FriWitnessGeneratorConfig,
fri_witness_vector_generator::FriWitnessVectorGeneratorConfig,
object_store::ObjectStoreConfig,
observability::ObservabilityConfig,
observability::{ObservabilityConfig, OpentelemetryConfig},
proof_data_handler::ProofDataHandlerConfig,
snapshots_creator::SnapshotsCreatorConfig,
utils::PrometheusConfig,
Expand Down
10 changes: 10 additions & 0 deletions core/lib/config/src/configs/observability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,17 @@ pub struct ObservabilityConfig {
pub sentry_url: Option<String>,
/// Name of the environment to use in Sentry.
pub sentry_environment: Option<String>,
/// Opentelemetry configuration.
pub opentelemetry: Option<OpentelemetryConfig>,
/// Format of the logs as expected by the `vlog` crate.
/// Currently must be either `plain` or `json`.
pub log_format: String,
}

/// Settings for exporting span data through OpenTelemetry.
#[derive(Clone, Debug, PartialEq)]
pub struct OpentelemetryConfig {
    /// Level threshold for exported spans: spans of this level and above
    /// are handed to the OpenTelemetry exporters.
    pub level: String,
    /// Endpoint of the OpenTelemetry HTTP collector that receives the spans.
    pub endpoint: String,
}
10 changes: 10 additions & 0 deletions core/lib/config/src/testonly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,16 @@ impl RandomConfig for configs::ObservabilityConfig {
sentry_url: g.gen(),
sentry_environment: g.gen(),
log_format: g.gen(),
opentelemetry: g.gen(),
}
}
}

// Random sample of `configs::OpentelemetryConfig`, built from the shared generator.
impl RandomConfig for configs::OpentelemetryConfig {
    fn sample(g: &mut Gen<impl Rng>) -> Self {
        // Draw the fields in declaration order so the generator is consumed
        // in the same sequence as a direct struct literal would consume it.
        let level = g.gen();
        let endpoint = g.gen();
        Self { level, endpoint }
    }
}
9 changes: 8 additions & 1 deletion core/lib/env_config/src/observability.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use zksync_config::configs::ObservabilityConfig;
use zksync_config::configs::{ObservabilityConfig, OpentelemetryConfig};

use crate::FromEnv;

Expand Down Expand Up @@ -33,11 +33,18 @@ impl FromEnv for ObservabilityConfig {
} else {
"plain".to_string()
};
let opentelemetry_level = std::env::var("OPENTELEMETRY_LEVEL").ok();
let otlp_endpoint = std::env::var("OTLP_ENDPOINT").ok();
let opentelemetry = match (opentelemetry_level, otlp_endpoint) {
(Some(level), Some(endpoint)) => Some(OpentelemetryConfig { level, endpoint }),
_ => None,
};

Ok(ObservabilityConfig {
sentry_url,
sentry_environment,
log_format,
opentelemetry,
})
}
}
26 changes: 25 additions & 1 deletion core/lib/protobuf_config/src/observability.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use anyhow::Context as _;
use zksync_config::configs;
use zksync_config::configs::{self};
use zksync_protobuf::{required, ProtoRepr};

use crate::proto::observability as proto;
Expand All @@ -11,6 +11,11 @@ impl ProtoRepr for proto::Observability {
sentry_url: self.sentry_url.clone(),
sentry_environment: self.sentry_environment.clone(),
log_format: required(&self.log_format).context("log_format")?.clone(),
opentelemetry: self
.opentelemetry
.as_ref()
.map(|cfg| cfg.read().context("opentelemetry"))
.transpose()?,
})
}

Expand All @@ -19,6 +24,25 @@ impl ProtoRepr for proto::Observability {
sentry_url: this.sentry_url.clone(),
sentry_environment: this.sentry_environment.clone(),
log_format: Some(this.log_format.clone()),
opentelemetry: this.opentelemetry.as_ref().map(ProtoRepr::build),
}
}
}

// Conversion between the protobuf `Opentelemetry` message and
// `configs::OpentelemetryConfig`.
impl ProtoRepr for proto::Opentelemetry {
    type Type = configs::OpentelemetryConfig;

    /// Converts the protobuf message into the config struct.
    ///
    /// # Errors
    ///
    /// Returns an error (with the field name as context) if `level` or
    /// `endpoint` is absent; `level` is checked first.
    fn read(&self) -> anyhow::Result<Self::Type> {
        let level = required(&self.level).context("level")?;
        let endpoint = required(&self.endpoint).context("endpoint")?;
        Ok(Self::Type {
            level: level.clone(),
            endpoint: endpoint.clone(),
        })
    }

    /// Builds the protobuf message from the config struct; both fields are
    /// always populated.
    fn build(this: &Self::Type) -> Self {
        let configs::OpentelemetryConfig { level, endpoint } = this;
        Self {
            level: Some(level.clone()),
            endpoint: Some(endpoint.clone()),
        }
    }
}
6 changes: 6 additions & 0 deletions core/lib/protobuf_config/src/proto/observability.proto
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ message Observability {
optional string sentry_url = 1; // optional
optional string sentry_environment = 2; // optional
optional string log_format = 3; // required
optional Opentelemetry opentelemetry = 4; // optional
}

// OpenTelemetry export settings; read into `configs::OpentelemetryConfig`.
message Opentelemetry {
optional string level = 1; // required; spans of this level and above are exported
optional string endpoint = 2; // required; OpenTelemetry HTTP collector endpoint
}
3 changes: 2 additions & 1 deletion core/lib/queued_job_processor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub trait JobProcessor: Sync + Send {
/// Function that processes a job.
///
/// `job_id` is the identifier of the job being processed. It is borrowed, so
/// the caller keeps ownership of it.
/// `job` is the job payload, taken by value.
/// `started_at` is presumably the instant at which processing of this job
/// began (TODO confirm against the caller).
///
/// Returns a handle to the processing task, which resolves to the job
/// artifacts or an error.
async fn process_job(
&self,
job_id: &Self::JobId,
job: Self::Job,
started_at: Instant,
) -> JoinHandle<anyhow::Result<Self::JobArtifacts>>;
Expand Down Expand Up @@ -83,7 +84,7 @@ pub trait JobProcessor: Sync + Send {
Self::SERVICE_NAME,
job_id
);
let task = self.process_job(job, started_at).await;
let task = self.process_job(&job_id, job, started_at).await;

self.wait_for_task(job_id, started_at, task)
.await
Expand Down
7 changes: 7 additions & 0 deletions core/lib/vlog/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,12 @@ tracing-subscriber = { workspace = true, features = [
"time",
"json",
] }
tracing-opentelemetry.workspace = true
sentry.workspace = true
serde_json.workspace = true
opentelemetry = { workspace = true, features = ["rt-tokio", "trace"] }
opentelemetry-otlp = { workspace = true, features = [
"http-proto",
"reqwest-client",
] }
opentelemetry-semantic-conventions.workspace = true
Loading

0 comments on commit 16dce75

Please sign in to comment.