@@ -15,17 +15,18 @@ use std::env;
15
15
use std:: sync:: Arc ;
16
16
17
17
use anyhow:: Result ;
18
+ use log:: info;
18
19
use pyo3:: exceptions:: { PyRuntimeError , PyTimeoutError } ;
19
20
use structopt:: StructOpt ;
20
21
use tokio:: runtime:: Runtime ;
21
22
use tokio:: task:: JoinHandle ;
22
- use tonic:: transport:: Channel ;
23
23
use tonic:: Status ;
24
24
25
25
/// Rust bindings for the `torchft` protobuf package, pulled in through
/// tonic's `include_proto!` macro. The rest of this file imports the
/// `manager_service_client::ManagerServiceClient` gRPC client and the
/// request message types from this module.
pub mod torchftpb {
26
26
tonic:: include_proto!( "torchft" ) ;
27
27
}
28
28
29
+ use crate :: net:: Channel ;
29
30
use crate :: torchftpb:: manager_service_client:: ManagerServiceClient ;
30
31
use crate :: torchftpb:: { CheckpointAddressRequest , ManagerQuorumRequest , ShouldCommitRequest } ;
31
32
use pyo3:: prelude:: * ;
@@ -301,8 +302,7 @@ impl From<Status> for StatusError {
301
302
}
302
303
}
303
304
304
- #[ pymodule]
305
- fn torchft ( m : & Bound < ' _ , PyModule > ) -> PyResult < ( ) > {
305
+ fn init_logging ( ) -> PyResult < ( ) > {
306
306
// setup logging on import
307
307
let mut log = stderrlog:: new ( ) ;
308
308
log. verbosity ( 2 )
@@ -316,6 +316,92 @@ fn torchft(m: &Bound<'_, PyModule>) -> PyResult<()> {
316
316
log. init ( )
317
317
. map_err ( |e| PyRuntimeError :: new_err ( e. to_string ( ) ) ) ?;
318
318
319
+ Ok ( ( ) )
320
+ }
321
+
322
+ fn init_tracing ( ) -> PyResult < ( ) > {
323
+ use opentelemetry:: trace:: Tracer ;
324
+ use opentelemetry:: trace:: TracerProvider as OpenTelemetryTracerProvider ;
325
+ use opentelemetry_otlp:: WithExportConfig ;
326
+ use opentelemetry_sdk:: trace:: TracerProvider ;
327
+ use tracing_subscriber:: layer:: SubscriberExt ;
328
+ use tracing_subscriber:: { filter:: EnvFilter , Layer } ;
329
+
330
+ fn set_tracer_provider ( tracer_provider : TracerProvider ) -> PyResult < ( ) > {
331
+ opentelemetry:: global:: set_tracer_provider ( tracer_provider. clone ( ) ) ;
332
+
333
+ let layer = tracing_opentelemetry:: layer ( )
334
+ . with_error_records_to_exceptions ( true )
335
+ . with_tracer ( tracer_provider. tracer ( "" ) ) ;
336
+
337
+ // Create a new tracing::Fmt layer to print the logs to stdout. It has a
338
+ // default filter of `info` level and above, and `debug` and above for logs
339
+ // from OpenTelemetry crates. The filter levels can be customized as needed.
340
+ let filter_fmt =
341
+ EnvFilter :: new ( "info" ) . add_directive ( "opentelemetry=debug" . parse ( ) . unwrap ( ) ) ;
342
+ let fmt_layer = tracing_subscriber:: fmt:: layer ( )
343
+ . with_thread_names ( true )
344
+ . with_filter ( filter_fmt) ;
345
+
346
+ let subscriber = tracing_subscriber:: registry ( ) . with ( fmt_layer) . with ( layer) ;
347
+ tracing:: subscriber:: set_global_default ( subscriber)
348
+ . map_err ( |e| PyRuntimeError :: new_err ( e. to_string ( ) ) ) ?;
349
+
350
+ info ! ( "OpenTelemetry tracing enabled" ) ;
351
+
352
+ Ok ( ( ) )
353
+ }
354
+
355
+ match env:: var ( "TORCHFT_OTEL_OTLP" ) {
356
+ Ok ( endpoint) => {
357
+ let runtime = Runtime :: new ( ) ?;
358
+
359
+ runtime. block_on ( async move {
360
+ info ! ( "Enabling OpenTelemetry OTLP with {}" , endpoint) ;
361
+ let exporter = opentelemetry_otlp:: SpanExporter :: builder ( )
362
+ . with_tonic ( )
363
+ . with_endpoint ( endpoint)
364
+ . with_timeout ( Duration :: from_secs ( 10 ) )
365
+ . build ( )
366
+ . map_err ( |e| PyRuntimeError :: new_err ( e. to_string ( ) ) ) ?;
367
+
368
+ let tracer_provider = TracerProvider :: builder ( )
369
+ . with_batch_exporter ( exporter, opentelemetry_sdk:: runtime:: Tokio )
370
+ . build ( ) ;
371
+
372
+ set_tracer_provider ( tracer_provider) ?;
373
+
374
+ Ok :: < ( ) , pyo3:: PyErr > ( ( ) )
375
+ } ) ?;
376
+ }
377
+ Err ( _) => { }
378
+ } ;
379
+ match env:: var ( "TORCHFT_OTEL_STDOUT" ) {
380
+ Ok ( _) => {
381
+ info ! ( "Enabling OpenTelemetry stdout" ) ;
382
+ let exporter = opentelemetry_stdout:: SpanExporter :: default ( ) ;
383
+ let tracer_provider = TracerProvider :: builder ( )
384
+ . with_simple_exporter ( exporter)
385
+ . build ( ) ;
386
+
387
+ set_tracer_provider ( tracer_provider) ?;
388
+ }
389
+ Err ( _) => { }
390
+ }
391
+
392
+ let tracer = opentelemetry:: global:: tracer ( "my_tracer" ) ;
393
+ tracer. in_span ( "doing_work" , |cx| {
394
+ // Traced app logic here...
395
+ } ) ;
396
+
397
+ Ok ( ( ) )
398
+ }
399
+
400
+ #[ pymodule]
401
+ fn torchft ( m : & Bound < ' _ , PyModule > ) -> PyResult < ( ) > {
402
+ init_logging ( ) ?;
403
+ init_tracing ( ) ?;
404
+
319
405
m. add_class :: < Manager > ( ) ?;
320
406
m. add_class :: < ManagerClient > ( ) ?;
321
407
m. add_class :: < Lighthouse > ( ) ?;
0 commit comments