diff --git a/CHANGELOG.md b/CHANGELOG.md index d9d30c6c..73522e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Release notes +## 2023.1.0 (2023-1-20) + +### Bugfixes + +- Fix regression where source code would sometimes be missing from flamegraphs, most notably in Jupyter profiling. ([#474](https://github.com/pythonspeed/filprofiler/issues/474)) + ## 2022.11.0 (2022-11-07) ### Features diff --git a/Cargo.lock b/Cargo.lock index 4a9e601b..21af4ded 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -408,6 +408,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indoc" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2d6f23ffea9d7e76c53eee25dfb67bcd8fde7f1198b0855350698c9f07c780" + [[package]] name = "inferno" version = "0.11.13" @@ -540,6 +546,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -565,7 +580,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -737,33 +752,38 @@ dependencies = [ "libc", "libloading", "once_cell", + "parking_lot", "proc-maps", "proptest", "psutil", "pyo3", + "rusty-fork", "serde", "tempfile", ] [[package]] name = "pyo3" -version = "0.17.3" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "268be0c73583c183f2b14052337465768c07726936a260f480f0857cb95ba543" +checksum = "ccd4149c8c3975099622b4e1962dac27565cf5663b76452c3e2b66e0b6824277" dependencies = [ "cfg-if", + "indoc", "libc", - "memoffset", + "memoffset 0.8.0", "parking_lot", "pyo3-build-config", "pyo3-ffi", + "pyo3-macros", + "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.17.3" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28fcd1e73f06ec85bf3280c48c67e731d8290ad3d730f8be9dc07946923005c8" +checksum = "9cd09fe469834db21ee60e0051030339e5d361293d8cb5ec02facf7fdcf52dbf" dependencies = [ "once_cell", "target-lexicon", @@ -771,14 +791,37 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.17.3" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f6cb136e222e49115b3c51c32792886defbfb0adead26a688142b346a0b9ffc" +checksum = "0c427c9a96b9c5b12156dbc11f76b14f49e9aae8905ca783ea87c249044ef137" dependencies = [ "libc", "pyo3-build-config", ] +[[package]] +name = "pyo3-macros" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b822bbba9d60630a44d2109bc410489bb2f439b33e3a14ddeb8a40b378a7c4" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84ae898104f7c99db06231160770f3e40dad6eb9021daddc0fedfa3e41dff10a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1084,6 +1127,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +[[package]] +name = "unindent" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" + [[package]] name = "version_check" version = "0.9.4" diff --git a/filpreload/Cargo.toml b/filpreload/Cargo.toml index bd9c1133..93fb6293 100644 --- a/filpreload/Cargo.toml +++ b/filpreload/Cargo.toml @@ -18,7 +18,7 @@ path = "../memapi" features = [] [dependencies.pyo3] -version = "0.17" +version = "0.18" default-features = false [build-dependencies] diff --git a/filpreload/src/lib.rs b/filpreload/src/lib.rs index 05ca54c8..77d733da 100644 --- a/filpreload/src/lib.rs +++ b/filpreload/src/lib.rs @@ -2,12 +2,14 @@ use parking_lot::Mutex; use pymemprofile_api::memorytracking::LineNumberInfo::LineNumber; use pymemprofile_api::memorytracking::{ - AllocationTracker, CallSiteId, Callstack, FunctionId, VecFunctionLocations, PARENT_PROCESS, + AllocationTracker, CallSiteId, Callstack, FunctionId, IdentityCleaner, VecFunctionLocations, + PARENT_PROCESS, }; use pymemprofile_api::oom::{InfiniteMemory, OutOfMemoryEstimator, RealMemoryInfo}; use std::cell::RefCell; use std::ffi::CStr; use std::os::raw::{c_char, c_int, c_void}; +use std::path::Path; #[macro_use] extern crate lazy_static; @@ -87,6 +89,7 @@ fn set_current_callstack(callstack: &Callstack) { } extern "C" { + fn _exit(exit_code: std::os::raw::c_int); fn free(address: *mut c_void); } @@ -152,7 +155,23 @@ fn add_allocation( if oom { // Uh-oh, we're out of memory. - allocations.oom_dump(); + eprintln!( + "=fil-profile= We'll try to dump out SVGs. Note that no HTML file will be written." + ); + let default_path = allocations.default_path.clone(); + // Release the lock, since dumping the flamegraph will reacquire it: + drop(tracker_state); + + dump_to_flamegraph( + &default_path, + false, + "out-of-memory", + "Current allocations at out-of-memory time", + false, + ); + unsafe { + _exit(53); + } }; Ok(()) } @@ -180,11 +199,55 @@ fn reset(default_path: String) { tracker_state.allocations.reset(default_path); } +fn dump_to_flamegraph( + path: &str, + peak: bool, + base_filename: &str, + title: &str, + to_be_post_processed: bool, +) { + // In order to render the flamegraph, we want to load source code using + // Python's linecache. That means calling into Python, which might release + // the GIL, allowing another thread to run, and it will try to allocation + // and hit the TRACKER_STATE mutex. And now we're deadlocked. So we make + // sure flamegraph rendering does not require TRACKER_STATE to be locked. + let (allocated_bytes, flamegraph_callstacks) = { + let mut tracker_state = TRACKER_STATE.lock(); + let allocations = &mut tracker_state.allocations; + + // Print warning if we're missing allocations. + allocations.warn_on_problems(peak); + let allocated_bytes = if peak { + allocations.get_peak_allocated_bytes() + } else { + allocations.get_current_allocated_bytes() + }; + let flamegraph_callstacks = allocations.combine_callstacks(peak, IdentityCleaner); + (allocated_bytes, flamegraph_callstacks) + }; + + eprintln!("=fil-profile= Preparing to write to {}", path); + let directory_path = Path::new(path); + + let title = format!( + "{} ({:.1} MiB)", + title, + allocated_bytes as f64 / (1024.0 * 1024.0) + ); + let subtitle = r#"Made with the Fil profiler. Try it on your code!"#; + flamegraph_callstacks.write_flamegraphs( + directory_path, + base_filename, + &title, + subtitle, + "bytes", + to_be_post_processed, + ) +} + /// Dump all callstacks in peak memory usage to format used by flamegraph. 
fn dump_peak_to_flamegraph(path: &str) { - let mut tracker_state = TRACKER_STATE.lock(); - let allocations = &mut tracker_state.allocations; - allocations.dump_peak_to_flamegraph(path); + dump_to_flamegraph(path, true, "peak-memory", "Peak Tracked Memory Usage", true); } #[no_mangle] @@ -318,8 +381,8 @@ extern "C" { fn is_initialized() -> c_int; // Increment/decrement reentrancy counter. - fn fil_increment_reentrancy(); - fn fil_decrement_reentrancy(); + //fn fil_increment_reentrancy(); + //fn fil_decrement_reentrancy(); } struct FilMmapAPI; @@ -337,7 +400,7 @@ impl pymemprofile_api::mmap::MmapAPI for FilMmapAPI { } fn is_initialized(&self) -> bool { - return unsafe { is_initialized() == 1 }; + unsafe { is_initialized() == 1 } } } diff --git a/filprofiler/_report.py b/filprofiler/_report.py index 308151d3..90a5bdd2 100644 --- a/filprofiler/_report.py +++ b/filprofiler/_report.py @@ -76,19 +76,16 @@ def render_report(output_path: str, now: datetime) -> str:

 Profiling result
 ·
- Check out my other project:
- Find memory and performance bottlenecks in production!
- When your data pipeline is too slow in production, reproducing the problem on your laptop is hard or impossible—which means identifying and fixing the problem can be tricky.
- What if you knew the cause of the problem as soon as you realized it was happening?
- That's how the Sciagraph profiler can help you: it's designed to find performance and memory bottlenecks by continuously profiling in production.
+ Find performance bottlenecks in your data processing jobs with the Sciagraph profiler
+ The Sciagraph profiler can help you find performance and memory bottlenecks with low overhead, so you can use it in both development and production.
+ Unlike Fil, it includes performance profiling. Sciagraph's memory profiling uses sampling so it runs faster than Fil, but unlike Fil it can't accurately profile small allocations or run natively on macOS.



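Note on the filpreload/src/lib.rs change above: the new dump_to_flamegraph explains why the TRACKER_STATE lock must be released before rendering, since rendering now calls into Python's linecache, which can release the GIL and let another thread allocate, and that thread would then block on the same mutex. A minimal sketch of that lock-scoping pattern follows; Tracker, Snapshot, and render_with_python are illustrative stand-ins, not the real Fil types.

use parking_lot::Mutex;

// Illustrative stand-ins for the real tracker state.
struct Tracker { live_bytes: usize }
struct Snapshot { live_bytes: usize }

fn render_with_python(snapshot: Snapshot) {
    // Stand-in for flamegraph rendering, which calls into CPython's linecache
    // and may therefore release the GIL and let other threads run.
    println!("{} bytes currently tracked", snapshot.live_bytes);
}

fn dump(tracker: &Mutex<Tracker>) {
    // Copy out everything the renderer needs while holding the lock...
    let snapshot = {
        let guard = tracker.lock();
        Snapshot { live_bytes: guard.live_bytes }
    }; // ...the guard is dropped here, before any Python code runs.

    // Any thread that allocates while Python runs can now take the lock,
    // so rendering cannot deadlock against the allocation hook.
    render_with_python(snapshot);
}

fn main() {
    let tracker = Mutex::new(Tracker { live_bytes: 1024 });
    dump(&tracker);
}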
· diff --git a/memapi/Cargo.toml b/memapi/Cargo.toml index b3d30580..b86565d7 100644 --- a/memapi/Cargo.toml +++ b/memapi/Cargo.toml @@ -2,7 +2,7 @@ name = "pymemprofile_api" version = "0.1.0" authors = ["Itamar Turner-Trauring "] -edition = "2018" +edition = "2021" license = "Apache-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -18,6 +18,7 @@ once_cell = "1.17" libloading = "0.7" libc = "0.2" serde = {version = "1", features = ["derive"] } +parking_lot = "0.12.1" [dependencies.inferno] version = "0.11" @@ -29,9 +30,7 @@ default-features = false features = ["memory", "process"] [dependencies.pyo3] -version = "0.17" -default-features = false -features = [] +version = "0.18" [target.'cfg(target_os = "linux")'.dependencies] cgroups-rs = "0.3.0" @@ -40,8 +39,9 @@ cgroups-rs = "0.3.0" proptest = "1.0" proc-maps = "0.3.0" tempfile = "3.3.0" +rusty-fork = "0.3.0" [features] default = [] # Optimize for the production version of Fil. -fil4prod = [] \ No newline at end of file +fil4prod = [] diff --git a/memapi/src/flamegraph.rs b/memapi/src/flamegraph.rs index 334a9e1b..ad93cbfe 100644 --- a/memapi/src/flamegraph.rs +++ b/memapi/src/flamegraph.rs @@ -1,8 +1,13 @@ -use std::{fs, io::Write, path::Path}; +use std::{borrow::Cow, fs, io::Write, path::Path}; use inferno::flamegraph; use itertools::Itertools; +use crate::{ + linecache::LineCacher, + memorytracking::{Callstack, FunctionLocations}, +}; + /// Filter down to top 99% of samples. /// /// 1. Empty samples are dropped. @@ -59,27 +64,179 @@ pub fn write_lines>(lines: I, path: &Path) -> std Ok(()) } -/// Write a flamegraph SVG to disk, given lines in summarized format. -pub fn write_flamegraph>( - lines: I, - path: &Path, - reversed: bool, - title: &str, - subtitle: &str, - count_name: &str, - to_be_post_processed: bool, -) -> Result<(), Box> { - let flamegraph = get_flamegraph( - lines, - reversed, - title, - subtitle, - count_name, - to_be_post_processed, - )?; - let mut file = std::fs::File::create(path)?; - file.write_all(&flamegraph)?; - Ok(()) +/// A strategy for cleaning up callstacks before rendering them to text. +pub trait CallstackCleaner { + fn cleanup<'a>(&self, callstack: &'a Callstack) -> Cow<'a, Callstack>; +} + +/// The data needed to create a flamegraph. +pub struct FlamegraphCallstacks { + data: D, + functions: FL, + callstack_cleaner: UC, +} + +impl<'a, D, FL, UC> FlamegraphCallstacks +where + &'a D: IntoIterator, + <&'a D as IntoIterator>::IntoIter: ExactSizeIterator, + D: 'a, + FL: FunctionLocations, + UC: CallstackCleaner, +{ + pub fn new(data: D, functions: FL, callstack_cleaner: UC) -> Self { + Self { + data, + functions, + callstack_cleaner, + } + } + + /// Create iterator over the line-based string format parsed by the inferno + /// crate. 
+ pub fn to_lines( + &'a self, + to_be_post_processed: bool, + ) -> impl ExactSizeIterator + 'a { + let by_call = (&self.data).into_iter(); + let mut linecache = LineCacher::default(); + by_call.map(move |(callstack, size)| { + format!( + "{} {}", + self.callstack_cleaner.cleanup(callstack).as_string( + to_be_post_processed, + &self.functions, + ";", + &mut linecache, + ), + size, + ) + }) + } + + /// Low-level interface for writing flamegraphs with post-processing: + pub fn get_flamegraph_with_options( + &'a self, + to_be_post_processed: bool, + options: flamegraph::Options, + // special cased because it needs to be psot-processed: + subtitle: Option<&str>, + ) -> Result, Box> { + let lines = self.to_lines(to_be_post_processed); + get_flamegraph_with_options(lines, to_be_post_processed, options, subtitle) + } + + /// Write a flamegraph SVG to disk, given lines in summarized format. + pub fn get_flamegraph( + &'a self, + reversed: bool, + title: &str, + subtitle: &str, + count_name: &str, + to_be_post_processed: bool, + ) -> Result, Box> { + get_flamegraph( + self.to_lines(to_be_post_processed), + reversed, + title, + subtitle, + count_name, + to_be_post_processed, + ) + } + + /// Write a flamegraph SVG to disk. + pub fn write_flamegraph( + &'a self, + path: &Path, + reversed: bool, + title: &str, + subtitle: &str, + count_name: &str, + to_be_post_processed: bool, + ) -> Result<(), Box> { + let flamegraph = + self.get_flamegraph(reversed, title, subtitle, count_name, to_be_post_processed)?; + let mut file = std::fs::File::create(path)?; + file.write_all(&flamegraph)?; + Ok(()) + } + + /// Write .prof, -source.prof, .svg and -reversed.svg files for given lines. + pub fn write_flamegraphs( + &'a self, + directory_path: &Path, + base_filename: &str, + title: &str, + subtitle: &str, + count_name: &str, + to_be_post_processed: bool, + ) { + if !directory_path.exists() { + fs::create_dir_all(directory_path) + .expect("=fil-profile= Couldn't create the output directory."); + } else if !directory_path.is_dir() { + panic!("=fil-profile= Output path must be a directory."); + } + + let raw_path_without_source_code = directory_path.join(format!("{}.prof", base_filename)); + + let raw_path_with_source_code = + directory_path.join(format!("{}-source.prof", base_filename)); + + // Always write .prof file without source code, for use by tests and + // other automated post-processing. + if let Err(e) = write_lines(self.to_lines(false), &raw_path_without_source_code) { + eprintln!("=fil-profile= Error writing raw profiling data: {}", e); + return; + } + + // Optionally write version with source code for SVGs, if we're using + // source code. 
+ if to_be_post_processed { + if let Err(e) = write_lines(self.to_lines(true), &raw_path_with_source_code) { + eprintln!("=fil-profile= Error writing raw profiling data: {}", e); + return; + } + } + + let svg_path = directory_path.join(format!("{}.svg", base_filename)); + match self.write_flamegraph( + &svg_path, + false, + title, + subtitle, + count_name, + to_be_post_processed, + ) { + Ok(_) => { + eprintln!("=fil-profile= Wrote flamegraph to {:?}", svg_path); + } + Err(e) => { + eprintln!("=fil-profile= Error writing SVG: {}", e); + } + } + let svg_path = directory_path.join(format!("{}-reversed.svg", base_filename)); + match self.write_flamegraph( + &svg_path, + true, + title, + subtitle, + count_name, + to_be_post_processed, + ) { + Ok(_) => { + eprintln!("=fil-profile= Wrote flamegraph to {:?}", svg_path); + } + Err(e) => { + eprintln!("=fil-profile= Error writing SVG: {}", e); + } + } + if to_be_post_processed { + // Don't need this file, and it'll be quite big, so delete it. + let _ = std::fs::remove_file(raw_path_with_source_code); + } + } } /// Low-level interface for writing flamegraphs with post-processing: @@ -146,86 +303,6 @@ pub fn get_flamegraph>( get_flamegraph_with_options(lines, to_be_post_processed, options, Some(subtitle)) } -/// Write .prof, -source.prof, .svg and -reversed.svg files for given lines. -pub fn write_flamegraphs( - directory_path: &Path, - base_filename: &str, - title: &str, - subtitle: &str, - count_name: &str, - to_be_post_processed: bool, - get_lines: F, -) where - I: IntoIterator, - F: Fn(bool) -> I, // (to_be_post_processed) -> lines -{ - if !directory_path.exists() { - fs::create_dir_all(directory_path) - .expect("=fil-profile= Couldn't create the output directory."); - } else if !directory_path.is_dir() { - panic!("=fil-profile= Output path must be a directory."); - } - - let raw_path_without_source_code = directory_path.join(format!("{}.prof", base_filename)); - - let raw_path_with_source_code = directory_path.join(format!("{}-source.prof", base_filename)); - - // Always write .prof file without source code, for use by tests and - // other automated post-processing. - if let Err(e) = write_lines(get_lines(false), &raw_path_without_source_code) { - eprintln!("=fil-profile= Error writing raw profiling data: {}", e); - return; - } - - // Optionally write version with source code for SVGs, if we're using - // source code. - if to_be_post_processed { - if let Err(e) = write_lines(get_lines(true), &raw_path_with_source_code) { - eprintln!("=fil-profile= Error writing raw profiling data: {}", e); - return; - } - } - - let svg_path = directory_path.join(format!("{}.svg", base_filename)); - match write_flamegraph( - get_lines(to_be_post_processed), - &svg_path, - false, - title, - subtitle, - count_name, - to_be_post_processed, - ) { - Ok(_) => { - eprintln!("=fil-profile= Wrote flamegraph to {:?}", svg_path); - } - Err(e) => { - eprintln!("=fil-profile= Error writing SVG: {}", e); - } - } - let svg_path = directory_path.join(format!("{}-reversed.svg", base_filename)); - match write_flamegraph( - get_lines(to_be_post_processed), - &svg_path, - true, - title, - subtitle, - count_name, - to_be_post_processed, - ) { - Ok(_) => { - eprintln!("=fil-profile= Wrote flamegraph to {:?}", svg_path); - } - Err(e) => { - eprintln!("=fil-profile= Error writing SVG: {}", e); - } - } - if to_be_post_processed { - // Don't need this file, and it'll be quite big, so delete it. 
- let _ = std::fs::remove_file(raw_path_with_source_code); - } -} - #[cfg(test)] mod tests { use super::filter_to_useful_callstacks; @@ -241,7 +318,7 @@ mod tests { ) { let total_size : usize = allocated_sizes.iter().sum(); let total_size_99 = (99 * total_size) / 100; - let callstacks = (&allocated_sizes).iter().enumerate(); + let callstacks = allocated_sizes.iter().enumerate(); let filtered : HashMap = filter_to_useful_callstacks(callstacks, total_size).collect(); let filtered_size :usize = filtered.values().into_iter().sum(); if filtered_size >= total_size_99 { diff --git a/memapi/src/lib.rs b/memapi/src/lib.rs index 22cbf9a0..eddf7c7f 100644 --- a/memapi/src/lib.rs +++ b/memapi/src/lib.rs @@ -5,7 +5,7 @@ pub mod linecache; pub mod memorytracking; pub mod mmap; pub mod oom; -mod python; +pub mod python; mod rangemap; pub mod util; diff --git a/memapi/src/linecache.rs b/memapi/src/linecache.rs index 673a1ba9..5ddfe232 100644 --- a/memapi/src/linecache.rs +++ b/memapi/src/linecache.rs @@ -1,72 +1,72 @@ -//! A line caching library, a bit like Python's linecache. +//! A Rust wrapper around Python's linecache. We can't just emulate it because +//! PEP 302 `__loader__`s and ipython shoving stuff into it and oh god oh god oh +//! god Python is complicated. -use std::{ - collections::HashMap, - fs::File, - io::{BufRead, BufReader}, -}; +use crate::python; -use ahash::RandomState; - -/// Store a cache of files in memory, allow easy reading of lines. +/// Wrapper around Python's linecache. #[derive(Default)] -pub struct LineCacher { - file_lines: HashMap, RandomState>, -} - -static EMPTY_STRING: String = String::new(); +pub struct LineCacher {} impl LineCacher { - /// Get the source code line for the given file. If the file doesn't exist - /// or line number is too big, an empty string is returned. Line endings are - /// stripped. - pub fn get_source_line<'a>(&'a mut self, filename: &str, line_number: usize) -> &'a str { + /// Get the source code line for the given file. + pub fn get_source_line(&mut self, filename: &str, line_number: usize) -> String { if line_number == 0 { - return &EMPTY_STRING; + return String::new(); } - let entry = - self.file_lines - .entry(filename.to_string()) - .or_insert_with(|| -> Vec { - File::open(filename) - .map(|f| { - BufReader::new(f) - .lines() - .map(|l| l.unwrap_or_else(|_| EMPTY_STRING.clone())) - .collect() - }) - .unwrap_or_else(|_| vec![]) - }); - entry.get(line_number - 1).unwrap_or(&EMPTY_STRING) + python::get_source_line(filename, line_number).unwrap_or_default() } } #[cfg(test)] mod tests { + use super::*; + use pyo3::prelude::*; + use rusty_fork::rusty_fork_test; use std::io::Write; - use super::LineCacher; + rusty_fork_test! { + /// The linecache can read files. 
+ #[test] + fn linecacher_from_file() { + pyo3::prepare_freethreaded_python(); + let mut cache = LineCacher::default(); - #[test] - fn linecache() { - let mut cache = LineCacher::default(); + // Non-existent file + assert_eq!(cache.get_source_line("/no/such/file", 1), ""); - // Non-existent file - assert_eq!(cache.get_source_line("/no/such/file", 1), ""); + let mut f = tempfile::NamedTempFile::new().unwrap(); + f.as_file_mut().write_all(b"abc\ndef\r\nghijk").unwrap(); + let path = f.path().as_os_str().to_str().unwrap(); - let mut f = tempfile::NamedTempFile::new().unwrap(); - f.as_file_mut().write_all(b"abc\ndef\r\nghijk").unwrap(); - let path = f.path().as_os_str().to_str().unwrap(); + // 0 line number + assert_eq!(cache.get_source_line(path, 0), ""); - // 0 line number - assert_eq!(cache.get_source_line(path, 0), ""); + // Too high line number + assert_eq!(cache.get_source_line(path, 4), ""); - // Too high line number - assert_eq!(cache.get_source_line(path, 4), ""); + // Present line numbers + assert_eq!(cache.get_source_line(path, 1), "abc\n"); + assert_eq!(cache.get_source_line(path, 2), "def\n"); + assert_eq!(cache.get_source_line(path, 3), "ghijk\n"); + } - // Present line numbers - assert_eq!(cache.get_source_line(path, 1), "abc"); - assert_eq!(cache.get_source_line(path, 2), "def"); - assert_eq!(cache.get_source_line(path, 3), "ghijk"); + /// The linecache can read random crap shoved into the linecache module. + #[test] + fn linecacher_from_arbitrary_source() { + pyo3::prepare_freethreaded_python(); + let mut cache = LineCacher::default(); + + Python::with_gil(|py| { + let blah = vec!["arr\n", "boo"]; + let linecache = PyModule::import(py, "linecache")?; + linecache + .getattr("cache")?.set_item("blah", (8, 0, blah, "blah"))?; + Ok::<(), PyErr>(()) + }).unwrap(); + + assert_eq!(cache.get_source_line("blah", 1), "arr\n"); + assert_eq!(cache.get_source_line("blah", 2), "boo"); + } } } diff --git a/memapi/src/memorytracking.rs b/memapi/src/memorytracking.rs index 998a94f8..0bf41b4b 100644 --- a/memapi/src/memorytracking.rs +++ b/memapi/src/memorytracking.rs @@ -1,5 +1,6 @@ use crate::flamegraph::filter_to_useful_callstacks; -use crate::flamegraph::write_flamegraphs; +use crate::flamegraph::CallstackCleaner; +use crate::flamegraph::FlamegraphCallstacks; use crate::linecache::LineCacher; use crate::python::get_runpy_path; @@ -13,7 +14,6 @@ use serde::Serialize; use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::HashMap; -use std::path::Path; extern "C" { fn _exit(exit_code: std::os::raw::c_int); @@ -41,27 +41,32 @@ struct FunctionLocation { function_name: String, } +/// The clone should be cheap, ideally, so probably an immutable data structures +/// would be good. pub trait FunctionLocations { fn get_function_and_filename(&self, id: FunctionId) -> (&str, &str); + + // Like Clone.clone(), but should be cheap. + fn cheap_clone(&self) -> Self; } /// Stores FunctionLocations, returns a FunctionId #[derive(Clone)] pub struct VecFunctionLocations { - functions: Vec, + functions: ImVector, } impl VecFunctionLocations { /// Create a new tracker. pub fn new() -> Self { Self { - functions: Vec::with_capacity(8192), + functions: ImVector::new(), } } /// Register a function, get back its id. 
pub fn add_function(&mut self, filename: String, function_name: String) -> FunctionId { - self.functions.push(FunctionLocation { + self.functions.push_back(FunctionLocation { filename, function_name, }); @@ -80,6 +85,12 @@ impl FunctionLocations for VecFunctionLocations { let location = &self.functions[id.0 as usize]; (&location.function_name, &location.filename) } + + fn cheap_clone(&self) -> Self { + Self { + functions: self.functions.clone(), + } + } } /// Either the line number, or the bytecode index needed to get it. @@ -186,10 +197,10 @@ impl Callstack { callstack_id } - pub fn as_string( + pub fn as_string( &self, to_be_post_processed: bool, - functions: &dyn FunctionLocations, + functions: &FL, separator: &'static str, linecache: &mut LineCacher, ) -> String { @@ -224,12 +235,17 @@ impl Callstack { // which I can't be bothered to fix right now, so for // now do hack where we shove in some other character // that can be fixed in post-processing. - let code = code.replace(" ", "\u{12e4}"); + let code = code.trim_end().replace(' ', "\u{12e4}"); + // Tabs == 8 spaces in Python. + let code = code.replace( + '\t', + "\u{12e4}\u{12e4}\u{12e4}\u{12e4}\u{12e4}\u{12e4}\u{12e4}\u{12e4}", + ); // Semicolons are used as separator in the flamegraph // input format, so need to replace them with some other // character. We use "full-width semicolon", and then // replace it back in post-processing. - let code = code.replace(";", "\u{ff1b}"); + let code = code.replace(';', "\u{ff1b}"); // The \u{2800} is to ensure we don't have empty lines, // and that whitespace doesn't get trimmed from start; // we'll get rid of this in post-processing. @@ -356,8 +372,13 @@ impl Allocation { } } -fn same_callstack(callstack: &Callstack) -> Cow { - Cow::Borrowed(callstack) +/// A CallstackCleaner that leaves the callstack unchanged. +pub struct IdentityCleaner; + +impl CallstackCleaner for IdentityCleaner { + fn cleanup<'a>(&self, callstack: &'a Callstack) -> Cow<'a, Callstack> { + Cow::Borrowed(callstack) + } } /// The main data structure tracking everything. @@ -381,7 +402,7 @@ pub struct AllocationTracker { current_allocated_bytes: usize, peak_allocated_bytes: usize, // Default directory to write out data lacking other info: - default_path: String, + pub default_path: String, // Allocations that somehow disappeared. Not relevant for sampling profiler. missing_allocated_bytes: usize, @@ -597,11 +618,12 @@ impl AllocationTracker { /// Combine Callstacks and make them human-readable. Duplicate callstacks /// have their allocated memory summed. - fn combine_callstacks( - &self, + pub fn combine_callstacks( + &mut self, // If false, will do the current allocations: peak: bool, - ) -> HashMap { + callstack_cleaner: CC, + ) -> FlamegraphCallstacks, FL, CC> { // Would be nice to validate if data is consistent. However, there are // edge cases that make it slightly inconsistent (e.g. see the // unexpected code path in add_allocation() above), and blowing up @@ -615,96 +637,24 @@ impl AllocationTracker { // flamegraph (which currently loads EVERYTHING into memory), just do // the top 99% of allocations. let callstacks = if peak { + self.check_if_new_peak(); &self.peak_memory_usage } else { &self.current_memory_usage }; let sum = callstacks.iter().sum(); - filter_to_useful_callstacks(callstacks.iter().enumerate(), sum) - .into_iter() - .map(|(k, v)| (k as CallstackId, v)) - .collect() - } - - /// Dump all callstacks in peak memory usage to various files describing the - /// memory usage. 
- pub fn dump_peak_to_flamegraph(&mut self, path: &str) { - self.dump_to_flamegraph(path, true, "peak-memory", "Peak Tracked Memory Usage", true); - } - - pub fn to_lines<'a, F>( - &'a self, - peak: bool, - to_be_post_processed: bool, - update_callstack: F, - ) -> impl ExactSizeIterator + '_ - where - F: Fn(&Callstack) -> Cow + 'a, - { - let by_call = self.combine_callstacks(peak).into_iter(); let id_to_callstack = self.interner.get_reverse_map(); - let mut linecache = LineCacher::default(); - by_call.map(move |(callstack_id, size)| { - format!( - "{} {}", - update_callstack(id_to_callstack.get(&callstack_id).unwrap()).as_string( - to_be_post_processed, - &self.functions, - ";", - &mut linecache, - ), - size, - ) - }) - } - - fn dump_to_flamegraph( - &mut self, - path: &str, - peak: bool, - base_filename: &str, - title: &str, - to_be_post_processed: bool, - ) { - // First, make sure peaks are correct: - self.check_if_new_peak(); - - // Print warning if we're missing allocations. - #[cfg(not(feature = "fil4prod"))] - { - let allocated_bytes = if peak { - self.peak_allocated_bytes - } else { - self.current_allocated_bytes - }; - if self.missing_allocated_bytes > 0 { - eprintln!("=fil-profile= WARNING: {:.2}% ({} bytes) of tracked memory somehow disappeared. If this is a small percentage you can just ignore this warning, since the missing allocations won't impact the profiling results. If the % is high, please run `export FIL_DEBUG=1` to get more output', re-run Fil on your script, and then file a bug report at https://github.com/pythonspeed/filprofiler/issues/new", self.missing_allocated_bytes as f64 * 100.0 / allocated_bytes as f64, self.missing_allocated_bytes); - } - if self.failed_deallocations > 0 { - eprintln!("=fil-profile= WARNING: Encountered {} deallocations of untracked allocations. A certain number are expected in normal operation, of allocations created before Fil started tracking, and even more if you're using the Fil API to turn tracking on and off.", self.failed_deallocations); - } - } - - eprintln!("=fil-profile= Preparing to write to {}", path); - let directory_path = Path::new(path); - - let title = format!( - "{} ({:.1} MiB)", - title, - self.peak_allocated_bytes as f64 / (1024.0 * 1024.0) - ); - #[cfg(not(feature = "fil4prod"))] - let subtitle = r#"Made with the Fil profiler. Try it on your code!"#; - #[cfg(feature = "fil4prod")] - let subtitle = r#"Made with the Fil4prod profiler. Try it on your code!"#; - write_flamegraphs( - directory_path, - base_filename, - &title, - subtitle, - "bytes", - to_be_post_processed, - |tbpp| self.to_lines(peak, tbpp, same_callstack), + FlamegraphCallstacks::new( + filter_to_useful_callstacks(callstacks.iter().enumerate(), sum) + .into_iter() + .filter_map(|(k, v)| { + id_to_callstack + .get(&(k as CallstackId)) + .map(|cs| ((**cs).clone(), v)) + }) + .collect(), + self.functions.cheap_clone(), + callstack_cleaner, ) } @@ -714,24 +664,6 @@ impl AllocationTracker { self.peak_memory_usage.clear(); } - /// Dump information about where we are. - pub fn oom_dump(&mut self) { - eprintln!( - "=fil-profile= We'll try to dump out SVGs. Note that no HTML file will be written." - ); - let default_path = self.default_path.clone(); - self.dump_to_flamegraph( - &default_path, - false, - "out-of-memory", - "Current allocations at out-of-memory time", - false, - ); - unsafe { - _exit(53); - } - } - /// Validate internal state is in a good state. This won't pass until /// check_if_new_peak() is called. 
fn validate(&self) { @@ -757,6 +689,22 @@ impl AllocationTracker { assert!(self.peak_memory_usage.iter().sum::() == self.peak_allocated_bytes); } + /// Warn of untracked allocations; only relevant if you are profiling _all_ + /// allocations, i.e. Fil but not Sciagraph. + pub fn warn_on_problems(&self, peak: bool) { + let allocated_bytes = if peak { + self.get_peak_allocated_bytes() + } else { + self.get_current_allocated_bytes() + }; + if self.missing_allocated_bytes > 0 { + eprintln!("=fil-profile= WARNING: {:.2}% ({} bytes) of tracked memory somehow disappeared. If this is a small percentage you can just ignore this warning, since the missing allocations won't impact the profiling results. If the % is high, please run `export FIL_DEBUG=1` to get more output', re-run Fil on your script, and then file a bug report at https://github.com/pythonspeed/filprofiler/issues/new", self.missing_allocated_bytes as f64 * 100.0 / allocated_bytes as f64, self.missing_allocated_bytes); + } + if self.failed_deallocations > 0 { + eprintln!("=fil-profile= WARNING: Encountered {} deallocations of untracked allocations. A certain number are expected in normal operation, of allocations created before Fil started tracking, and even more if you're using the Fil API to turn tracking on and off.", self.failed_deallocations); + } + } + /// Reset internal state in way that doesn't invalidate e.g. thread-local /// caching of callstack ID. pub fn reset(&mut self, default_path: String) { @@ -775,7 +723,7 @@ impl AllocationTracker { #[cfg(test)] mod tests { - use crate::memorytracking::{same_callstack, ProcessUid, PARENT_PROCESS}; + use crate::memorytracking::{IdentityCleaner, ProcessUid, PARENT_PROCESS}; use super::LineNumberInfo::LineNumber; use super::{ @@ -1235,7 +1183,10 @@ mod tests { "c:3 (cf) 234", "a:7 (af);b:2 (bf) 6000", ]; - let mut result2: Vec = tracker.to_lines(true, false, same_callstack).collect(); + let mut result2: Vec = tracker + .combine_callstacks(true, IdentityCleaner) + .to_lines(false) + .collect(); result2.sort(); expected2.sort(); assert_eq!(expected2, result2); diff --git a/memapi/src/python.rs b/memapi/src/python.rs index 448e3a40..61cc395d 100644 --- a/memapi/src/python.rs +++ b/memapi/src/python.rs @@ -1,7 +1,19 @@ -// Interactions with Python APIs. +//! Interactions with Python APIs. + use once_cell::sync::Lazy; use pyo3::prelude::*; -use pyo3::types::PyModule; + +// Get the source code line from a given filename. +pub fn get_source_line(filename: &str, line_number: usize) -> PyResult { + Python::with_gil(|py| { + let linecache = PyModule::import(py, "linecache")?; + let result: String = linecache + .getattr("getline")? + .call1((filename, line_number))? + .to_string(); + Ok(result) + }) +} // Return the filesystem path of the stdlib's runpy module. pub fn get_runpy_path() -> &'static str { diff --git a/tests/test_endtoend.py b/tests/test_endtoend.py index 423c1126..d5011071 100644 --- a/tests/test_endtoend.py +++ b/tests/test_endtoend.py @@ -491,7 +491,16 @@ def test_jupyter(tmpdir): assert "\tarr1, arr2 = make_".replace(" ", "\u00a0") in svg + assert ( + ">\tarr1, arr2 = make_".replace(" ", "\u00a0").replace("\t", "\u00a0" * 8) + in svg + ) # It's valid XML: ElementTree.fromstring(svg)
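For context on the linecache change above: memapi/src/python.rs now delegates source lookup to Python's own linecache.getline (so PEP 302 loaders and Jupyter's injected cells are handled for free), and memapi/src/linecache.rs is a thin wrapper around it. A rough standalone sketch of that call with pyo3 0.18, assuming an embedded interpreter as in the rusty_fork tests above; the main wrapper and the assertion are illustrative, not part of the change.

use pyo3::prelude::*;

fn main() -> PyResult<()> {
    // The real tests run under rusty_fork; here we just initialize Python directly.
    pyo3::prepare_freethreaded_python();
    Python::with_gil(|py| {
        let getline = PyModule::import(py, "linecache")?.getattr("getline")?;

        // Missing files and out-of-range line numbers come back as "".
        let missing: String = getline.call1(("/no/such/file", 1))?.extract()?;
        assert_eq!(missing, "");

        // For real files, getline keeps the trailing newline, which is why
        // the updated tests expect "abc\n" where the old Rust-side cache
        // returned "abc".
        Ok(())
    })
}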