diff --git a/core/src/main/java/com/taobao/arthas/core/command/monitor200/ProfilerCommand.java b/core/src/main/java/com/taobao/arthas/core/command/monitor200/ProfilerCommand.java index 69c9b4737b6..85ab87efd86 100644 --- a/core/src/main/java/com/taobao/arthas/core/command/monitor200/ProfilerCommand.java +++ b/core/src/main/java/com/taobao/arthas/core/command/monitor200/ProfilerCommand.java @@ -234,6 +234,26 @@ public class ProfilerCommand extends AnnotatedCommand { */ private String timeout; + /** + * Features enabled for profiling + */ + private String features; + + /** + * Profiling signal to use + */ + private String signal; + + /** + * Clock source for sampling timestamps: monotonic or tsc + */ + private String clock; + + /** + * Normalize method names by removing unique numerical suffixes from lambda classes. + */ + private boolean norm; + private static String libPath; private static AsyncProfiler profiler = null; @@ -335,7 +355,8 @@ public void setLock(String lock) { } @Option(longName = "jfrsync") - @Description("start Java Flight Recording with the given config along with the profiler") + @Description("Start Java Flight Recording with the given config along with the profiler. " + + "Accepts a predefined profile name, a path to a .jfc file, or a list of JFR events starting with '+'. 
") public void setJfrsync(String jfrsync) { this.jfrsync = jfrsync; } @@ -353,6 +374,30 @@ public void setThreads(boolean threads) { this.threads = threads; } + @Option(shortName = "F", longName = "features") + @Description("Features enabled for profiling") + public void setFeatures(String features) { + this.features = features; + } + + @Option(longName = "signal") + @Description("Set the profiling signal to use") + public void setSignal(String signal) { + this.signal = signal; + } + + @Option(longName = "clock") + @Description("Clock source for sampling timestamps: monotonic or tsc") + public void setClock(String clock) { + this.clock = clock; + } + + @Option(longName = "norm", flag = true) + @Description("Normalize method names by removing unique numerical suffixes from lambda classes.") + public void setNorm(boolean norm) { + this.norm = norm; + } + @Option(longName = "sched", flag = true) @Description("group threads by scheduling policy") public void setSched(boolean sched) { @@ -584,6 +629,15 @@ private String executeArgs(ProfilerAction action) { if (this.interval != null) { sb.append("interval=").append(this.interval).append(COMMA); } + if (this.features != null) { + sb.append("features=").append(this.features).append(COMMA); + } + if (this.signal != null) { + sb.append("signal=").append(this.signal).append(COMMA); + } + if (this.clock != null) { + sb.append("clock=").append(this.clock).append(COMMA); + } if (this.jstackdepth != null) { sb.append("jstackdepth=").append(this.jstackdepth).append(COMMA); } @@ -611,6 +665,9 @@ private String executeArgs(ProfilerAction action) { if (this.alluser) { sb.append("alluser").append(COMMA); } + if (this.norm) { + sb.append("norm").append(COMMA); + } if (this.includes != null) { for (String include : includes) { sb.append("include=").append(include).append(COMMA); diff --git a/site/docs/doc/profiler.md b/site/docs/doc/profiler.md index f6e76efac34..430937f0f3c 100644 --- a/site/docs/doc/profiler.md +++ 
b/site/docs/doc/profiler.md @@ -101,6 +101,7 @@ Basic events: lock wall itimer + ctimer ``` 在 linux 下面 @@ -113,6 +114,7 @@ Basic events: lock wall itimer + ctimer Java method calls: ClassName.methodName Perf events: @@ -314,31 +316,63 @@ profiler --cstack fp 此命令将收集 native 栈帧的 Frame Pointer 信息。 -## 当指定 native 函数执行时开始/停止 profiling +## 当指定 Native 函数执行时开始/停止 Profiling -使用 `--begin function` 和 `--end function` 选项在指定 native 函数被执行时让 profiling 过程启动或终止。主要用途是分析特定的 JVM 阶段,比如 GC 和安全点。需要使用特定 JVM 实现中的 native 函数名,比如 HotSpot JVM 中的 `SafepointSynchronize::begin` 和 `SafepointSynchronize::end`。 +使用 `--begin function` 和 `--end function` 选项,可以在指定的 Native 函数被执行时启动或终止性能分析。主要用途是分析特定的 JVM 阶段,比如 GC 和 Safepoint。需要使用特定 JVM 实现中的 Native 函数名,比如在 HotSpot JVM 中的 SafepointSynchronize::begin 和 SafepointSynchronize::end。 -### Time-to-safepoint profiling +### Time-to-Safepoint Profiling -选项 `--ttsp` 实际上是 `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized` 的一个别名。它是一种约束而不是独立的 event 类型。无论选择哪种 event,profiler 都可以正常工作,但只有 VM 操作和 safepoint request 之间的事件会被记录下来。 +选项 `--ttsp` 实际上是 `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized` 的一个别名。它是一种约束,而不是独立的事件类型。无论选择哪种事件,Profiler 都可以正常工作,但只有在 VM 操作和 Safepoint 请求之间的事件会被记录下来。 + +现在,当使用 `--ttsp` 选项并指定 JFR 输出格式时,`profiler` 会在生成的 JFR 文件中自动包含 profiler.Window 事件。这些事件表示每次 Time-to-Safepoint 暂停的时间区间,使您无需依赖 JVM 日志即可分析这些暂停。 + +示例 ```bash profiler start --begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized -profiler --ttsp +profiler start --ttsp --format jfr ``` +生成的 JFR 文件将包含 profiler.Window 事件,可以使用 JDK Mission Control 等工具查看和分析这些事件。 + +**注意事项:** + +- profiler.Window 事件是通用的事件,适用于任何使用 --begin 和 --end 触发器的时间窗口,不仅限于 Safepoint 暂停。 + +- 在分析长时间的 Safepoint 暂停时,profiler.Window 事件可帮助您识别造成延迟的原因。 + +- 当使用 --ttsp 选项时,请确保使用 JFR 输出格式,以便能够生成并查看 profiler.Window 事件。 + ## 使用 profiler 记录的 event 生成 JFR 文件 用 `--jfrsync CONFIG` 选项可以指定配置启动 Java Flight Recording,输出的 jfr 文件会包含所有常规的 JFR 
event,但采样的来源是由 profiler 提供的。 -`CONFIG` 选项可以是 `profile`,表示使用在 `$JAVA_HOME/lib/jfr` 目录下预置的“profile”配置,也可以是自定义的 JFR 配置文件(.jfc),此选项的值采用与 [JFR.start 命令的 settings 选项](https://docs.oracle.com/en/java/javase/17/docs/specs/man/jcmd.html) 相同的格式。 +CONFIG 参数: + +- 预置配置:CONFIG 可以是 profile,表示使用 $JAVA_HOME/lib/jfr 目录下预置的 profile 配置。 +- 自定义配置文件:CONFIG 也可以是自定义的 JFR 配置文件(.jfc),此选项的值采用与 jcmd JFR.start 命令的 settings 选项相同的格式。 +- 指定 JFR 事件列表:现在,可以直接在 --jfrsync 中指定要启用的 JFR 事件列表,而无需创建 .jfc 文件。要指定事件列表,请以 + 开头,多个事件用 + 分隔。 -比如,以下命令使用“profile”配置启动 JFR: +示例: + +使用预置的 profile 配置启动 JFR: ```bash profiler start -e cpu --jfrsync profile -f combined.jfr ``` +直接指定 JFR 事件列表,例如启用 jdk.YoungGarbageCollection 和 jdk.OldGarbageCollection 事件: + +```bash +profiler start -e cpu --jfrsync +jdk.YoungGarbageCollection+jdk.OldGarbageCollection -f combined.jfr +``` + +**注意事项** + +- 当指定事件列表时,由于逗号 , 用于分隔不同的选项,因此事件之间使用加号 + 分隔。 +- 如果 --jfrsync 参数不以 + 开头,则被视为预置配置名或 .jfc 配置文件的路径。 +- 直接指定事件列表在目标应用运行在容器中时特别有用,无需额外的文件操作。 + ## 周期性保存结果 使用 `--loop TIME` 可以持续运行 profiler 并周期性保存结果。选项格式可以是具体时间 hh:mm:ss 或以秒、分钟、小时或天计算的时间间隔。需要确保指定的输出文件名中包含时间戳,否则每次输出的结果都会覆盖上次保存的结果。以下命令持续执行 profiling 并将每个小时内的记录保存到一个 jfr 文件中。 @@ -369,3 +403,117 @@ Linux 平台: 这个新功能仅在 Linux 平台上有效。macOS 上的 CPU 分 ```bash profiler start -e cpu -i 10 --wall 100 -f out.jfr ``` + +## `ctimer`事件 + +`ctimer` 事件是一种新的 CPU 采样模式,基于 `timer_create`,提供了无需 `perf_events` 的精确 CPU 采样。 + +在某些情况下,`perf_events` 可能不可用,例如由于 `perf_event_paranoid` 设置或 `seccomp` 限制,或者在容器环境中。虽然 itimer 事件可以在容器中工作,但可能存在采样不准确的问题。 + +`ctimer` 事件结合了 `cpu` 和 `itimer` 的优点: + +- 高准确性:提供精确的 CPU 采样。 +- 容器友好:默认在容器中可用。 +- 低资源消耗:不消耗文件描述符。 + +**请注意,`ctimer` 事件目前仅在 `Linux` 上支持,不支持 `macOS`。** +可参考 [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/855) 了解更多信息。 + +示例: + +```bash +profiler start -e ctimer -o jfr -f ./out-test.jfr +``` + +## `vtable`特性 + +在某些应用程序中,大量的 CPU 时间花费在调用 `megamorphic` 的虚方法或接口方法上,这在性能分析中显示为 `vtable stub` 或 `itable stub`。这无法帮助我们了解特定调用点为何是`megamorphic` 以及如何优化它。 + +vtable 特性可以在` 
vtable stub` 或 `itable stub` 之上添加一个伪帧,显示实际调用的对象类型。这有助于清楚地了解在特定调用点,不同接收者的比例。 + +该特性默认禁用,可以通过 `-F vtable` 选项启用(或使用 `features=vtable`)。 +可参考 [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/736) 了解更多信息。 + +示例: + +```bash +profiler start -F vtable +``` + +## `comptask` 特性 + +`profiler` 采样 JIT 编译器线程以及 Java 线程,可以显示 JIT 编译所消耗的 CPU 百分比。然而,Java 方法的编译资源消耗各不相同,了解哪些特定的 Java 方法在编译时消耗最多的 CPU 时间非常有用。 + +`comptask` 特性可以在 `C1/C2` 的堆栈跟踪中添加一个虚拟帧,显示当前正在编译的任务,即正在编译的 Java 方法。 + +该特性默认禁用,可以通过 `-F comptask` 选项启用(或使用 `features=comptask`)。 +可参考 [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/777) 了解更多信息。 + +示例: + +```bash +profiler start -F comptask +``` + +## 配置替代的分析信号 + +`profiler` 使用 `POSIX` 信号来进行性能分析。默认情况下,`SIGPROF` 用于 `CPU` 分析,`SIGVTALRM` 用于 `Wall-Clock` 分析。然而,如果应用程序也使用这些信号,或者希望同时运行多个 `profiler` 实例,这可能会导致信号冲突。 + +现在,可以使用 `signal` 参数来配置用于分析的信号,以避免冲突。 +可参考 [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/759) 了解更多信息。 + +语法 + +```bash +profiler start --signal <信号号码> +``` + +如果需要分别指定 CPU 和 Wall-Clock 分析的信号,可以使用以下语法: + +```bash +profiler start --signal <CPU 信号号码>/<Wall-Clock 信号号码> +``` + +## `--clock` 选项 + +`--clock` 选项允许用户控制用于采样时间戳的时钟源。这对于需要将 `profiler` 的数据与其他工具的数据进行时间戳对齐的场景非常有用。 + +用法 + +```bash +profiler start --clock <tsc|monotonic> +``` + +参数 + +- `tsc`:使用 CPU 的时间戳计数器(`RDTSC`)。这是默认选项,提供高精度的时间戳。 +- `monotonic`:使用操作系统的单调时钟(`CLOCK_MONOTONIC`)。这有助于在多种数据源之间对齐时间戳。 + 可参考 [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/723) 了解更多信息。 + +示例: + +使用 `CLOCK_MONOTONIC` 作为时间戳源: + +```bash +profiler start --clock monotonic +``` + +**注意事项:** + +- 当需要将 `profiler` 的数据与其他使用 `CLOCK_MONOTONIC` 的工具(例如 `perf`)的数据进行对齐时,使用 `--clock monotonic`。 +- 在使用 `jfrsync` 模式时,请谨慎使用 `--clock` 选项,因为 JVM 和 `profiler` 可能使用不同的时间戳源,这可能导致结果不一致。 + +## `--norm` 选项 + +在 Java 20 及更早的版本中,编译器为 `lambda` 表达式生成的方法名称包含唯一的数字后缀。例如,同一代码位置定义的 `lambda` 表达式,可能会生成多个不同的帧名称,因为每个 `lambda` 方法的名称都会附加一个唯一的数字后缀(如 
`lambda$method$0`、`lambda$method$1` 等)。这会导致逻辑上相同的堆栈无法在火焰图中合并,增加了性能分析的复杂性。 + +为了解决这个问题,`profiler` 新增了 `--norm` 选项,可以在生成输出时自动规范化方法名称,去除这些数字后缀,使相同的堆栈能够正确地合并。 +可参考 [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/832) 了解更多信息。 + +**示例:** + +生成规范化的火焰图: + +```bash +profiler start --norm +``` diff --git a/site/docs/en/doc/profiler.md b/site/docs/en/doc/profiler.md index 69f225e2b59..d01ad979d39 100644 --- a/site/docs/en/doc/profiler.md +++ b/site/docs/en/doc/profiler.md @@ -101,6 +101,7 @@ Basic events: lock wall itimer + ctimer ``` Under linux @@ -113,6 +114,7 @@ Basic events: lock wall itimer + ctimer Java method calls: ClassName.methodName Perf events: @@ -314,31 +316,65 @@ profiler --cstack fp The command above will collection Frame Pointer of C stacks. -## Begin or end profiling when FUNCTION is executed +## Start/Stop Profiling When a Specified Native Function is Executed -Use `--begin function` and `--end function` to automatically start/stop profiling when the specified native function is executed. Its main purpose is to profile certain JVM phases like GC and Safepoint pauses. You should use native function name defined in a JVM implement, for example `SafepointSynchronize::begin` and `SafepointSynchronize::end` in HotSpot JVM. +Using the `--begin function` and `--end function` options, you can start or stop profiling when a specified native function is executed. The main use is to analyze specific JVM phases, such as GC and Safepoint. You need to use the native function names in the specific JVM implementation, such as SafepointSynchronize::begin and SafepointSynchronize::end in HotSpot JVM. -### Time-to-safepoint profiling +### Time-to-Safepoint Profiling -The `--ttsp` option is an alias for `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized`. It is not a separate event type, but rather a constraint. Whatever event type you choose (e.g. 
cpu or wall), the profiler will work as usual, except that only events between the safepoint request and the start of the VM operation will be recorded. +The option `--ttsp` is actually an alias for `--begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized`. It is a constraint, not a separate event type. The profiler works as usual with whichever event type you select, but only events between the safepoint request and the start of the VM operation are recorded. + +`profiler` now automatically includes profiler.Window events in the generated JFR file when the `--ttsp` option is used and a JFR output format is specified. These events represent the time interval of each Time-to-Safepoint pause, allowing you to analyze these pauses without relying on JVM logs. + +Example ```bash profiler start --begin SafepointSynchronize::begin --end RuntimeService::record_safepoint_synchronized -profiler --ttsp +profiler start --ttsp --format jfr ``` -## Use events from profiler for Java Flight Recording +The generated JFR file will contain profiler.Window events, which can be viewed and analyzed using tools such as JDK Mission Control. + +**Notes:** + +- profiler.Window events are generic events that apply to any time window using the --begin and --end triggers, not just Safepoint pauses. + +- When analyzing long Safepoint pauses, profiler.Window events can help you identify the cause of delays. + +- When using the --ttsp option, make sure to use the JFR output format so that profiler.Window events can be generated and viewed. + +## Generate JFR file using events recorded by profiler + +Use `--jfrsync CONFIG` option to specify configuration to start Java Flight Recording. The output jfr file will contain all normal JFR events, but the sampling sources are provided by the profiler. + +CONFIG parameters: + +- Preset configuration: CONFIG can be profile, which means to use the preset profile configuration in the $JAVA_HOME/lib/jfr directory. 
+ +- Custom configuration file: CONFIG can also be a custom JFR configuration file (.jfc). The value of this option uses the same format as the settings option of the jcmd JFR.start command. -Use `--jfrsync CONFIG` to start Java Flight Recording with the given configuration synchronously with the profiler. The output .jfr file will include all regular JFR events, except that execution samples will be obtained from async-profiler. This option implies -o jfr. +- Specify a list of JFR events: Now, you can directly specify the list of JFR events to be enabled in --jfrsync without creating a .jfc file. To specify a list of events, start with + and separate multiple events with +. -`CONFIG` can be `profile`, means using the predefined JFR config "profile" in `$JAVA_HOME/lib/jfr/`, or full path of a JFR configuration file (.jfc), this value has the same format with [settings option of JFR.start](https://docs.oracle.com/en/java/javase/17/docs/specs/man/jcmd.html). +Example: -For example, command below use "profile" config of JFR: +Start JFR with a preset profile configuration: ```bash profiler start -e cpu --jfrsync profile -f combined.jfr ``` +Directly specify a list of JFR events, for example, to enable jdk.YoungGarbageCollection and jdk.OldGarbageCollection events: + +```bash +profiler start -e cpu --jfrsync +jdk.YoungGarbageCollection+jdk.OldGarbageCollection -f combined.jfr +``` + +**Notes** + +- When specifying a list of events, events are separated by a plus sign + because commas , are used to separate different options. +- If the --jfrsync parameter does not start with +, it is treated as a preset profile name or a path to a .jfc configuration file. +- Directly specifying a list of events is particularly useful when the target application is running in a container, without additional file operations. + ## Run profiler in a loop Use `--loop TIME` to run profiler in a loop (continuous profiling). 
The argument is either a clock time (hh:mm:ss) or a loop duration in seconds, minutes, hours, or days. Make sure the filename includes a timestamp pattern, or the output will be overwritten on each iteration. The command below will run profiling endlessly and save records of each hour to a jfr file. @@ -366,3 +402,122 @@ Performance overhead: Enabling Wall clock analysis will increase performance ove ```bash profiler start -e cpu -i 10 --wall 100 -f out.jfr ``` + +## `ctimer` events + +`ctimer` events are a new CPU sampling mode based on `timer_create`, providing accurate CPU sampling without `perf_events`. + +In some cases, `perf_events` may not be available, for example due to `perf_event_paranoid` settings or `seccomp` restrictions, or in container environments. Although itimer events can work in containers, there may be sampling inaccuracies. + +`ctimer` events combine the advantages of `cpu` and `itimer`: + +- High accuracy: provides accurate CPU sampling. + +- Container-friendly: available in containers by default. + +- Low resource consumption: does not consume file descriptors. + +**Note that `ctimer` events are currently only supported on `Linux`, not `macOS`.** +See [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/855) for more information. + +Example: + +```bash +profiler start -e ctimer -o jfr -f ./out-test.jfr +``` + +## `vtable` Feature + +In some applications, a lot of CPU time is spent in calling `megamorphic` virtual or interface methods, which is shown as `vtable stub` or `itable stub` in performance analysis. This does not help us understand why a specific call site is `megamorphic` and how to optimize it. + +The vtable feature can add a pseudo frame on top of the `vtable stub` or `itable stub`, showing the actual object type being called. This helps to clearly understand the ratio of different receivers at a specific call site. 
+ +This feature is disabled by default and can be enabled with the `-F vtable` option (or using `features=vtable`). +See the [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/736) for more information. + +Example: + +```bash +profiler start -F vtable +``` + +## `comptask` feature + +`profiler` samples the JIT compiler threads as well as the Java threads, and can show the percentage of CPU consumed by JIT compilation. However, the compilation resource consumption of Java methods varies, and it is useful to know which specific Java methods consume the most CPU time when compiling. + +The `comptask` feature adds a virtual frame to the stack trace of `C1/C2`, showing the current task being compiled, that is, the Java method being compiled. + +This feature is disabled by default and can be enabled with the `-F comptask` option (or using `features=comptask`). +See [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/777) for more information. + +Example: + +```bash +profiler start -F comptask +``` + +## Configuring Alternative Profiling Signals + +`profiler` uses `POSIX` signals for performance profiling. By default, `SIGPROF` is used for `CPU` profiling and `SIGVTALRM` is used for `Wall-Clock` profiling. However, this can lead to signal conflicts if your application also uses these signals or if you want to run multiple `profiler` instances simultaneously. + +You can now use the `signal` parameter to configure the signal used for profiling to avoid conflicts. + +See [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/759) for more information. 
+ +Syntax + +```bash +profiler start --signal <signal_number> +``` + +If you need to specify the signal for CPU and Wall-Clock analysis separately, you can use the following syntax: + +```bash +profiler start --signal <cpu_signal_number>/<wall_signal_number> +``` + +## `--clock` option + +The `--clock` option allows the user to control the clock source used for sampling timestamps. This is useful for scenarios where you need to align the timestamps of `profiler` data with data from other tools. + +Usage + +```bash +profiler start --clock <tsc|monotonic> +``` + +Parameters + +- `tsc`: Use the CPU's timestamp counter (`RDTSC`). This is the default option and provides high-precision timestamps. + +- `monotonic`: Use the operating system's monotonic clock (`CLOCK_MONOTONIC`). This helps align timestamps between multiple data sources. + See [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/723) for more information. + +Example: + +Using `CLOCK_MONOTONIC` as timestamp source: + +```bash +profiler start --clock monotonic +``` + +**Notes:** + +- Use `--clock monotonic` when you need to align `profiler` data with data from other tools that use `CLOCK_MONOTONIC` (e.g. `perf`). + +- Use `--clock` option with caution when using `jfrsync` mode, as the JVM and `profiler` may use different timestamp sources, which may lead to inconsistent results. + +## `--norm` option + +In Java 20 and earlier, the method names generated by the compiler for `lambda` expressions contain a unique numeric suffix. For example, a `lambda` expression defined in the same code location may generate multiple different frame names, because each `lambda` method name is appended with a unique numeric suffix (such as `lambda$method$0`, `lambda$method$1`, etc.). This causes logically identical stacks to not be merged in the flame graph, increasing the complexity of performance analysis. 
+ +To solve this problem, `profiler` has added a `--norm` option that automatically normalizes method names when generating output, removes these numeric suffixes, and enables identical stacks to be merged correctly. +Please refer to [async-profiler Github Issues](https://github.com/async-profiler/async-profiler/issues/832) for more information. + +**Example:** + +Generate a normalized flame graph: + +```bash +profiler start --norm +```