From c8d1d94e772619a4bc1a5e49fdef2dc13459d019 Mon Sep 17 00:00:00 2001 From: Pedro Henrique Penna Date: Fri, 22 Mar 2024 16:29:48 -0700 Subject: [PATCH] [ci] Enhancement: Perf Workflow --- .github/workflows/perf.yml | 34 +++++ tools/plot.py | 270 +++++++++++++++++++++++++++++++++++++ 2 files changed, 304 insertions(+) create mode 100644 .github/workflows/perf.yml create mode 100644 tools/plot.py diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml new file mode 100644 index 0000000000..98c4ab6fc4 --- /dev/null +++ b/.github/workflows/perf.yml @@ -0,0 +1,34 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +name: Performance Analysis + +on: + push: + branches: + - bugfix-* + - enhancement-* + - feature-* + - workaround-* + - dev + - unstable + - main + workflow_dispatch: + +jobs: + plot: + name: Plot + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Plot Performance + run: | + pip install --pre azure-data-tables azure-storage-blob pandas matplotlib + python3 tools/plot.py \ + --connection "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" \ + --table ${{ secrets.AZURE_STORAGE_TABLE_NAME }} \ + --key ${{ secrets.AZURE_STORAGE_KEY }} \ + --container ${{ secrets.AZURE_STORAGE_CONTAINER }} \ + --no-plot + diff --git a/tools/plot.py b/tools/plot.py new file mode 100644 index 0000000000..914de6b96a --- /dev/null +++ b/tools/plot.py @@ -0,0 +1,270 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import datetime +import fnmatch +import json +import subprocess +from typing import List +from azure.data.tables import TableServiceClient +import pandas +import matplotlib.pyplot as plt +import argparse +from azure.storage.blob import BlobClient + +# ===================================================================================================================== + + +# Drives the program. +def main(): + # Read arguments from command line and parse them. 
+ args: argparse.Namespace = __read_args() + + # Extract optionss. + table_name: str = args.table + container_name: str = args.container + connection_str: str = args.connection + key: str = args.key + no_plot: bool = args.no_plot + + __plot_performance(table_name=table_name, container_name=container_name, + connection_str=connection_str, key=key, no_plot=no_plot) + + +# Reads and parses command line arguments. +def __read_args() -> argparse.Namespace: + description: str = "CI Utility for pllot performance statistics of Demikernel." + + # Initialize parser. + parser = argparse.ArgumentParser(prog="plot.py", description=description) + + # Options related to Storage account. + parser.add_argument("--table", required=True, help="Set Azure Table to use.") + parser.add_argument("--container", required=True, help="Set Azure Blob Container to use.") + + # Options related to credentials. + parser.add_argument("--connection", required=True, help="Set connection string to access Azure Storage Account.") + parser.add_argument("--key", required=True, help="Set connection key to access Azure Storage Account.") + + parser.add_argument("--no-plot", action="store_true", help="Do not plot the performance statistics.") + + # Read arguments from command line. + return parser.parse_args() + + +# Get first commit on branch. 
def get_first_commit_on_branch() -> str:
    """Return the hash of the root (parentless) commit reachable from ``HEAD``.

    Runs ``git rev-list --max-parents=0 HEAD`` and removes the newlines from
    its output. If the history has several parentless commits, their hashes
    end up concatenated into one string.

    Returns:
        The commit hash, or an empty string when git produced no output.
    """
    return _git_stdout("rev-list", "--max-parents=0", "HEAD").replace("\n", "")


def check_if_merge_commit(commit_hash: str) -> bool:
    """Tell whether ``commit_hash`` is a merge commit.

    Args:
        commit_hash: Hash (or any revision git understands) to inspect.

    Returns:
        True when ``git show --format=%P -s`` lists more than one parent.
        False otherwise, including when git does not know the commit.
    """
    parents = _git_stdout("show", "--format=%P", "-s", commit_hash).split()
    return len(parents) > 1


def check_if_head_commit(commit_hash: str) -> bool:
    """Tell whether ``commit_hash`` is the full hash of the current ``HEAD``.

    Args:
        commit_hash: Full commit hash to compare against ``HEAD``.

    Returns:
        True when the hash equals the one printed by
        ``git show --format=%H -s HEAD``, False otherwise.
    """
    head_hash = _git_stdout("show", "--format=%H", "-s", "HEAD").replace("\n", "")
    # The comparison used to be evaluated and then discarded, so the function
    # always returned None; the result is now actually returned.
    return commit_hash == head_hash


def get_short_commit_hash(commit_hash: str) -> int:
    """Return the abbreviated form of ``commit_hash`` as an integer.

    The abbreviated hexadecimal hash printed by ``git rev-parse --short`` is
    parsed as a base-16 number.

    Args:
        commit_hash: Commit hash to abbreviate.

    Returns:
        The abbreviated hash interpreted as a hexadecimal integer.

    Raises:
        ValueError: If git printed nothing (or anything that is not
            hexadecimal), e.g. because the commit is unknown.
    """
    short_hash = _git_stdout("rev-parse", "--short", commit_hash).replace("\n", "")
    return int(short_hash, 16)


def _git_stdout(*git_args: str) -> str:
    """Run ``git`` with ``git_args`` and return its raw standard output.

    The command is started directly from an argument list, without a shell,
    so values such as commit hashes are never interpreted as shell syntax.
    Standard error is captured and discarded. A command that fails yields
    whatever it wrote to standard output (normally nothing), and an empty
    string is returned when git itself cannot be started.
    """
    try:
        completed = subprocess.run(
            ["git", *git_args], text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False)
    except OSError:
        return ""
    return completed.stdout


# Compute distance of two commit hashes.
def get_distance_of_commits(commit_hash1: str, base_commit: str = "") -> int:
    """Count the commits reachable from ``commit_hash1`` but not from the base.

    Args:
        commit_hash1: Commit whose distance from the base commit is measured.
        base_commit: Commit to measure from. When empty (the default), the
            result of ``get_first_commit_on_branch()`` is used, so existing
            single-argument calls behave as before. Callers that measure many
            commits can pass the base once to avoid recomputing it.

    Returns:
        The number printed by ``git rev-list --count <base>..<commit>``, or 0
        when git printed nothing (unknown commit, git unavailable, ...).
    """
    if not base_commit:
        base_commit = get_first_commit_on_branch()
    try:
        # Argument list, no shell: the hash comes from outside this script.
        completed = subprocess.run(
            ["git", "rev-list", "--count", f"{base_commit}..{commit_hash1}"],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False)
    except OSError:
        return 0
    count = completed.stdout.replace("\n", "")
    return int(count) if count else 0


def _commit_kind(commit_hash: str) -> str:
    """Classify a commit as ``"head"``, ``"merge"`` or ``""`` (neither) using git."""
    if check_if_head_commit(commit_hash):
        return "head"
    if check_if_merge_commit(commit_hash):
        return "merge"
    return ""


def __plot_performance(table_name: str, container_name: str, connection_str: str, key: str, no_plot: bool) -> None:
    """Collect recent push/pop statistics from an Azure Table and report them.

    For each job type and syscall, the per-LibOS samples of merge commits and
    of the HEAD commit are joined on commit and either printed as a table or
    plotted, saved as ``<job>-<syscall>.png`` and uploaded to blob storage.

    Args:
        table_name: Name of the Azure Table that is queried.
        container_name: Blob container that receives the rendered images.
        connection_str: Connection string used to reach the table service.
        key: Credential handed to the blob client for uploads.
        no_plot: When True, print the tables instead of plotting/uploading.
    """
    job_types = ["tcp-ping-pong-server", "tcp-ping-pong-client"]
    syscalls = ["push", "pop"]
    libos_types = ["catnap", "catpowder", "catnip"]
    lookback_days = 15

    # Connect to Azure table.
    table_service = TableServiceClient.from_connection_string(connection_str)
    table_client = table_service.get_table_client(table_name)

    # Query Azure table for statistics on the past `lookback_days` days. The
    # timestamp literal carries a "Z" suffix, so it has to be built from UTC.
    base_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=lookback_days)
    query_filter: str = (
        f"Timestamp gt datetime'{base_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}' and "
        "(LibOS eq 'catnap' or LibOS eq 'catpowder' or LibOS eq 'catnip') and "
        "(Syscall eq 'push' or Syscall eq 'pop')"
    )
    select: List[str] = ["LibOS", "JobName", "CommitHash", "Syscall", "AverageCyclesPerSyscall"]
    data = table_client.query_entities(query_filter=query_filter, select=select)

    # cooked_data[job_type][syscall][libos_type] -> parallel sample lists.
    cooked_data = {
        job_type: {
            syscall: {
                libos_type: {"diff": [], "cycles": [], "commit": []}
                for libos_type in libos_types
            }
            for syscall in syscalls
        }
        for job_type in job_types
    }

    # (job type, LibOS, commit, syscall) combinations already recorded. The
    # job type is part of the key so that a sample of the server job does not
    # suppress the sample of the client job for the same commit.
    recorded = set()
    # git classification per commit, so git runs once per commit, not per row.
    commit_kinds = {}
    # NOTE(review): this is the root commit of the history, not HEAD; it is the
    # base for commit distances and the prefix of uploaded blob names. Confirm
    # that the blob prefix is meant to be this constant value.
    root_commit = get_first_commit_on_branch()

    # Parse queried data.
    for row in data:
        for job_type in job_types:
            if not fnmatch.fnmatch(row["JobName"], f"*{job_type}*"):
                continue
            for syscall in syscalls:
                if syscall not in row["Syscall"]:
                    continue
                for libos_type in libos_types:
                    if libos_type not in row["LibOS"]:
                        continue
                    commit_hash = row["CommitHash"]
                    sample_key = (job_type, libos_type, commit_hash, syscall)
                    # Check if we have already processed this commit.
                    if sample_key in recorded:
                        continue
                    if commit_hash not in commit_kinds:
                        commit_kinds[commit_hash] = _commit_kind(commit_hash)
                    kind = commit_kinds[commit_hash]
                    # Only merge commits and the HEAD commit are reported.
                    if not kind:
                        continue
                    print(f"Processing {kind} commit {commit_hash}...")
                    recorded.add(sample_key)
                    samples = cooked_data[job_type][syscall][libos_type]
                    samples["diff"].append(get_distance_of_commits(commit_hash, base_commit=root_commit))
                    samples["cycles"].append(row["AverageCyclesPerSyscall"])
                    samples["commit"].append(get_short_commit_hash(commit_hash))

    for job_type in job_types:
        for syscall in syscalls:
            per_libos = cooked_data[job_type][syscall]
            catpowder_df = pandas.DataFrame(per_libos["catpowder"]).sort_values(by=["diff"])
            catnap_df = pandas.DataFrame(per_libos["catnap"]).sort_values(by=["diff"])
            catnip_df = pandas.DataFrame(per_libos["catnip"]).sort_values(by=["diff"])

            # Keep only commits measured on all three LibOSes. After the two
            # merges the columns are: diff, cycles (catpowder), commit,
            # cycles (catnap), cycles (catnip).
            df = pandas.merge(catpowder_df, catnap_df, on=["commit", "diff"])
            df = pandas.merge(df, catnip_df, on=["commit", "diff"])
            df.columns = ["Diff", "Catpowder", "Commit", "Catnap", "Catnip"]
            df = df.reindex(columns=["Diff", "Commit", "Catnap", "Catpowder", "Catnip"])
            if df.empty:
                continue

            # NOTE(review): nesting was reconstructed from a source whose
            # indentation was lost; the printing branch is assumed to be the
            # alternative to plotting (i.e. it runs when no_plot is set).
            if no_plot:
                print(f"\nPerformance for {syscall.capitalize()}() in {job_type}:")
                print(df)
                continue

            df.plot(x="Diff", y=["Catpowder", "Catnap", "Catnip"],
                    kind="line", marker='o',
                    title=f"Performance for {syscall.capitalize()}()",
                    xlabel="Commit Hash",
                    ylabel="Average Cycles Spent in Syscall",
                    legend=True, ylim=(0, 5000))
            plt.xticks(rotation=90, ticks=df["Diff"], labels=df["Commit"])
            image_path = f"{job_type}-{syscall}.png"
            plt.savefig(image_path, bbox_inches='tight', dpi=300)
            # Release the figure; one is created per job type and syscall.
            plt.close()
            upload_image_to_blob("demikernel", key, container_name,
                                 f"{root_commit}-{image_path}", image_path)


def upload_image_to_blob(account_name: str, account_key: str, container_name: str,
                         blob_name: str, image_path: str) -> None:
    """Upload a local file to Azure Blob Storage, replacing any existing blob.

    Args:
        account_name: Storage account name; used to build the account URL
            ``https://<account_name>.blob.core.windows.net``.
        account_key: Credential passed to the blob client.
        container_name: Container that receives the blob.
        blob_name: Name of the blob to create or overwrite.
        image_path: Path of the local file whose bytes are uploaded.
    """
    blob_client = BlobClient(account_url=f"https://{account_name}.blob.core.windows.net",
                             container_name=container_name,
                             blob_name=blob_name,
                             credential=account_key)

    with open(image_path, 'rb') as image_file:
        blob_client.upload_blob(image_file, overwrite=True)


if __name__ == "__main__":
    main()