-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
310 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
name: Performance Analysis

# Runs on pushes to the listed development/release branches, and on demand.
on:
  push:
    branches:
      - bugfix-*
      - enhancement-*
      - feature-*
      - workaround-*
      - dev
      - unstable
      - main
  workflow_dispatch:

jobs:
  plot:
    name: Plot
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # The plotting script walks the commit history (root commit,
          # parents of past commits, commit distances). The default shallow
          # clone only contains the tip commit, so fetch the full history.
          fetch-depth: 0
      - name: Plot Performance
        # Every secret expansion is quoted so that a value containing spaces
        # or shell metacharacters is passed as a single argument.
        run: |
          pip install --pre azure-data-tables azure-storage-blob pandas matplotlib
          python3 tools/plot.py \
            --connection "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" \
            --table "${{ secrets.AZURE_STORAGE_TABLE_NAME }}" \
            --key "${{ secrets.AZURE_STORAGE_KEY }}" \
            --container "${{ secrets.AZURE_STORAGE_CONTAINER }}" \
            --no-plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT license. | ||
|
||
import datetime | ||
import fnmatch | ||
import json | ||
import subprocess | ||
from typing import List | ||
from azure.data.tables import TableServiceClient | ||
import pandas | ||
import matplotlib.pyplot as plt | ||
import argparse | ||
from azure.storage.blob import BlobClient | ||
|
||
# ===================================================================================================================== | ||
|
||
|
||
# Drives the program. | ||
def main():
    """Parse the command line and run the performance report."""
    # Collect the options supplied on the command line.
    options: argparse.Namespace = __read_args()

    # Forward each option, by keyword, to the plotting routine.
    __plot_performance(
        table_name=options.table,
        container_name=options.container,
        connection_str=options.connection,
        key=options.key,
        no_plot=options.no_plot,
    )
|
||
|
||
# Reads and parses command line arguments. | ||
def __read_args(argv: "List[str] | None" = None) -> argparse.Namespace:
    """Build the command-line parser and parse the program arguments.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, which is what the
            script entry point relies on.

    Returns:
        A namespace with the attributes ``table``, ``container``,
        ``connection``, ``key`` (all required strings) and ``no_plot``
        (``True`` only when ``--no-plot`` is given).
    """
    description: str = "CI Utility for plotting performance statistics of Demikernel."

    # Initialize parser.
    parser = argparse.ArgumentParser(prog="plot.py", description=description)

    # Options related to Storage account.
    parser.add_argument("--table", required=True, help="Set Azure Table to use.")
    parser.add_argument("--container", required=True, help="Set Azure Blob Container to use.")

    # Options related to credentials.
    parser.add_argument("--connection", required=True,
                        help="Set connection string to access Azure Storage Account.")
    parser.add_argument("--key", required=True, help="Set connection key to access Azure Storage Account.")

    # Options related to output.
    parser.add_argument("--no-plot", action="store_true", help="Do not plot the performance statistics.")

    # Parse the given arguments (or the process command line when argv is None).
    return parser.parse_args(argv)
|
||
|
||
# Get first commit on branch. | ||
def get_first_commit_on_branch() -> str:
    """Return the hash of a root (parentless) commit reachable from HEAD.

    Runs ``git rev-list --max-parents=0 HEAD`` in the current working
    directory. Git is invoked directly with an argument list rather than
    through a login shell, so shell profile scripts cannot slow the call
    down or write into the captured output.

    Returns:
        A single commit hash, or an empty string when git yields no output
        (for example outside a repository) or cannot be launched.
    """
    try:
        result = subprocess.run(
            ["git", "rev-list", "--max-parents=0", "HEAD"],
            capture_output=True, text=True, check=False)
    except OSError:
        # git could not be started; report "no commit", as a failed command would.
        return ""

    # A history may have several root commits, one per output line. Joining
    # them would produce an invalid hash, so keep exactly one. rev-list prints
    # newest first by default, so the last line should be the oldest root.
    roots = result.stdout.split()
    return roots[-1] if roots else ""
|
||
|
||
def check_if_merge_commit(commit_hash: str) -> bool:
    """Tell whether a commit is a merge commit, i.e. has more than one parent.

    Runs ``git show --format=%P -s <commit_hash>`` and counts the parent
    hashes that git prints.

    Args:
        commit_hash: Hash of the commit to inspect. The value is passed to git
            as a single argument and never interpreted by a shell.

    Returns:
        ``True`` when git reports two or more parents. ``False`` otherwise,
        including when the hash is empty, looks like a command-line option,
        is unknown to git, or git cannot be launched.
    """
    # An empty argument would make git inspect HEAD instead, and a leading
    # dash would be parsed as a git option; neither is a commit hash.
    if not commit_hash or commit_hash.startswith("-"):
        return False

    try:
        result = subprocess.run(
            ["git", "show", "--format=%P", "-s", commit_hash],
            capture_output=True, text=True, check=False)
    except OSError:
        return False

    # %P prints the parent hashes separated by spaces.
    return len(result.stdout.split()) > 1
|
||
|
||
def check_if_head_commit(commit_hash: str) -> bool:
    """Tell whether a commit hash is the commit currently checked out as HEAD.

    Runs ``git show --format=%H -s HEAD`` and compares the full hash that git
    prints with ``commit_hash``.

    Args:
        commit_hash: Commit hash to compare against HEAD.

    Returns:
        ``True`` when ``commit_hash`` equals the hash of HEAD. ``False``
        otherwise, including when git prints nothing or cannot be launched.
    """
    try:
        result = subprocess.run(
            ["git", "show", "--format=%H", "-s", "HEAD"],
            capture_output=True, text=True, check=False)
    except OSError:
        return False

    head_hash = result.stdout.strip()
    # Require a non-empty HEAD hash so that a failed git call can never
    # "match" an empty commit hash. The result is returned explicitly: the
    # comparison used to be evaluated and then discarded.
    return bool(head_hash) and commit_hash == head_hash
|
||
|
||
def get_short_commit_hash(commit_hash: str) -> int:
    """Return the abbreviated form of a commit hash as an integer.

    Runs ``git rev-parse --short <commit_hash>`` and parses the printed
    abbreviation as a base-16 number.

    Args:
        commit_hash: Hash of the commit to abbreviate. The value is passed to
            git as a single argument and never interpreted by a shell.

    Returns:
        The abbreviated hash converted with ``int(..., 16)``.

    Raises:
        ValueError: If the hash is empty or looks like a command-line option,
            if git cannot be launched or exits with an error, or if its
            output is not a hexadecimal number.
    """
    if not commit_hash or commit_hash.startswith("-"):
        raise ValueError(f"Invalid commit hash: {commit_hash!r}")

    try:
        result = subprocess.run(
            ["git", "rev-parse", "--short", commit_hash],
            capture_output=True, text=True, check=False)
    except OSError as err:
        raise ValueError(f"Unable to run git to abbreviate commit {commit_hash!r}") from err

    # Report git's own error message instead of a bare int() parsing failure.
    if result.returncode != 0:
        raise ValueError(
            f"git could not abbreviate commit {commit_hash!r}: {result.stderr.strip()}")

    short_hash = result.stdout.strip()
    try:
        return int(short_hash, 16)
    except ValueError as err:
        raise ValueError(
            f"Unexpected output from git for commit {commit_hash!r}: {short_hash!r}") from err
|
||
# Compute distance of two commit hashes. | ||
|
||
|
||
def get_distance_of_commits(commit_hash1: str) -> int:
    """Count the commits between the base commit and a given commit.

    The base is whatever ``get_first_commit_on_branch()`` returns; the count
    comes from ``git rev-list --count <base>..<commit_hash1>``.

    Args:
        commit_hash1: Hash of the commit whose distance from the base is
            wanted. The value is passed to git as part of a single argument
            and never interpreted by a shell.

    Returns:
        The number printed by git, or ``0`` when the hash is empty or looks
        like a command-line option, when no base commit is available, or
        when git prints no count or cannot be launched.
    """
    if not commit_hash1 or commit_hash1.startswith("-"):
        return 0

    base_commit = get_first_commit_on_branch()
    if not base_commit:
        # With an empty base the range would silently become "HEAD..<commit>".
        return 0

    try:
        result = subprocess.run(
            ["git", "rev-list", "--count", f"{base_commit}..{commit_hash1}"],
            capture_output=True, text=True, check=False)
    except OSError:
        return 0

    count = result.stdout.strip()
    # git prints nothing on failure; treat that as a distance of zero.
    return int(count) if count.isdigit() else 0
|
||
|
||
def __plot_performance(table_name: str, container_name: str, connection_str: str, key: str, no_plot: bool) -> None:
    """Query recent performance statistics and plot or print them per job and syscall.

    Rows are read from the Azure Table, restricted to the catnap, catpowder
    and catnip LibOSes and to the push and pop syscalls. Only rows whose
    commit is a merge commit or the current HEAD are kept. For every
    (job type, syscall) pair the three LibOS series are joined on commit, and
    the result is either printed or plotted, saved as a PNG in the current
    directory and uploaded to Blob Storage.

    Args:
        table_name: Name of the Azure Table holding the statistics.
        container_name: Blob container that receives the generated images.
        connection_str: Connection string used to reach the Azure Table.
        key: Credential used for the blob upload.
        no_plot: When true, print each table instead of plotting and uploading.
    """
    job_types = ["tcp-ping-pong-server", "tcp-ping-pong-client"]
    syscalls = ["push", "pop"]
    libos_types = ["catnap", "catpowder", "catnip"]

    # Connect to Azure table.
    table_service = TableServiceClient.from_connection_string(connection_str)
    table_client = table_service.get_table_client(table_name)

    print("Querying Azure Table for performance statistics...")

    # Query Azure table for statistics on the past 15 days. The timestamp is
    # taken in UTC because the filter literal carries the "Z" (UTC) suffix.
    lookback_days = 15
    base_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=lookback_days)
    base_timestamp = base_date.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    query_filter: str = (
        f"Timestamp gt datetime'{base_timestamp}' and "
        "(LibOS eq 'catnap' or LibOS eq 'catpowder' or LibOS eq 'catnip') and "
        "(Syscall eq 'push' or Syscall eq 'pop')"
    )
    select: List[str] = ["LibOS", "JobName", "CommitHash", "Syscall", "AverageCyclesPerSyscall"]
    data = table_client.query_entities(query_filter=query_filter, select=select)

    # cooked_data[job_type][syscall][libos_type] holds three parallel lists:
    # commit distance ("diff"), measured cycles and abbreviated commit hash.
    cooked_data = {
        job_type: {
            syscall: {
                libos_type: {"diff": [], "cycles": [], "commit": []}
                for libos_type in libos_types
            }
            for syscall in syscalls
        }
        for job_type in job_types
    }

    # Series that already received a data point. The job type is part of the
    # key so that server and client measurements of one commit do not shadow
    # each other.
    seen = set()
    # Per-commit cache of the git classification ("head", "merge" or None),
    # so git is not spawned again for every row of the same commit.
    commit_kinds = {}
    root_commit = get_first_commit_on_branch()

    print(f"Processing commits since {root_commit}...")

    # Parse queried data.
    for row in data:
        for job_type in job_types:
            if not fnmatch.fnmatch(row["JobName"], f"*{job_type}*"):
                continue
            for syscall in syscalls:
                if syscall not in row["Syscall"]:
                    continue
                for libos_type in libos_types:
                    if libos_type not in row["LibOS"]:
                        continue

                    commit_hash = row["CommitHash"]
                    if commit_hash not in commit_kinds:
                        if check_if_head_commit(commit_hash):
                            commit_kinds[commit_hash] = "head"
                        elif check_if_merge_commit(commit_hash):
                            commit_kinds[commit_hash] = "merge"
                        else:
                            commit_kinds[commit_hash] = None
                    commit_kind = commit_kinds[commit_hash]

                    if commit_kind is None:
                        print(f"Skipping commit {commit_hash}...")
                        continue

                    # Keep a single data point per series and commit.
                    entry = (job_type, libos_type, commit_hash, syscall)
                    if entry in seen:
                        continue
                    seen.add(entry)

                    print(f"Processing {commit_kind} commit {commit_hash}...")
                    series = cooked_data[job_type][syscall][libos_type]
                    series["diff"].append(get_distance_of_commits(commit_hash))
                    series["cycles"].append(row["AverageCyclesPerSyscall"])
                    series["commit"].append(get_short_commit_hash(commit_hash))

    for job_type in job_types:
        for syscall in syscalls:
            # One frame per LibOS, ordered by commit distance, with the cycles
            # column named after the LibOS so the join needs no positional renaming.
            frames = {
                libos_type: pandas.DataFrame(cooked_data[job_type][syscall][libos_type])
                .sort_values(by=["diff"])
                .rename(columns={"cycles": libos_type.capitalize()})
                for libos_type in libos_types
            }

            # Keep only commits that have a measurement for all three LibOSes.
            df = pandas.merge(frames["catpowder"], frames["catnap"], on=["commit", "diff"])
            df = pandas.merge(df, frames["catnip"], on=["commit", "diff"])
            df = df.rename(columns={"diff": "Diff", "commit": "Commit"})
            df = df.reindex(columns=["Diff", "Commit", "Catnap", "Catpowder", "Catnip"])

            if df.empty:
                continue

            if no_plot:
                print(f"\nPerformance for {syscall.capitalize()}() in {job_type}:")
                print(df)
                continue

            image_path = f"{job_type}-{syscall}.png"
            df.plot(x="Diff", y=["Catpowder", "Catnap", "Catnip"],
                    kind="line", marker='o',
                    title=f"Performance for {syscall.capitalize()}()",
                    xlabel="Commit Hash",
                    ylabel="Average Cycles Spent in Syscall",
                    legend=True, ylim=(0, 5000))
            # Commits are stored as integers; show them in hexadecimal again so
            # the tick labels read as (abbreviated) commit hashes.
            tick_labels = [format(int(commit), "07x") for commit in df["Commit"]]
            plt.xticks(rotation=90, ticks=df["Diff"], labels=tick_labels)
            plt.savefig(image_path, bbox_inches='tight', dpi=300)
            # Release the figure; a new one is created for each plot.
            plt.close()
            upload_image_to_blob("demikernel", key, container_name,
                                 f"{root_commit}-{image_path}", image_path)
|
||
|
||
def upload_image_to_blob(account_name, account_key, container_name, blob_name, image_path):
    """Upload a local file to a blob, replacing any existing blob of that name.

    Args:
        account_name: Storage account name, used to build the blob endpoint URL.
        account_key: Credential handed to the blob client.
        container_name: Container that holds the blob.
        blob_name: Name of the blob to write.
        image_path: Path of the local file to upload.
    """
    # Endpoint of the storage account that owns the container.
    endpoint = f"https://{account_name}.blob.core.windows.net"
    client = BlobClient(
        account_url=endpoint,
        container_name=container_name,
        blob_name=blob_name,
        credential=account_key,
    )

    # Send the file contents in binary mode, overwriting an existing blob.
    with open(image_path, 'rb') as image_file:
        client.upload_blob(image_file, overwrite=True)
|
||
|
||
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()