Skip to content

Commit

Permalink
[ci] Enhancement: Perf Workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
ppenna committed Mar 25, 2024
1 parent 4a6c161 commit 18b755e
Show file tree
Hide file tree
Showing 2 changed files with 313 additions and 0 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/perf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Queries the performance statistics stored in Azure and reports them for the
# commit that triggered the workflow (see tools/plot.py).
name: Performance Analysis

on:
  push:
    branches:
      - bugfix-*
      - enhancement-*
      - feature-*
      - workaround-*
      - dev
      - unstable
      - main
  workflow_dispatch:

jobs:
  plot:
    name: Plot
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # tools/plot.py inspects past commits (git rev-list / git show on
          # hashes other than HEAD), so a shallow single-commit clone is not
          # enough: fetch the full history.
          fetch-depth: 0
      - name: Plot Performance
        # Hand secrets to the script through the environment rather than
        # expanding them into the shell text, and quote every use so values
        # containing spaces or shell metacharacters stay a single argument.
        env:
          AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
          AZURE_STORAGE_TABLE_NAME: ${{ secrets.AZURE_STORAGE_TABLE_NAME }}
          AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
          AZURE_STORAGE_CONTAINER: ${{ secrets.AZURE_STORAGE_CONTAINER }}
        run: |
          # Print repository state to make the job log easier to debug.
          git branch
          git show --format=%H -s HEAD
          git rev-list --max-parents=0 HEAD
          pip install --pre azure-data-tables azure-storage-blob pandas matplotlib
          python3 tools/plot.py \
            --connection "$AZURE_STORAGE_CONNECTION_STRING" \
            --table "$AZURE_STORAGE_TABLE_NAME" \
            --key "$AZURE_STORAGE_KEY" \
            --container "$AZURE_STORAGE_CONTAINER" \
            --no-plot
276 changes: 276 additions & 0 deletions tools/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import datetime
import fnmatch
import json
import subprocess
from typing import List
from azure.data.tables import TableServiceClient
import pandas
import matplotlib.pyplot as plt
import argparse
from azure.storage.blob import BlobClient

# =====================================================================================================================


# Drives the program.
def main():
    """Parse the command line and hand the options to the plotting routine."""
    # Read and parse the command-line arguments.
    options: argparse.Namespace = __read_args()

    # Forward every option under the keyword name the plotting routine expects.
    __plot_performance(
        table_name=options.table,
        container_name=options.container,
        connection_str=options.connection,
        key=options.key,
        no_plot=options.no_plot,
    )


# Reads and parses command line arguments.
def __read_args(argv=None) -> argparse.Namespace:
    """Build the command-line parser for plot.py and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When left as
            ``None`` (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        The parsed namespace with attributes ``table``, ``container``,
        ``connection``, ``key`` and ``no_plot``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            an unknown option is supplied.
    """
    description: str = "CI utility for plotting performance statistics of Demikernel."

    # Initialize parser.
    parser = argparse.ArgumentParser(prog="plot.py", description=description)

    # Options related to Storage account.
    parser.add_argument("--table", required=True, help="Set Azure Table to use.")
    parser.add_argument("--container", required=True, help="Set Azure Blob Container to use.")

    # Options related to credentials.
    parser.add_argument("--connection", required=True, help="Set connection string to access Azure Storage Account.")
    parser.add_argument("--key", required=True, help="Set connection key to access Azure Storage Account.")

    # Output control: when set, statistics are printed instead of plotted.
    parser.add_argument("--no-plot", action="store_true", help="Do not plot the performance statistics.")

    # Parse the supplied arguments (or the process command line when argv is None).
    return parser.parse_args(argv)


# Get the root commit of the history reachable from HEAD.
def get_first_commit_on_branch() -> str:
    """Return the hash of the parentless (root) commit reachable from HEAD.

    Runs ``git rev-list --max-parents=0 HEAD`` in the current working
    directory. Despite the function name, this is the root of the history,
    not the point where a branch diverged.

    Returns:
        The full commit hash, or an empty string when git could not be run
        or reported no root commit (for instance outside a repository).
    """
    try:
        # Invoke git directly with an argument list; no shell is needed.
        result = subprocess.run(
            ["git", "rev-list", "--max-parents=0", "HEAD"],
            capture_output=True, text=True, check=False)
    except OSError:
        # git is not available: behave as if it produced no output.
        return ""
    if result.returncode != 0:
        return ""

    # A history may contain several root commits, printed one per line.
    # Pick a single one instead of gluing the lines together; rev-list prints
    # newest first by default, so the last entry is the oldest root.
    roots = result.stdout.split()
    return roots[-1] if roots else ""


def check_if_merge_commit(commit_hash: str) -> bool:
    """Tell whether a commit has more than one parent.

    Runs ``git show --format=%P -s <commit_hash>`` in the current working
    directory and counts the parent hashes it prints.

    Args:
        commit_hash: Hash of the commit to inspect.

    Returns:
        True when git lists two or more parents; False otherwise, including
        when the value is empty, looks like a command-line option, or git
        cannot be run or cannot resolve the commit.
    """
    revision = str(commit_hash)
    # The hash may come from external data: never let it be parsed as a git
    # option, and do not let an empty value silently fall back to HEAD.
    if not revision or revision.startswith("-"):
        return False

    try:
        # Argument list (no shell) so the hash cannot inject shell syntax.
        result = subprocess.run(
            ["git", "show", "--format=%P", "-s", revision],
            capture_output=True, text=True, check=False)
    except OSError:
        # git is not available: nothing can be identified as a merge.
        return False

    # %P prints the parent hashes separated by spaces.
    return len(result.stdout.split()) > 1


def check_if_head_commit(commit_hash: str) -> bool:
    """Tell whether a commit hash is the one HEAD currently points to.

    Runs ``git show --format=%H -s HEAD`` in the current working directory
    and compares its output with ``commit_hash`` for exact equality, so
    ``commit_hash`` must be a full hash to match.

    Args:
        commit_hash: Full hash of the commit to compare against HEAD.

    Returns:
        True when ``commit_hash`` equals the hash of HEAD; False otherwise,
        including when git cannot be run or HEAD cannot be resolved.
    """
    try:
        result = subprocess.run(
            ["git", "show", "--format=%H", "-s", "HEAD"],
            capture_output=True, text=True, check=False)
    except OSError:
        # git is not available: no commit can be identified as HEAD.
        return False

    head_hash = result.stdout.strip()
    # An empty output means HEAD could not be resolved; it must not match an
    # empty commit_hash. The result is returned explicitly (the comparison
    # used to be evaluated and then discarded).
    return bool(head_hash) and commit_hash == head_hash


def get_short_commit_hash(commit_hash: str) -> int:
    """Return the abbreviated form of a commit hash as an integer.

    Runs ``git rev-parse --short <commit_hash>`` in the current working
    directory and parses the abbreviated hexadecimal hash it prints.

    Args:
        commit_hash: Hash of the commit to abbreviate.

    Returns:
        The abbreviated hash interpreted as a base-16 number.

    Raises:
        ValueError: If the value is empty or looks like a command-line
            option, if git cannot be run, if git cannot resolve the commit,
            or if its output is not hexadecimal.
    """
    revision = str(commit_hash)
    # The hash may come from external data: never let it be parsed as a git
    # option.
    if not revision or revision.startswith("-"):
        raise ValueError(f"invalid commit hash: {revision!r}")

    try:
        # Argument list (no shell) so the hash cannot inject shell syntax.
        result = subprocess.run(
            ["git", "rev-parse", "--short", revision],
            capture_output=True, text=True, check=False)
    except OSError as err:
        raise ValueError(f"unable to run git to abbreviate commit {revision!r}") from err

    short_hash = result.stdout.strip()
    if result.returncode != 0 or not short_hash:
        # Report git's own diagnostic rather than failing on int("").
        raise ValueError(f"git could not resolve commit {revision!r}: {result.stderr.strip()}")

    return int(short_hash, 16)

# Compute the distance between the root commit and a given commit.
def get_distance_of_commits(commit_hash1: str) -> int:
    """Count the commits that separate the root commit from ``commit_hash1``.

    The base of the range is whatever ``get_first_commit_on_branch()``
    returns; the count is obtained from
    ``git rev-list --count <base>..<commit_hash1>`` run in the current
    working directory.

    Args:
        commit_hash1: Hash of the commit at the far end of the range.

    Returns:
        The number printed by git, or 0 when git cannot be run or prints
        nothing usable (for instance when the commit is unknown).
    """
    base_commit = get_first_commit_on_branch()

    try:
        # Argument list (no shell) so the hash cannot inject shell syntax.
        # The range is a single argument that starts with the base commit or
        # with "..", so git never parses it as an option.
        result = subprocess.run(
            ["git", "rev-list", "--count", f"{base_commit}..{commit_hash1}"],
            capture_output=True, text=True, check=False)
    except OSError:
        # git is not available: report no distance.
        return 0

    count = result.stdout.strip()
    # Empty or non-numeric output (git failed) is reported as distance 0.
    return int(count) if count.isdigit() else 0


def __plot_performance(table_name: str, container_name: str, connection_str: str, key: str, no_plot: bool) -> None:
    """Query recent performance statistics and plot or print them.

    Rows from the last 15 days are read from the Azure Table for the catnap,
    catpowder and catnip LibOSes and the push and pop syscalls. Only rows
    whose commit is a merge commit or the current HEAD are kept, once per
    (job, LibOS, commit, syscall). For each job type and syscall, the samples
    of the three LibOSes are joined by commit; then either a line chart is
    saved as ``<job>-<syscall>.png`` and uploaded to blob storage, or the
    table is printed.

    Args:
        table_name: Name of the Azure Table holding the statistics.
        container_name: Blob container that receives the generated images.
        connection_str: Connection string used to reach the table service.
        key: Credential used for the blob upload.
        no_plot: When True, print the tables instead of plotting/uploading.
    """
    job_types = ["tcp-ping-pong-server", "tcp-ping-pong-client"]
    syscalls = ["push", "pop"]
    libos_types = ["catnap", "catpowder", "catnip"]

    # Connect to Azure table.
    table_service = TableServiceClient.from_connection_string(connection_str)
    table_client = table_service.get_table_client(table_name)

    print("Querying Azure Table for performance statistics...")

    # Query Azure table for statistics on the past 15 days. The timestamp is
    # rendered with a "Z" (UTC) suffix, so it must be computed in UTC.
    base_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=15)
    query_filter: str = (
        f"Timestamp gt datetime'{base_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}' and "
        "(LibOS eq 'catnap' or LibOS eq 'catpowder' or LibOS eq 'catnip') and "
        "(Syscall eq 'push' or Syscall eq 'pop')"
    )
    select: List[str] = ["LibOS", "JobName", "CommitHash", "Syscall", "AverageCyclesPerSyscall"]
    data = table_client.query_entities(query_filter=query_filter, select=select)

    # cooked_data[job][syscall][libos] holds three parallel lists:
    # distance from the root commit, average cycles, abbreviated commit hash.
    cooked_data = {
        job_type: {
            syscall: {
                libos_type: {"diff": [], "cycles": [], "commit": []}
                for libos_type in libos_types
            }
            for syscall in syscalls
        }
        for job_type in job_types
    }

    # Root commit of the history; also used as prefix of the uploaded blobs.
    root_commit = get_first_commit_on_branch()

    print(f"Processing commits since {root_commit}...")

    # Samples already recorded. The job type is part of the key so that the
    # server and client measurements of a same commit are both kept.
    recorded = set()
    # Per-commit caches, so git is queried once per commit and not per row.
    commit_kinds = {}
    commit_positions = {}

    # Parse queried data.
    for row in data:
        for job_type in job_types:
            if not fnmatch.fnmatch(row["JobName"], f"*{job_type}*"):
                continue
            for syscall in syscalls:
                if syscall not in row["Syscall"]:
                    continue
                for libos_type in libos_types:
                    if libos_type not in row["LibOS"]:
                        continue

                    commit_hash = row["CommitHash"]
                    commit_kind = _classify_commit(commit_hash, commit_kinds)
                    if commit_kind is None:
                        # Neither HEAD nor a merge commit: not plotted.
                        print(f"Skipping commit {commit_hash}...")
                        continue

                    # Check if we have already processed this sample.
                    sample = (job_type, libos_type, commit_hash, syscall)
                    if sample in recorded:
                        continue
                    recorded.add(sample)

                    print(f"Processing {commit_kind} commit {commit_hash}...")
                    if commit_hash not in commit_positions:
                        commit_positions[commit_hash] = (
                            get_distance_of_commits(commit_hash),
                            get_short_commit_hash(commit_hash),
                        )
                    distance, short_hash = commit_positions[commit_hash]

                    series = cooked_data[job_type][syscall][libos_type]
                    series["diff"].append(distance)
                    series["cycles"].append(row["AverageCyclesPerSyscall"])
                    series["commit"].append(short_hash)

    for job_type in job_types:
        for syscall in syscalls:
            df = _merge_libos_frames(cooked_data[job_type][syscall])
            if df.empty:
                # No commit has samples for all LibOSes: nothing to show.
                continue

            if no_plot:
                print(f"\nPerformance for {syscall.capitalize()}() in {job_type}:")
                print(df)
                continue

            axes = df.plot(x="Diff", y=["Catpowder", "Catnap", "Catnip"],
                           kind="line", marker='o',
                           title=f"Performance for {syscall.capitalize()}()",
                           xlabel="Commit Hash",
                           ylabel="Average Cycles Spent in Syscall",
                           legend=True, ylim=(0, 5000))
            # Points are positioned by distance but labelled with the commit.
            plt.xticks(rotation=90, ticks=df["Diff"], labels=df["Commit"])
            image_name = f"{job_type}-{syscall}.png"
            plt.savefig(image_name, bbox_inches='tight', dpi=300)
            # Release the figure; one is created per job type and syscall.
            plt.close(axes.get_figure())
            upload_image_to_blob("demikernel", key, container_name,
                                 f"{root_commit}-{image_name}", image_name)


# Classifies a commit as "head", "merge" or None, memoizing the answer in cache.
def _classify_commit(commit_hash, cache):
    if commit_hash not in cache:
        if check_if_head_commit(commit_hash):
            cache[commit_hash] = "head"
        elif check_if_merge_commit(commit_hash):
            cache[commit_hash] = "merge"
        else:
            cache[commit_hash] = None
    return cache[commit_hash]


# Joins the per-LibOS series of one job/syscall into a single table, one row per commit.
def _merge_libos_frames(series_by_libos):
    columns = ["Diff", "Commit"] + [libos_type.capitalize() for libos_type in series_by_libos]

    # The join below keeps only commits present for every LibOS, so a single
    # empty series makes the whole result empty.
    if any(not series["diff"] for series in series_by_libos.values()):
        return pandas.DataFrame(columns=columns)

    merged = None
    for libos_type, series in series_by_libos.items():
        # Name the cycles column after its LibOS before merging, so that the
        # result does not depend on the column order produced by the merge.
        frame = pandas.DataFrame(series).rename(columns={"cycles": libos_type.capitalize()})
        merged = frame if merged is None else pandas.merge(merged, frame, on=["commit", "diff"])

    merged = merged.rename(columns={"diff": "Diff", "commit": "Commit"})
    return merged[columns].sort_values(by="Diff", ignore_index=True)


def upload_image_to_blob(account_name, account_key, container_name, blob_name, image_path):
    """Upload a local file to a blob, replacing any blob with the same name.

    Args:
        account_name: Storage account name, used to build the account URL.
        account_key: Credential handed to the blob client.
        container_name: Container that receives the blob.
        blob_name: Name given to the uploaded blob.
        image_path: Path of the local file to upload.
    """
    endpoint = f"https://{account_name}.blob.core.windows.net"
    uploader = BlobClient(
        account_url=endpoint,
        container_name=container_name,
        blob_name=blob_name,
        credential=account_key,
    )

    # Send the file in binary mode; overwrite=True replaces an existing blob.
    with open(image_path, 'rb') as image_file:
        uploader.upload_blob(image_file, overwrite=True)


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 18b755e

Please sign in to comment.