Commit

[ci] Enhancement: Perf Workflow
ppenna committed Mar 23, 2024
1 parent 4a6c161 commit 4afffe4
Showing 2 changed files with 307 additions and 0 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/perf.yml
@@ -0,0 +1,46 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

name: Performance Analysis

on:
  push:
    branches:
      - bugfix-*
      - enhancement-*
      - feature-*
      - workaround-*
      - dev
      - unstable
      - main
  workflow_dispatch:

jobs:
  plot:
    name: Plot
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup SSH
        shell: bash
        run: |
          mkdir -p $HOME/.ssh/
          echo "${{ secrets.SSHKEY }}" > "$HOME/.ssh/id_rsa"
          chmod 400 $HOME/.ssh/id_rsa
          echo "Host *" > $HOME/.ssh/config
          echo -e "\tStrictHostKeyChecking no" >> $HOME/.ssh/config
          echo -e "\tIdentityFile $HOME/.ssh/id_rsa" >> $HOME/.ssh/config
          echo -e "\tIdentitiesOnly yes" >> $HOME/.ssh/config
          echo -e "\tPasswordAuthentication no" >> $HOME/.ssh/config
          echo -e "\tUser ${{ secrets.USERNAME }}" >> $HOME/.ssh/config
          echo -e "\tPort ${{ secrets.PORTNUM }}" >> $HOME/.ssh/config
      - name: Plot Performance
        run: |
          pip install --pre azure-data-tables azure-storage-blob pandas matplotlib
          python3 tools/plot.py \
            --connection "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" \
            --table ${{ secrets.AZURE_STORAGE_TABLE_NAME }} \
            --key ${{ secrets.AZURE_STORAGE_KEY }} \
            --container ${{ secrets.AZURE_STORAGE_CONTAINER }}
261 changes: 261 additions & 0 deletions tools/plot.py
@@ -0,0 +1,261 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import argparse
import datetime
import fnmatch
import json
import subprocess
from typing import List

import matplotlib.pyplot as plt
import pandas
from azure.data.tables import TableServiceClient
from azure.storage.blob import BlobClient

# =====================================================================================================================


# Drives the program.
def main():
    # Read arguments from command line and parse them.
    args: argparse.Namespace = __read_args()

    # Extract options.
    table_name: str = args.table
    container_name: str = args.container
    connection_str: str = args.connection
    key: str = args.key
    no_plot: bool = args.no_plot

    __plot_performance(table_name=table_name, container_name=container_name,
                       connection_str=connection_str, key=key, no_plot=no_plot)


# Reads and parses command line arguments.
def __read_args() -> argparse.Namespace:
    description: str = "CI utility for plotting performance statistics of Demikernel."

    # Initialize parser.
    parser = argparse.ArgumentParser(prog="plot.py", description=description)

    # Options related to the Storage Account.
    parser.add_argument("--table", required=True, help="Set Azure Table to use.")
    parser.add_argument("--container", required=True, help="Set Azure Blob Container to use.")

    # Options related to credentials.
    parser.add_argument("--connection", required=True, help="Set connection string to access Azure Storage Account.")
    parser.add_argument("--key", required=True, help="Set access key for the Azure Storage Account.")

    parser.add_argument("--no-plot", action="store_true", help="Do not plot the performance statistics.")

    # Read arguments from command line.
    return parser.parse_args()


# Gets the first (root) commit reachable from HEAD.
def get_first_commit_on_branch() -> str:
    cmd = "git rev-list --max-parents=0 HEAD"
    git_cmd = "bash -l -c '{}'".format(cmd)
    git_process = subprocess.Popen(
        git_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    git_stdout, _ = git_process.communicate()
    return git_stdout.replace("\n", "")


# Checks whether the given commit is a merge commit (i.e., it has more than one parent).
def check_if_merge_commit(commit_hash: str) -> bool:
    cmd = "git show --format=%P -s {}".format(commit_hash)
    git_cmd = "bash -l -c '{}'".format(cmd)
    git_process = subprocess.Popen(
        git_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    git_stdout, _ = git_process.communicate()
    return len(git_stdout.split()) > 1


# Checks whether the given commit is the current HEAD commit.
def check_if_head_commit(commit_hash: str) -> bool:
    cmd = "git rev-parse HEAD"
    git_cmd = "bash -l -c '{}'".format(cmd)
    git_process = subprocess.Popen(
        git_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    git_stdout, _ = git_process.communicate()
    return git_stdout.strip() == commit_hash


# Gets the short hash of the given commit, decoded from hexadecimal into an integer.
def get_short_commit_hash(commit_hash: str) -> int:
    cmd = "git rev-parse --short {}".format(commit_hash)
    git_cmd = "bash -l -c '{}'".format(cmd)
    git_process = subprocess.Popen(
        git_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    git_stdout, _ = git_process.communicate()
    return int(git_stdout.replace("\n", ""), 16)


# Computes the number of commits between the first commit on the branch and the given commit.
def get_distance_of_commits(commit_hash: str) -> int:
    base_commit = get_first_commit_on_branch()
    cmd = "git rev-list --count {}..{}".format(base_commit, commit_hash)
    git_cmd = "bash -l -c '{}'".format(cmd)
    git_process = subprocess.Popen(
        git_cmd, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    git_stdout, _ = git_process.communicate()
    git_stdout = git_stdout.replace("\n", "")
    return int(git_stdout)


def __plot_performance(table_name: str, container_name: str, connection_str: str, key: str, no_plot: bool) -> None:
    # Connect to the Azure Table.
    table_service = TableServiceClient.from_connection_string(connection_str)
    table_client = table_service.get_table_client(table_name)

    # Query the Azure Table for statistics from the past 15 days.
    base_date = datetime.datetime.now() - datetime.timedelta(days=15)
    # print(f"Querying Azure Table for performance statistics since {base_date}...")
    query_filter: str = f"Timestamp gt datetime'{base_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}' and " + \
        "(LibOS eq 'catnap' or LibOS eq 'catpowder' or LibOS eq 'catnip') and (Syscall eq 'push' or Syscall eq 'pop')"
    select: List[str] = ["LibOS", "JobName", "CommitHash", "Syscall", "AverageCyclesPerSyscall"]
    data = table_client.query_entities(query_filter=query_filter, select=select)

    # Job, syscall, and libos dimensions that we plot.
    job_types = ["tcp-ping-pong-server", "tcp-ping-pong-client"]
    syscalls = ["push", "pop"]
    libos_types = ["catnap", "catpowder", "catnip"]

    # Per-job, per-syscall, per-libos series of data points to plot.
    cooked_data = {
        job_type: {
            syscall: {
                libos_type: {"diff": [], "cycles": [], "commit": []}
                for libos_type in libos_types
            }
            for syscall in syscalls
        }
        for job_type in job_types
    }

    # Hashtable of (libos, commit, syscall) tuples that have already been processed.
    commits = {}
    # First commit on the branch; used to prefix the names of the uploaded plots.
    head_commit = get_first_commit_on_branch()

    # Parse queried data.
    for row in data:
        for job_type in job_types:
            if fnmatch.fnmatch(row["JobName"], f"*{job_type}*"):
                for syscall in syscalls:
                    if syscall in row["Syscall"]:
                        for libos_type in libos_types:
                            if libos_type in row["LibOS"]:
                                hash = row["CommitHash"]
                                # Keep only merge commits and the current HEAD commit.
                                if check_if_merge_commit(hash) or check_if_head_commit(hash):
                                    # Check if we have already processed this commit.
                                    if (libos_type, hash, syscall) not in commits:
                                        # print(f"Processing commit {hash}...")
                                        commits[(libos_type, hash, syscall)] = True
                                        cooked_data[job_type][syscall][libos_type]["diff"].append(
                                            get_distance_of_commits(hash))
                                        cooked_data[job_type][syscall][libos_type]["cycles"].append(
                                            row["AverageCyclesPerSyscall"])
                                        cooked_data[job_type][syscall][libos_type]["commit"].append(
                                            get_short_commit_hash(hash))

    # print(json.dumps(cooked_data, indent=4))

    if not no_plot:
        for job_type in job_types:
            for syscall in syscalls:
                catpowder_df = pandas.DataFrame(cooked_data[job_type][syscall]["catpowder"])
                catpowder_df.sort_values(by=['diff'], inplace=True)
                catnap_df = pandas.DataFrame(cooked_data[job_type][syscall]["catnap"])
                catnap_df.sort_values(by=['diff'], inplace=True)
                catnip_df = pandas.DataFrame(cooked_data[job_type][syscall]["catnip"])
                catnip_df.sort_values(by=['diff'], inplace=True)
                df = pandas.merge(catpowder_df, catnap_df, on=["commit", "diff"])
                df = pandas.merge(df, catnip_df, on=["commit", "diff"])
                if not df.empty:
                    # The merges suffix the duplicated 'cycles' columns; rename everything to readable labels.
                    df.columns = ["Diff", "Catpowder", "Commit", "Catnap", "Catnip"]
                    # print(f"Plotting {job_type} {syscall}...")
                    df.plot(x="Diff", y=["Catpowder", "Catnap", "Catnip"],
                            kind="line", marker='o',
                            title=f"Performance for {syscall.capitalize()}()",
                            xlabel="Commit Hash",
                            ylabel="Average Cycles Spent in Syscall",
                            legend=True, ylim=(0, 5000))
                    plt.xticks(rotation=90, ticks=df["Diff"], labels=df["Commit"])
                    plt.savefig(f"{job_type}-{syscall}.png", bbox_inches='tight', dpi=300)
                    upload_image_to_blob("demikernel", key, container_name,
                                         f"{head_commit}-{job_type}-{syscall}.png", f"{job_type}-{syscall}.png")


# Uploads the given image file to an Azure Blob Storage container.
def upload_image_to_blob(account_name, account_key, container_name, blob_name, image_path):
    blob_client = BlobClient(account_url=f"https://{account_name}.blob.core.windows.net",
                             container_name=container_name,
                             blob_name=blob_name,
                             credential=account_key)

    with open(image_path, 'rb') as f:
        blob_client.upload_blob(f, overwrite=True)


if __name__ == "__main__":
    main()
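
For reference, tools/plot.py assumes that every row in the Azure Table exposes at least the LibOS, JobName, CommitHash, Syscall, and AverageCyclesPerSyscall columns used in the query above. A minimal sketch of publishing one such row with the same azure-data-tables package (the table name and all values below are illustrative placeholders, not part of this commit):

# Sketch: publish one performance sample in the shape that tools/plot.py expects.
# All names and values here are placeholders.
from azure.data.tables import TableServiceClient

service = TableServiceClient.from_connection_string("<connection-string>")
table = service.create_table_if_not_exists(table_name="<table-name>")
table.create_entity(entity={
    "PartitionKey": "catnap",                  # any partitioning scheme works for the query above
    "RowKey": "<full-commit-hash>-push",
    "LibOS": "catnap",                         # catnap, catpowder, or catnip
    "JobName": "tcp-ping-pong-server",         # must contain tcp-ping-pong-server or tcp-ping-pong-client
    "CommitHash": "<full-commit-hash>",
    "Syscall": "push",                         # push or pop
    "AverageCyclesPerSyscall": 1234,
})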
