Skip to content

Commit

Permalink
[ci] Enhancement: Perf Workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
ppenna committed Mar 26, 2024
1 parent 4a6c161 commit f763497
Show file tree
Hide file tree
Showing 2 changed files with 335 additions and 0 deletions.
54 changes: 54 additions & 0 deletions .github/workflows/perf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Workflow that queries stored performance statistics and posts them as a
# comment on the pull request that triggered it.
name: Performance Analysis

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  plot:
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      deployments: read
      packages: none
      # Required so that the last step can create a comment on the pull request.
      pull-requests: write
      security-events: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history: tools/plot.py runs `git rev-list` over the whole history.
          fetch-depth: 0
      - name: Parse Statistics
        id: parse-stats
        run: |
          git show --format=%H -s HEAD
          git rev-list --max-parents=0 HEAD
          pip install --pre azure-data-tables azure-storage-blob pandas matplotlib
          # Every secret is quoted so that whitespace or shell metacharacters
          # in a value cannot split or alter the command line.
          stats=$(python3 tools/plot.py \
            --connection "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" \
            --table "${{ secrets.AZURE_STORAGE_TABLE_NAME }}" \
            --key "${{ secrets.AZURE_STORAGE_KEY }}" \
            --container "${{ secrets.AZURE_STORAGE_CONTAINER }}" \
            --no-plot)
          # Multi-line step output: a random delimiter guards against the
          # payload accidentally containing the terminator.
          EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
          echo "MESSAGE<<$EOF" >> "$GITHUB_OUTPUT"
          echo "$stats" >> "$GITHUB_OUTPUT"
          echo "$EOF" >> "$GITHUB_OUTPUT"
          echo "$stats"
          echo "$EOF"
      - name: Post Comment
        uses: actions/github-script@v7
        env:
          # The statistics span several lines and may contain quotes, so they
          # are handed over through the environment instead of being pasted
          # into the JavaScript source (which would break a quoted literal and
          # allow script injection).
          MESSAGE: ${{ steps.parse-stats.outputs.MESSAGE }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: process.env.MESSAGE
            })
281 changes: 281 additions & 0 deletions tools/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import datetime
import fnmatch
import json
import subprocess
from typing import List
from azure.data.tables import TableServiceClient
import pandas
import matplotlib.pyplot as plt
import argparse
from azure.storage.blob import BlobClient

# =====================================================================================================================


# Drives the program.
def main():
    """Parse the command line and run the performance report with those options."""
    # Read arguments from command line and parse them.
    options: argparse.Namespace = __read_args()

    # Hand every option over to the worker under its keyword name.
    __plot_performance(
        table_name=options.table,
        container_name=options.container,
        connection_str=options.connection,
        key=options.key,
        no_plot=options.no_plot,
    )


# Reads and parses command line arguments.
def __read_args(argv=None) -> argparse.Namespace:
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from ``sys.argv``.

    Returns:
        The parsed namespace with the attributes ``table``, ``container``,
        ``connection``, ``key`` and ``no_plot``.
    """
    description: str = "CI Utility to plot performance statistics of Demikernel."

    # Initialize parser.
    parser = argparse.ArgumentParser(prog="plot.py", description=description)

    # Options related to Storage account.
    parser.add_argument("--table", required=True, help="Set Azure Table to use.")
    parser.add_argument("--container", required=True, help="Set Azure Blob Container to use.")

    # Options related to credentials.
    parser.add_argument("--connection", required=True, help="Set connection string to access Azure Storage Account.")
    parser.add_argument("--key", required=True, help="Set connection key to access Azure Storage Account.")

    # Output control.
    parser.add_argument("--no-plot", action="store_true", help="Do not plot the performance statistics.")

    # Read arguments from the given list, or from the command line if none was given.
    return parser.parse_args(argv)


# Get head commit on branch.
def get_head_commit() -> str:
    """Return the full hash of the commit that HEAD points to.

    Git is invoked directly with an argument list rather than through a
    login shell, so no shell quoting or profile scripts are involved.

    Returns:
        The output of ``git show --format=%H -s HEAD`` with newlines removed,
        or an empty string when git cannot be started or prints nothing
        (for instance outside of a repository).
    """
    try:
        completed = subprocess.run(
            ["git", "show", "--format=%H", "-s", "HEAD"],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # git could not be launched; report "no output" like a failed command.
        return ""
    return completed.stdout.replace("\n", "")


# Get first commit on branch.
def get_first_commit_on_branch(head_commit: str) -> str:
    """Return the hash of a parentless (root) commit reachable from a commit.

    Runs ``git rev-list --max-parents=0 <head_commit>``. The revision is
    passed as a separate argument, never interpolated into a shell command.

    Args:
        head_commit: Revision whose history is searched.

    Returns:
        The last hash printed by git, so that a history with several root
        commits yields one valid hash instead of several hashes glued
        together. An empty string is returned when git cannot be started or
        prints nothing (for instance for an unknown revision).
    """
    try:
        completed = subprocess.run(
            ["git", "rev-list", "--max-parents=0", head_commit],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # git could not be launched; report "no output" like a failed command.
        return ""
    roots = completed.stdout.split()
    return roots[-1] if roots else ""


def check_if_merge_commit(commit_hash: str) -> bool:
    """Tell whether a commit has more than one parent.

    Runs ``git show --format=%P -s <commit_hash>`` and counts the parent
    hashes that git prints. The hash is passed as a separate argument, so a
    malformed value cannot be interpreted by a shell.

    Args:
        commit_hash: Revision to inspect.

    Returns:
        True when git lists two or more parents. False otherwise, which
        includes the cases where git cannot be started or does not know the
        commit.
    """
    try:
        completed = subprocess.run(
            ["git", "show", "--format=%P", "-s", commit_hash],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # git could not be launched; no parents could be listed.
        return False
    return len(completed.stdout.split()) > 1


def check_if_head_commit(commit_hash: str) -> bool:
    """Tell whether a commit hash is the current HEAD commit.

    Args:
        commit_hash: Full commit hash to compare.

    Returns:
        True when ``commit_hash`` equals the hash reported by
        ``get_head_commit()``, False otherwise.
    """
    # The comparison result must be returned; evaluating it as a bare
    # expression would make the function always yield None.
    return commit_hash == get_head_commit()


def get_short_commit_hash(commit_hash: str) -> int:
    """Return the abbreviated form of a commit hash as an integer.

    Runs ``git rev-parse --short <commit_hash>`` and parses what git prints
    as a hexadecimal number. The hash is passed as a separate argument, so it
    is never interpreted by a shell.

    Args:
        commit_hash: Revision to abbreviate.

    Returns:
        The abbreviated hash converted with ``int(..., 16)``.

    Raises:
        ValueError: If git prints nothing or prints something that is not a
            hexadecimal number (for instance for an unknown revision).
    """
    try:
        completed = subprocess.run(
            ["git", "rev-parse", "--short", commit_hash],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        git_stdout = completed.stdout
    except OSError:
        # git could not be launched; treat it as empty output so that the
        # conversion below reports the problem as a ValueError.
        git_stdout = ""
    return int(git_stdout.replace("\n", ""), 16)

# Compute distance of two commit hashes.
def get_distance_of_commits(head_commit: str) -> int:
    """Count the commits between the root commit and a given commit.

    The base is obtained from ``get_first_commit_on_branch(head_commit)`` and
    the count from ``git rev-list --count <base>..<head_commit>``. The
    revision range is passed as a single argument, never through a shell.

    Args:
        head_commit: Revision at the far end of the range.

    Returns:
        The number printed by git, or 0 when git cannot be started or prints
        nothing.
    """
    base_commit = get_first_commit_on_branch(head_commit)
    try:
        completed = subprocess.run(
            ["git", "rev-list", "--count", f"{base_commit}..{head_commit}"],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # git could not be launched; same result as an empty count.
        return 0
    count = completed.stdout.replace("\n", "")
    return int(count) if count else 0


def __plot_performance(table_name: str, container_name: str, connection_str: str, key: str, no_plot: bool) -> None:
    """Query performance statistics and either plot them or print them.

    Rows of the last 15 days are read from the Azure Table and grouped by job
    type, syscall and LibOS. Only rows whose commit is a merge commit or the
    HEAD commit are kept, and each commit is recorded once per group. For
    every (job type, syscall) pair the three LibOS series are joined on the
    commit and its distance from the root commit.

    Args:
        table_name: Name of the Azure Table to query.
        container_name: Blob container that receives the generated images.
        connection_str: Connection string used to reach the Azure Table.
        key: Credential used when uploading images to blob storage.
        no_plot: When True the joined tables are printed; when False they are
            plotted, saved as PNG files and uploaded to the "demikernel"
            storage account.
    """
    job_types = ["tcp-ping-pong-server", "tcp-ping-pong-client"]
    syscalls = ["push", "pop"]
    libos_types = ["catnap", "catpowder", "catnip"]

    # Connect to Azure table.
    table_service = TableServiceClient.from_connection_string(connection_str)
    table_client = table_service.get_table_client(table_name)

    print("Querying Azure Table for performance statistics...")

    # Query Azure table for statistics on the past 15 days. The timestamp is
    # rendered with a "Z" suffix, so it has to be taken from the UTC clock.
    base_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=15)
    query_filter: str = (
        f"Timestamp gt datetime'{base_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}' and "
        "(LibOS eq 'catnap' or LibOS eq 'catpowder' or LibOS eq 'catnip') and "
        "(Syscall eq 'push' or Syscall eq 'pop')"
    )
    select: List[str] = ["LibOS", "JobName", "CommitHash", "Syscall", "AverageCyclesPerSyscall"]
    data = table_client.query_entities(query_filter=query_filter, select=select)

    # One independent set of series per job type, syscall and LibOS.
    cooked_data = {
        job_type: {
            syscall: {
                libos_type: {"diff": [], "cycles": [], "commit": []}
                for libos_type in libos_types
            }
            for syscall in syscalls
        }
        for job_type in job_types
    }

    # Entries that were already recorded.
    seen = set()
    head_commit: str = get_head_commit()
    root_commit: str = get_first_commit_on_branch(head_commit)

    print(f"Processing commits from {root_commit} to {head_commit}...")

    # Parse queried data.
    for row in data:
        for job_type in job_types:
            if not fnmatch.fnmatch(row["JobName"], f"*{job_type}*"):
                continue
            for syscall in syscalls:
                if syscall not in row["Syscall"]:
                    continue
                for libos_type in libos_types:
                    if libos_type not in row["LibOS"]:
                        continue
                    commit_hash = row["CommitHash"]
                    # The job type is part of the key: server and client
                    # results of the same commit are separate series and must
                    # not shadow each other.
                    entry = (job_type, libos_type, commit_hash, syscall)
                    # Cheap membership test first, git subprocesses second.
                    if entry in seen:
                        continue
                    if not (check_if_merge_commit(commit_hash) or check_if_head_commit(commit_hash)):
                        continue
                    seen.add(entry)
                    series = cooked_data[job_type][syscall][libos_type]
                    series["diff"].append(get_distance_of_commits(commit_hash))
                    series["cycles"].append(row["AverageCyclesPerSyscall"])
                    series["commit"].append(get_short_commit_hash(commit_hash))

    for job_type in job_types:
        for syscall in syscalls:
            # Name the columns explicitly before joining so that the result
            # does not depend on the column order or on merge suffixes.
            frames = {}
            for libos_type in libos_types:
                frame = pandas.DataFrame(cooked_data[job_type][syscall][libos_type])
                frame = frame.rename(
                    columns={"diff": "Diff", "commit": "Commit", "cycles": libos_type.capitalize()})
                frames[libos_type] = frame.sort_values(by=["Diff"])

            # Keep only the commits for which all three LibOSes have a sample.
            df = pandas.merge(frames["catpowder"], frames["catnap"], on=["Commit", "Diff"])
            df = pandas.merge(df, frames["catnip"], on=["Commit", "Diff"])
            df = df.reindex(columns=["Diff", "Commit", "Catnap", "Catpowder", "Catnip"])

            if df.empty:
                continue

            if no_plot:
                print(f"\nPerformance for {syscall.capitalize()}() in {job_type}:")
                print(df)
                continue

            df.plot(x="Diff", y=["Catpowder", "Catnap", "Catnip"],
                    kind="line", marker='o',
                    title=f"Performance for {syscall.capitalize()}()",
                    xlabel="Commit Hash",
                    ylabel="Average Cycles Spent in Syscall",
                    legend=True, ylim=(0, 5000))
            # Points are placed by distance but labelled with the commit.
            plt.xticks(rotation=90, ticks=df["Diff"], labels=df["Commit"])
            image_path = f"{job_type}-{syscall}.png"
            plt.savefig(image_path, bbox_inches='tight', dpi=300)
            # Release the figure so that figures do not pile up across iterations.
            plt.close()
            upload_image_to_blob("demikernel", key, container_name,
                                 f"{head_commit}-{image_path}", image_path)


def upload_image_to_blob(account_name, account_key, container_name, blob_name, image_path):
    """Upload a local file to a blob, replacing the blob if it already exists.

    Args:
        account_name: Storage account name, used to build the account URL.
        account_key: Credential handed to the blob client.
        container_name: Container that holds the blob.
        blob_name: Name of the blob to write.
        image_path: Path of the local file to upload.
    """
    endpoint = f"https://{account_name}.blob.core.windows.net"
    client = BlobClient(
        account_url=endpoint,
        container_name=container_name,
        blob_name=blob_name,
        credential=account_key,
    )

    # The file is read in binary mode and closed once the upload returns.
    with open(image_path, 'rb') as image_file:
        client.upload_blob(image_file, overwrite=True)


# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit f763497

Please sign in to comment.