forked from KittyChiu/workflow-metrics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_workflow_runs.py
129 lines (105 loc) · 4 KB
/
get_workflow_runs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Retrieves all workflow runs for a repository within the specified date range.
Usage:
python get_workflow_runs.py <repo_owner> <repo_name> <start_date> <end_date>
Arguments:
repo_owner (str): The owner of the repository.
repo_name (str): The name of the repository.
start_date (str): The start date of the date range in ISO 8601 format.
end_date (str): The end date of the date range in ISO 8601 format.
Returns:
A list of workflow runs with the following fields:
- conclusion
- created_at
- display_title
- event
- head_branch
- name
- run_number
- run_started_at
- status
- updated_at
- url
- duration
Requirements:
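- Python 3.7+ (the script uses `datetime.fromisoformat`)
- GitHub CLI (`gh`) command-line tool; the `jq` query is passed to `gh api` through its `--jq` option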
Description:
This script retrieves all workflow runs for a repository within the specified date range. The script takes four
command-line arguments: the owner of the repository, the name of the repository, the start date of the date range,
and the end date of the date range. The start and end dates should be in ISO 8601 format.
The script uses the GitHub API to retrieve the workflow runs for the specified repository and date range. The
script requires authentication with `repo` scope with the API.
The script outputs a list of workflow runs in JSON format, with the following fields for each run:
- conclusion
- created_at
- display_title
- event
- head_branch
- name
- run_number
- run_started_at
- status
- updated_at
- url
- duration
To run the script, you need to have Python 3.x and the GitHub CLI (`gh`) installed on your system; the script
runs its `jq` query through `gh api --jq`. You also need to have a GitHub API token with the `repo` scope.
Output:
- A list of workflow runs in JSON format, written to `runs.json` in the current working directory
Example:
python get_workflow_runs.py octocat hello-world 2022-01-01 2022-01-31
"""
import subprocess
import json
import sys
from datetime import datetime
# Name of the JSON file (relative to the current working directory) that
# receives the collected workflow runs.
RUNS_FILE = 'runs.json'


def format_jq_timestamp(value):
    """Return *value* as a ``YYYY-MM-DDTHH:MM:SSZ`` string.

    The jq filter compares timestamps as plain strings, so the bounds must
    use the same layout as the ``run_started_at`` values being compared
    (``T`` separator, trailing ``Z``). ``str(datetime)`` uses a space
    separator instead, which sorts before ``T`` and makes the comparison
    wrong whenever the date part is equal.

    Args:
        value: A ``datetime``. Timezone-aware values are converted to UTC;
            naive values are taken to already be in UTC.

    Returns:
        The formatted timestamp string, truncated to whole seconds.
    """
    offset = value.utcoffset()
    if offset is not None:
        # Shift the wall-clock time to UTC, then drop tzinfo so that
        # isoformat() does not append a "+HH:MM" suffix.
        value = (value - offset).replace(tzinfo=None)
    return value.isoformat(timespec='seconds') + 'Z'


def build_jq_query(start_date, end_date):
    """Build the jq program that filters and projects the workflow runs.

    Args:
        start_date: Inclusive lower bound (``datetime``) on ``run_started_at``.
        end_date: Inclusive upper bound (``datetime``) on ``run_started_at``.

    Returns:
        A jq program string. For each API page it emits one array with the
        selected fields of the matching runs; pages with no match emit nothing.
    """
    lower = format_jq_timestamp(start_date)
    upper = format_jq_timestamp(end_date)
    return (
        '[.workflow_runs[] '
        f'| select(.run_started_at >= "{lower}" and .run_started_at <= "{upper}") '
        '| {conclusion,created_at,display_title,event,head_branch,name,run_number,'
        'run_started_at,run_attempt,status,updated_at,url}] '
        '| select(length > 0)'
    )


def parse_runs_output(output):
    """Parse the line-delimited JSON printed by ``gh api --jq``.

    Args:
        output: The captured standard output of the ``gh`` command.

    Returns:
        A flat list of workflow runs. Lines holding a JSON array are
        flattened into the result; any other JSON value is appended as is.
        Blank lines are ignored, and lines that are not valid JSON are
        skipped with a warning on standard error.
    """
    runs = []
    for line_number, line in enumerate(output.split('\n'), start=1):
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError as err:
            # Keep the best-effort behavior, but make the data loss visible.
            print(
                f'Warning: skipping unparseable output line {line_number}: {err}',
                file=sys.stderr,
            )
            continue
        if isinstance(data, list):
            runs.extend(data)
        else:
            runs.append(data)
    return runs


def add_durations(runs):
    """Add a ``duration`` field (in seconds) to each workflow run in place.

    The duration is the difference between ``updated_at`` and
    ``run_started_at``. When either timestamp is missing or null the
    duration is set to ``None`` instead of raising.

    Args:
        runs: A list of workflow-run dictionaries.

    Returns:
        The same list, for convenience.
    """
    for run in runs:
        started = run.get('run_started_at')
        updated = run.get('updated_at')
        if started and updated:
            # fromisoformat() on older Python versions rejects a trailing "Z".
            started_at = datetime.fromisoformat(started.replace('Z', '+00:00'))
            updated_at = datetime.fromisoformat(updated.replace('Z', '+00:00'))
            run['duration'] = (updated_at - started_at).total_seconds()
        else:
            run['duration'] = None
    return runs


def fetch_workflow_runs(repo_owner, repo_name, start_date, end_date):
    """Retrieve the workflow runs of a repository through the ``gh`` CLI.

    Args:
        repo_owner: The owner of the repository.
        repo_name: The name of the repository.
        start_date: Inclusive lower bound (``datetime``) on ``run_started_at``.
        end_date: Inclusive upper bound (``datetime``) on ``run_started_at``.

    Returns:
        A list of workflow-run dictionaries, each with a ``duration`` field.

    Raises:
        FileNotFoundError: If the ``gh`` executable cannot be found.
        subprocess.CalledProcessError: If ``gh`` exits with a non-zero status.
    """
    # An argument list (no shell) keeps repository names and the jq program
    # from being interpreted by a shell.
    cmd = [
        'gh', 'api', f'repos/{repo_owner}/{repo_name}/actions/runs',
        '--paginate',
        '--jq', build_jq_query(start_date, end_date),
    ]
    output = subprocess.check_output(cmd, text=True)
    return add_durations(parse_runs_output(output))


def main(argv=None):
    """Run the command-line entry point.

    Args:
        argv: The full argument vector, program name included. Defaults to
            ``sys.argv``.

    Exits with status 1 on a usage error, an invalid date, or a failed
    ``gh`` invocation.
    """
    args = sys.argv if argv is None else argv

    # Parse the command-line arguments
    if len(args) != 5:
        print('Usage: python get_workflow_runs.py <repo_owner> <repo_name> <start_date> <end_date>')
        sys.exit(1)
    repo_owner, repo_name, start_arg, end_arg = args[1:]

    # Validate the start_date and end_date arguments
    try:
        start_date = datetime.fromisoformat(start_arg)
        end_date = datetime.fromisoformat(end_arg)
    except ValueError:
        print('Error: Invalid date format. Please use ISO format (YYYY-MM-DD).')
        sys.exit(1)

    # Send the command and retrieve the workflow runs
    try:
        workflow_runs = fetch_workflow_runs(repo_owner, repo_name, start_date, end_date)
    except FileNotFoundError:
        print('Error: the GitHub CLI (`gh`) was not found on PATH.', file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as err:
        print(f'Error: `gh api` failed with exit code {err.returncode}.', file=sys.stderr)
        sys.exit(1)

    # Write the workflow runs to the runs.json file
    with open(RUNS_FILE, 'w', encoding='utf-8') as f:
        json.dump(workflow_runs, f)

    # Print the number of workflow runs
    print(f'[{repo_owner}/{repo_name}]: No. of workflow runs: {len(workflow_runs)}')


if __name__ == '__main__':
    main()