diff --git a/.cruft.json b/.cruft.json
index 8377fc9..d9b6d49 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,25 +1,25 @@
 {
-  "template": "https://github.com/fpgmaas/cookiecutter-poetry.git",
-  "commit": "f448c9c6407c799f6b81b8e310608cb841e98d15",
-  "checkout": null,
-  "context": {
-    "cookiecutter": {
-      "author": "Mark Andrew Miller",
-      "email": "mamillerpa@gmail.com",
-      "author_github_handle": "turbomam",
-      "project_name": "llm-github",
-      "project_slug": "llm_github",
-      "project_description": "Tools for extracting knowledge from GitHub issues, PR comments, etc.",
-      "include_github_actions": "y",
-      "publish_to": "pypi",
-      "deptry": "y",
-      "mkdocs": "y",
-      "codecov": "y",
-      "dockerfile": "y",
-      "devcontainer": "n",
-      "open_source_license": "MIT license",
-      "_template": "https://github.com/fpgmaas/cookiecutter-poetry.git"
-    }
-  },
-  "directory": null
+  "template": "https://github.com/fpgmaas/cookiecutter-poetry.git",
+  "commit": "f448c9c6407c799f6b81b8e310608cb841e98d15",
+  "checkout": null,
+  "context": {
+    "cookiecutter": {
+      "author": "Mark Andrew Miller",
+      "email": "mamillerpa@gmail.com",
+      "author_github_handle": "turbomam",
+      "project_name": "llm-github",
+      "project_slug": "llm_github",
+      "project_description": "Tools for extracting knowledge from GitHub issues, PR comments, etc.",
+      "include_github_actions": "y",
+      "publish_to": "pypi",
+      "deptry": "y",
+      "mkdocs": "y",
+      "codecov": "y",
+      "dockerfile": "y",
+      "devcontainer": "n",
+      "open_source_license": "MIT license",
+      "_template": "https://github.com/fpgmaas/cookiecutter-poetry.git"
+    }
+  },
+  "directory": null
 }
diff --git a/.github/actions/setup-poetry-env/action.yml b/.github/actions/setup-poetry-env/action.yml
index b2cd2df..d786811 100644
--- a/.github/actions/setup-poetry-env/action.yml
+++ b/.github/actions/setup-poetry-env/action.yml
@@ -2,10 +2,10 @@ name: "setup-poetry-env"
 description: "Composite action to setup the Python and poetry environment."
 
 inputs:
-  python-version:
-    required: false
-    description: "The python version to use"
-    default: "3.11"
+  python-version:
+    required: false
+    description: "The python version to use"
+    default: "3.11"
 
 runs:
   using: "composite"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d193f81..679ba4a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -29,7 +29,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11']
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
       fail-fast: false
     steps:
       - name: Check out
diff --git a/.github/workflows/on-release-main.yml b/.github/workflows/on-release-main.yml
index 85f5200..6a13b1c 100644
--- a/.github/workflows/on-release-main.yml
+++ b/.github/workflows/on-release-main.yml
@@ -6,7 +6,6 @@ on:
     branches: [main]
 
 jobs:
-
   publish:
     runs-on: ubuntu-latest
     steps:
@@ -28,7 +27,7 @@ jobs:
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
         RELEASE_VERSION: ${{ steps.vars.outputs.tag }}
-
+
   deploy-docs:
     needs: publish
     runs-on: ubuntu-latest
@@ -41,4 +40,3 @@ jobs:
 
     - name: Deploy documentation
       run: poetry run mkdocs gh-deploy --force
-
diff --git a/.gitignore b/.gitignore
index c913412..26f2208 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .idea/
+*.sqlite
 
 ###
 
diff --git a/LICENSE b/LICENSE
index 0da21c0..8d8fd7e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-
diff --git a/docs/modules.md b/docs/modules.md
index 07e4b93..faa1bf3 100644
--- a/docs/modules.md
+++ b/docs/modules.md
@@ -1 +1 @@
-::: llm_github.foo
+::: llm_github.core
diff --git a/llm_github/core.py b/llm_github/core.py
new file mode 100644
index 0000000..efeddb7
--- /dev/null
+++ b/llm_github/core.py
@@ -0,0 +1,360 @@
+import json
+import time
+from typing import Any, Dict, List, Optional
+
+import requests
+from requests_cache import CachedSession
+from typing_extensions import TypedDict  # Use from typing_extensions for compatibility with older Python versions
+
+REQUESTS_TIMEOUT = 10  # Timeout in seconds for requests
+
+# Default fields to be dropped from responses
+DEFAULT_DROPPED_FIELDS = [
+    "_links",
+    "base",
+    "comments_url",
+    "commits_url",
+    "diff_url",
+    "events_url",
+    "head",
+    "html_url",
+    "labels_url",
+    "locked",
+    "merge_commit_sha",
+    "node_id",
+    "patch_url",
+    "repository_url",
+    "review_comment_url",
+    "review_comments_url",
+    "statuses_url",
+    "timeline_url",
+]
+
+
+class EnvironmentVariableError(Exception):
+    """Exception raised for errors in the environment variables."""
+
+    def __init__(self, variable: str, message: str = "is not set in the environment.") -> None:
+        self.variable = variable
+        self.message = message
+        super().__init__(f"{variable} {message}")
+
+
+class RateLimit(TypedDict):
+    limit: int
+    remaining: int
+    reset: int
+    used: int
+
+
+class RateLimitResponse(TypedDict):
+    rate: RateLimit
+    resources: Dict[str, RateLimit]
+
+
+def return_verbatim(input_string: str) -> str:
+    """Return the input string."""
+    return input_string
+
+
+def get_rate_limit(token: str, session: CachedSession) -> RateLimitResponse:
+    """Fetch current rate limit status from GitHub API."""
+    headers = {"Authorization": f"token {token}"}
+    response = session.get("https://api.github.com/rate_limit", headers=headers, timeout=REQUESTS_TIMEOUT)
+    response.raise_for_status()  # Raises HTTPError for bad requests
+    data: RateLimitResponse = response.json()
+    return data
+
+
+def wait_for_rate_limit_reset(reset_time: int) -> None:
+    """Wait until the rate limit reset time."""
+    wait_time = reset_time - int(time.time()) + 10  # Adding 10 seconds to ensure the reset has occurred
+    print(f"Rate limit exceeded. Waiting for {wait_time} seconds.")
+    time.sleep(wait_time)
+
+
+def remove_keys_from_dict(data: Dict[str, Any], keys_to_remove: List[str]) -> Dict[str, Any]:
+    """Remove specified keys from a dictionary."""
+    return {key: value for key, value in data.items() if key not in keys_to_remove}
+
+
+def write_json_to_file(json_object: List[Dict[str, Any]], filename: str) -> None:
+    """Save data to a JSON file."""
+    with open(filename, "w", encoding="utf-8") as f:
+        json.dump(json_object, f, ensure_ascii=False, indent=4)
+    print(f"Data saved to {filename}")
+
+
+def handle_response_errors(response: requests.Response) -> None:
+    """Handle HTTP errors from a response."""
+    if response.status_code == 404:
+        print("Resource not found. Check the requested resource or permissions.")
+    elif response.status_code == 403:
+        print("Access forbidden. Ensure token has the required scopes or check for rate limits.")
+    elif response.status_code == 401:
+        print("Unauthorized. Check if the token is valid or expired.")
+    else:
+        print(f"Failed to fetch data. Status code: {response.status_code}")
+    print("Error message:", response.text)
+
+
+def github_token_check(token: str, session: CachedSession) -> Optional[Dict[str, Any]]:
+    """Validate the GitHub token by fetching user profile."""
+    headers = {"Authorization": f"token {token}"}
+    response = session.get("https://api.github.com/user", headers=headers, timeout=REQUESTS_TIMEOUT)
+    if response.status_code == 200:
+        print("Token is valid. User data retrieved successfully.")
+        data: Dict[str, Any] = response.json()
+        return data
+    print(f"Failed to authenticate. Status code: {response.status_code}")
+    return None
+
+
+def list_user_orgs(token: str, session: CachedSession) -> Optional[List[Dict[str, Any]]]:
+    """List all organizations the user is a member of."""
+    rate_limit = get_rate_limit(token, session)
+    if rate_limit["rate"]["remaining"] == 0:
+        wait_for_rate_limit_reset(rate_limit["rate"]["reset"])
+    headers = {"Authorization": f"token {token}"}
+    response = session.get("https://api.github.com/user/orgs", headers=headers, timeout=REQUESTS_TIMEOUT)
+    if response.status_code == 200:
+        print("Organizations retrieved successfully.")
+        data: List[Dict[str, Any]] = response.json()
+        return data
+    handle_response_errors(response)
+    return None
+
+
+def get_repos(org: str, token: str, session: CachedSession) -> Optional[List[Dict[str, Any]]]:
+    """Fetch all repositories for a given organization."""
+    rate_limit = get_rate_limit(token, session)
+    if rate_limit["rate"]["remaining"] == 0:
+        wait_for_rate_limit_reset(rate_limit["rate"]["reset"])
+    repos: List[Dict[str, Any]] = []
+    url = f"https://api.github.com/orgs/{org}/repos"
+    headers = {"Authorization": f"token {token}"}
+    while url:
+        response = session.get(url, headers=headers, timeout=REQUESTS_TIMEOUT)
+        if response.status_code == 200:
+            repos.extend(response.json())
+            url = response.links.get("next", {}).get("url")
+        else:
+            handle_response_errors(response)
+            return None
+    return repos
+
+
+def fetch_issues(org: str, token: str, session: CachedSession) -> Optional[List[Dict[str, Any]]]:
+    """Fetch all issues from all repositories in an organization, handling pagination and rate limits."""
+    issues: List[Dict[str, Any]] = []
+    repos = get_repos(org, token, session)
+    if not repos:
+        print("No repositories found or failed to fetch repositories.")
+        return None
+
+    for repo in repos:
+        # Ensure the URL is constructed to fetch all issues (not just open ones)
+        url = repo["issues_url"].replace("{/number}", "?state=all")
+        while url:
+            rate_limit = get_rate_limit(token, session)  # Check rate limit before each request
+            if rate_limit["rate"]["remaining"] == 0:
+                wait_for_rate_limit_reset(rate_limit["rate"]["reset"])
+
+            response = session.get(url, headers={"Authorization": f"token {token}"}, timeout=REQUESTS_TIMEOUT)
+            if response.status_code == 200:
+                issues.extend(response.json())
+                links = response.links
+                url = links["next"]["url"] if "next" in links else None
+            else:
+                print(f"Failed to fetch issues for {repo['name']}. Status code: {response.status_code}")
+                print("Error message:", response.text)
+                return None
+    return issues
+
+
+def sanitize_user_data(data: Any) -> Any:
+    """Recursively sanitize user data to keep only the user 'login'."""
+    if isinstance(data, dict):
+        if "login" in data and set(data.keys()) - {"login"}:
+            return {"login": data["login"]}
+        else:
+            return {key: sanitize_user_data(value) for key, value in data.items()}
+    elif isinstance(data, list):
+        return [sanitize_user_data(item) for item in data]
+    return data
+
+
+def remove_empty_values(data: Any) -> Any:
+    """Recursively remove keys with empty values from a dictionary or list."""
+    if isinstance(data, dict):
+        return {k: remove_empty_values(v) for k, v in data.items() if v or isinstance(v, bool)}
+    elif isinstance(data, list):
+        return [remove_empty_values(item) for item in data if item or isinstance(item, bool)]
+    return data
+
+
+def process_issues(issues: List[Dict[str, Any]], keys_to_remove: List[str]) -> List[Dict[str, Any]]:
+    """Process a list of issues to sanitize user information and remove empty values."""
+    processed_issues: List[Dict[str, Any]] = []
+    for issue in issues:
+        sanitized_issue = sanitize_user_data(issue)
+        cleaned_issue = remove_empty_values(sanitized_issue)
+        final_issue = remove_keys_from_dict(cleaned_issue, keys_to_remove)
+        processed_issues.append(final_issue)
+    return processed_issues
+
+
+def fetch_pull_requests(org: str, token: str, session: CachedSession) -> Optional[List[Dict[str, Any]]]:
+    """Fetch all pull requests from all repositories in an organization, handling pagination and rate limits."""
+    pull_requests: List[Dict[str, Any]] = []
+    repos = get_repos(org, token, session)
+    if not repos:
+        print("No repositories found or failed to fetch repositories.")
+        return None
+
+    for repo in repos:
+        url = f"{repo['url']}/pulls?state=all"
+        while url:
+            rate_limit = get_rate_limit(token, session)  # Check rate limit before each request
+            if rate_limit["rate"]["remaining"] == 0:
+                wait_for_rate_limit_reset(rate_limit["rate"]["reset"])
+
+            response = session.get(url, headers={"Authorization": f"token {token}"}, timeout=REQUESTS_TIMEOUT)
+            if response.status_code == 200:
+                pull_requests.extend(response.json())
+                links = response.links
+                url = links["next"]["url"] if "next" in links else None
+            else:
+                print(f"Failed to fetch pull requests for {repo['name']}. Status code: {response.status_code}")
+                print("Error message:", response.text)
+                return None
+    return pull_requests
+
+
+def process_pull_requests(pull_requests: List[Dict[str, Any]], keys_to_remove: List[str]) -> List[Dict[str, Any]]:
+    """Process a list of pull requests to sanitize user information and remove empty values."""
+    processed_pull_requests: List[Dict[str, Any]] = []
+    for pr in pull_requests:
+        sanitized_pr = sanitize_user_data(pr)
+        cleaned_pr = remove_empty_values(sanitized_pr)
+        final_pr = remove_keys_from_dict(cleaned_pr, keys_to_remove)
+        processed_pull_requests.append(final_pr)
+    return processed_pull_requests
+
+
+def fetch_all_comments(org: str, token: str, session: CachedSession) -> Optional[List[Dict[str, Any]]]:
+    """Fetch all comments from all repositories in an organization,
+    distinguishing between issue and PR comments, while handling pagination and rate limits."""
+    all_comments: List[Dict[str, Any]] = []
+    repos = get_repos(org, token, session)
+    if not repos:
+        print("No repositories found or failed to fetch repositories.")
+        return None
+
+    for repo in repos:
+        # Adjusting per_page to fetch more comments per request if needed
+        url = f"{repo['url']}/issues/comments?per_page=100"
+        while url:
+            rate_limit = get_rate_limit(token, session)  # Check rate limit before each request
+            if rate_limit["rate"]["remaining"] == 0:
+                wait_for_rate_limit_reset(rate_limit["rate"]["reset"])
+
+            response = session.get(url, headers={"Authorization": f"token {token}"}, timeout=REQUESTS_TIMEOUT)
+            if response.status_code == 200:
+                comments = response.json()
+                for comment in comments:
+                    if "pull_request" in comment:
+                        comment["type"] = "pull_request"
+                    else:
+                        comment["type"] = "issue"
+                all_comments.extend(comments)
+                links = response.links
+                url = links["next"]["url"] if "next" in links else None
+            else:
+                print(f"Failed to fetch comments for {repo['name']}. Status code: {response.status_code}")
+                print("Error message:", response.text)
+                return None
+    return all_comments
+
+
+def process_comments(comments: List[Dict[str, Any]], keys_to_remove: List[str]) -> List[Dict[str, Any]]:
+    """Process a list of comments to sanitize user information and remove empty values."""
+    processed_comments: List[Dict[str, Any]] = []
+    for comment in comments:
+        sanitized_comment = sanitize_user_data(comment)
+        cleaned_comment = remove_empty_values(sanitized_comment)
+        final_comment = remove_keys_from_dict(cleaned_comment, keys_to_remove)
+        processed_comments.append(final_comment)
+    return processed_comments
+
+
+def fetch_all_discussions(org: str, token: str, session: CachedSession) -> Optional[List[Dict[str, Any]]]:
+    """Fetch discussions from all repositories in the specified organization."""
+    all_discussions: List[Dict[str, Any]] = []
+    repos = get_repos(org, token, session)
+    if repos:
+        for repo in repos:
+            repo_name = repo["name"] if isinstance(repo, dict) else repo
+            print(f"Fetching discussions for repository: {repo_name}")
+            discussions = fetch_discussions_graphql(org, repo_name, token)
+            if discussions:
+                all_discussions.extend(discussions)
+            else:
+                print(f"No discussions found or an error occurred for repository: {repo_name}")
+    return all_discussions
+
+
+def fetch_discussions_graphql(org: str, repo: str, token: str) -> Optional[List[Dict[str, Any]]]:
+    """Fetch discussions using GitHub's GraphQL API."""
+    url = "https://api.github.com/graphql"
+    headers = {"Authorization": f"Bearer {token}"}
+    query = """
+    query FetchDiscussions($org: String!, $repo: String!) {
+      repository(owner: $org, name: $repo) {
+        discussions(first: 100) {
+          nodes {
+            number
+            title
+            url
+            bodyText
+            createdAt
+            updatedAt
+            author {
+              login
+            }
+            labels(first: 10) {
+              nodes {
+                name
+                description
+              }
+            }
+          }
+        }
+      }
+    }
+    """
+    variables = {"org": org, "repo": repo}
+    # Added a timeout of 10 seconds
+    response = requests.post(url, json={"query": query, "variables": variables}, headers=headers, timeout=10)
+    if response.status_code == 200:
+        data: Any = response.json()
+        if "errors" in data:
+            print(f"GraphQL Errors: {json.dumps(data['errors'], indent=2)}")
+        nodes: Optional[List[Dict[str, Any]]] = (
+            data.get("data", {}).get("repository", {}).get("discussions", {}).get("nodes", [])
+        )
+        return nodes
+    print(f"Failed to fetch discussions. Status code: {response.status_code}")
+    print("Response: ", response.text)
+    return None
+
+
+def process_discussions(discussions: List[Dict[str, Any]], keys_to_remove: List[str]) -> List[Dict[str, Any]]:
+    """Process a list of discussions to sanitize user information, remove empty values, and remove specified keys."""
+    processed_discussions: List[Dict[str, Any]] = []
+    for discussion in discussions:
+        sanitized_discussion = sanitize_user_data(discussion)
+        cleaned_discussion = remove_empty_values(sanitized_discussion)
+        final_discussion = remove_keys_from_dict(cleaned_discussion, keys_to_remove)
+        processed_discussions.append(final_discussion)
+    return processed_discussions
diff --git a/llm_github/execute.py b/llm_github/execute.py
new file mode 100644
index 0000000..0006db2
--- /dev/null
+++ b/llm_github/execute.py
@@ -0,0 +1,74 @@
+import os
+from typing import Dict, List, Optional
+
+from dotenv import load_dotenv
+from requests_cache import CachedSession
+from requests_cache.backends.sqlite import SQLiteCache
+
+# Fixing import conflicts by adjusting namespace and avoiding re-importing CachedSession
+from llm_github.core import (
+    DEFAULT_DROPPED_FIELDS,
+    EnvironmentVariableError,
+    fetch_all_comments,
+    fetch_all_discussions,
+    fetch_issues,
+    fetch_pull_requests,
+    get_repos,
+    github_token_check,
+    list_user_orgs,
+    process_comments,
+    process_discussions,
+    process_issues,
+    process_pull_requests,
+    write_json_to_file,
+)
+
+# Load environment variables from .env file
+load_dotenv(dotenv_path="local/.env", verbose=True)
+
+# Global access token for GitHub API
+global_token: str = os.getenv("GITHUB_TOKEN", "")
+if not global_token:
+    raise EnvironmentVariableError("GITHUB_TOKEN")
+print("Token loaded successfully.")
+
+# Set up cache with SQLite backend
+session: CachedSession = CachedSession(
+    cache_name="llm-github-cache",
+    backend=SQLiteCache("llm-github.sqlite", timeout=86400),  # Cache expires after 24 hours
+)
+
+user_data: Optional[Dict] = github_token_check(global_token, session=session)
+orgs: Optional[List[Dict]] = list_user_orgs(global_token, session=session)
+
+org_name: str = "microbiomedata"
+
+print("FETCHING REPOS")
+repos: Optional[List[Dict]] = get_repos(org_name, global_token, session=session)
+if repos:
+    write_json_to_file(repos, f"{org_name}_repos.json")
+
+print("FETCHING ISSUES")
+org_issues: Optional[List[Dict]] = fetch_issues(org_name, global_token, session=session)
+if org_issues:
+    sanitized_issues: List[Dict] = process_issues(org_issues, DEFAULT_DROPPED_FIELDS)
+    write_json_to_file(sanitized_issues, f"{org_name}_issues.json")
+
+print("FETCHING PRs")
+pull_requests: Optional[List[Dict]] = fetch_pull_requests(org_name, global_token, session=session)
+if pull_requests:
+    processed_pull_requests: List[Dict] = process_pull_requests(pull_requests, DEFAULT_DROPPED_FIELDS)
+    write_json_to_file(processed_pull_requests, f"{org_name}_prs.json")
+
+print("FETCHING COMMENTS")
+comments: Optional[List[Dict]] = fetch_all_comments(org_name, global_token, session=session)
+if comments:
+    processed_comments: List[Dict] = process_comments(comments, DEFAULT_DROPPED_FIELDS)
+    write_json_to_file(processed_comments, f"{org_name}_comments.json")
+
+print("FETCHING DISCUSSIONS")
+all_discussions: Optional[List[Dict]] = fetch_all_discussions(org_name, global_token, session=session)
+if all_discussions:
+    processed_discussions: List[Dict] = process_discussions(all_discussions, DEFAULT_DROPPED_FIELDS)
+    print(f"Total discussions fetched from all repositories: {len(processed_discussions)}")
+    write_json_to_file(processed_discussions, f"{org_name}_discussions.json")
diff --git a/llm_github/foo.py b/llm_github/foo.py
deleted file mode 100644
index 8b7396d..0000000
--- a/llm_github/foo.py
+++ /dev/null
@@ -1,17 +0,0 @@
-def foo(bar: str) -> str:
-    """Summary line.
-
-    Extended description of function.
-
-    Args:
-        bar: Description of input argument.
-
-    Returns:
-        Description of return value
-    """
-
-    return bar
-
-
-if __name__ == "__main__":  # pragma: no cover
-    pass
diff --git a/local/.env.template b/local/.env.template
index 74d0a43..3b926cd 100644
--- a/local/.env.template
+++ b/local/.env.template
@@ -1 +1 @@
-foo=bar
+GITHUB_TOKEN=
diff --git a/mkdocs.yml b/mkdocs.yml
index 212a5b1..c8cfd91 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -15,9 +15,9 @@ plugins:
   - mkdocstrings:
      handlers:
        python:
-        setup_commands:
-          - import sys
-          - sys.path.append('../')
+          setup_commands:
+            - import sys
+            - sys.path.append('../')
 theme:
   name: material
 feature:
diff --git a/poetry.lock b/poetry.lock
index 1586a71..fc850bb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -15,6 +15,25 @@ files = [
 six = ">=1.6.1,<2.0"
 wheel = ">=0.23.0,<1.0"
 
+[[package]]
+name = "attrs"
+version = "23.2.0"
+description = "Classes Without Boilerplate"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
+    {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
+]
+
+[package.extras]
+cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
+dev = ["attrs[tests]", "pre-commit"]
+docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
+tests = ["attrs[tests-no-zope]", "zope-interface"]
+tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
+tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
+
 [[package]]
 name = "babel"
 version = "2.15.0"
@@ -43,6 +62,31 @@ files = [
     {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"},
 ]
 
+[[package]]
+name = "cattrs"
+version = "23.2.3"
+description = "Composable complex class support for attrs and dataclasses."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cattrs-23.2.3-py3-none-any.whl", hash = "sha256:0341994d94971052e9ee70662542699a3162ea1e0c62f7ce1b4a57f563685108"},
+    {file = "cattrs-23.2.3.tar.gz", hash = "sha256:a934090d95abaa9e911dac357e3a8699e0b4b14f8529bcc7d2b1ad9d51672b9f"},
+]
+
+[package.dependencies]
+attrs = ">=23.1.0"
+exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=4.1.0,<4.6.3 || >4.6.3", markers = "python_version < \"3.11\""}
+
+[package.extras]
+bson = ["pymongo (>=4.4.0)"]
+cbor2 = ["cbor2 (>=5.4.6)"]
+msgpack = ["msgpack (>=1.0.5)"]
+orjson = ["orjson (>=3.9.2)"]
+pyyaml = ["pyyaml (>=6.0)"]
+tomlkit = ["tomlkit (>=0.11.8)"]
+ujson = ["ujson (>=5.7.0)"]
+
 [[package]]
 name = "certifi"
 version = "2024.6.2"
@@ -929,6 +973,20 @@ files = [
 [package.dependencies]
 six = ">=1.5"
 
+[[package]]
+name = "python-dotenv"
+version = "1.0.1"
+description = "Read key-value pairs from a .env file and set them as environment variables"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
+    {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
+]
+
+[package.extras]
+cli = ["click (>=5.0)"]
+
 [[package]]
 name = "pytz"
 version = "2024.1"
@@ -1123,6 +1181,36 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "requests-cache"
+version = "1.2.1"
+description = "A persistent cache for python requests"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"},
+    {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"},
+]
+
+[package.dependencies]
+attrs = ">=21.2"
+cattrs = ">=22.2"
+platformdirs = ">=2.5"
+requests = ">=2.22"
+url-normalize = ">=1.4"
+urllib3 = ">=1.25.5"
+
+[package.extras]
+all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"]
+bson = ["bson (>=0.5)"]
+docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"]
+dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"]
+json = ["ujson (>=5.4)"]
+mongodb = ["pymongo (>=3)"]
+redis = ["redis (>=3)"]
+security = ["itsdangerous (>=2.0)"]
+yaml = ["pyyaml (>=6.0.1)"]
+
 [[package]]
 name = "six"
 version = "1.16.0"
@@ -1172,6 +1260,20 @@ virtualenv = ">=20.25"
 docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-argparse-cli (>=1.11.1)", "sphinx-autodoc-typehints (>=1.25.2)", "sphinx-copybutton (>=0.5.2)", "sphinx-inline-tabs (>=2023.4.21)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.11)"]
 testing = ["build[virtualenv] (>=1.0.3)", "covdefaults (>=2.3)", "detect-test-pollution (>=1.2)", "devpi-process (>=1)", "diff-cover (>=8.0.2)", "distlib (>=0.3.8)", "flaky (>=3.7)", "hatch-vcs (>=0.4)", "hatchling (>=1.21)", "psutil (>=5.9.7)", "pytest (>=7.4.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-xdist (>=3.5)", "re-assert (>=1.1)", "time-machine (>=2.13)", "wheel (>=0.42)"]
 
+[[package]]
+name = "types-requests"
+version = "2.32.0.20240622"
+description = "Typing stubs for requests"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "types-requests-2.32.0.20240622.tar.gz", hash = "sha256:ed5e8a412fcc39159d6319385c009d642845f250c63902718f605cd90faade31"},
+    {file = "types_requests-2.32.0.20240622-py3-none-any.whl", hash = "sha256:97bac6b54b5bd4cf91d407e62f0932a74821bc2211f22116d9ee1dd643826caf"},
+]
+
+[package.dependencies]
+urllib3 = ">=2"
+
 [[package]]
 name = "typing-extensions"
 version = "4.12.2"
@@ -1183,6 +1285,20 @@ files = [
     {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]
 
+[[package]]
+name = "url-normalize"
+version = "1.4.3"
+description = "URL normalization for Python"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+files = [
+    {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"},
+    {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"},
+]
+
+[package.dependencies]
+six = "*"
+
 [[package]]
 name = "urllib3"
 version = "2.2.2"
@@ -1296,4 +1412,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8,<4.0"
-content-hash = "a3b14f6f7cdd13dce3b5a26933ebce280805aba59061c1f199e1e5f2ad527883"
+content-hash = "31ee1c4d060296bd27aa244fd375dceedd6a70f27cc03f0d7537ce15f8e18739"
diff --git a/pyproject.toml b/pyproject.toml
index 09d370a..a63dda9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,16 +2,20 @@
 name = "llm_github"
 version = "0.0.1"
 description = "Tools for extracting knowledge from GitHub issues, PR comments, etc."
-authors = ["Mark Andrew Miller "] +authors = ["Mark Andrew Miller "] repository = "https://github.com/turbomam/llm-github" documentation = "https://turbomam.github.io/llm-github/" readme = "README.md" packages = [ - {include = "llm_github"} + { include = "llm_github" } ] [tool.poetry.dependencies] python = ">=3.8,<4.0" +python-dotenv = "^1.0.1" +requests-cache = "^1.2.1" +requests = "^2.32.3" +typing-extensions = "^4.12.2" [tool.poetry.group.dev.dependencies] pytest = "^7.2.0" @@ -20,11 +24,12 @@ deptry = "^0.12.0" mypy = "^1.5.1" pre-commit = "^3.4.0" tox = "^4.11.1" +types-requests = "^2.32.0.20240622" [tool.poetry.group.docs.dependencies] mkdocs = "^1.4.2" mkdocs-material = "^9.2.7" -mkdocstrings = {extras = ["python"], version = "^0.23.0"} +mkdocstrings = { extras = ["python"], version = "^0.23.0" } [build-system] requires = ["poetry-core>=1.0.0"] @@ -32,13 +37,13 @@ build-backend = "poetry.core.masonry.api" [tool.mypy] files = ["llm_github"] -disallow_untyped_defs = "True" -disallow_any_unimported = "True" -no_implicit_optional = "True" -check_untyped_defs = "True" -warn_return_any = "True" -warn_unused_ignores = "True" -show_error_codes = "True" +disallow_untyped_defs = true +disallow_any_unimported = true +no_implicit_optional = true +check_untyped_defs = true +warn_return_any = true +warn_unused_ignores = true +show_error_codes = true [tool.pytest.ini_options] testpaths = ["tests"] @@ -48,41 +53,25 @@ target-version = "py37" line-length = 120 fix = true select = [ - # flake8-2020 "YTT", - # flake8-bandit "S", - # flake8-bugbear "B", - # flake8-builtins "A", - # flake8-comprehensions "C4", - # flake8-debugger "T10", - # flake8-simplify "SIM", - # isort "I", - # mccabe "C90", - # pycodestyle - "E", "W", - # pyflakes + "E", + "W", "F", - # pygrep-hooks "PGH", - # pyupgrade "UP", - # ruff "RUF", - # tryceratops "TRY", ] ignore = [ - # LineTooLong "E501", - # DoNotAssignLambda "E731", ] @@ -96,6 +85,5 @@ skip_empty = true branch = true source = ["llm_github"] - [tool.ruff.per-file-ignores] "tests/*" = ["S101"] diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..3f0a3b4 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,20 @@ +from unittest.mock import Mock + +from llm_github.core import get_rate_limit, return_verbatim + + +def test_return_verbatim(): + assert return_verbatim("foo") == "foo" + + +def test_get_rate_limit(): + mock_session = Mock() + mock_session.get.return_value.status_code = 200 + mock_session.get.return_value.json.return_value = { + "rate": {"limit": 5000, "remaining": 4999, "reset": 1234567890, "used": 1} + } + + token = "fake_token" # noqa: S105 + result = get_rate_limit(token, mock_session) + expected = {"limit": 5000, "remaining": 4999, "reset": 1234567890, "used": 1} + assert result["rate"] == expected diff --git a/tests/test_foo.py b/tests/test_foo.py deleted file mode 100644 index 42c9b38..0000000 --- a/tests/test_foo.py +++ /dev/null @@ -1,5 +0,0 @@ -from llm_github.foo import foo - - -def test_foo(): - assert foo("foo") == "foo"