From 40e0a3ef8717b8dc625ebde82b1e5b814936318a Mon Sep 17 00:00:00 2001
From: Anis Koubaa
Date: Wed, 18 Dec 2024 12:44:28 +0100
Subject: [PATCH] after dbus client integration

---
 .vscode/launch.json               | 46 ---------------------
 .vscode/settings.json             |  7 ----
 databusclient/cli.py              |  9 ++--
 databusclient/client.py           | 69 +++++++++++++++++--------------
 databusclient/consume/download.py |  2 +-
 test_oep.sh                       | 10 +++++
 6 files changed, 53 insertions(+), 90 deletions(-)
 delete mode 100644 .vscode/launch.json
 delete mode 100644 .vscode/settings.json
 create mode 100644 test_oep.sh

diff --git a/.vscode/launch.json b/.vscode/launch.json
deleted file mode 100644
index acb70ee..0000000
--- a/.vscode/launch.json
+++ /dev/null
@@ -1,46 +0,0 @@
-{
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "name": "DebugPy: Run pytest module (manually select venv of poetry)",
-            "type": "debugpy",
-            "request": "launch",
-            // "pythonPath": "${workspaceFolder}/.venv/Scripts/python.exe", // need to set venv of poetry via vscode
-            "cwd": "${workspaceFolder}",
-            "module": "pytest",
-            "args": [
-            ],
-            "console": "integratedTerminal",
-            "justMyCode": true,
-        },
-        // run pytests with poetry
-        // {
-        //     "name": "DebugPy: poetry run pytest",
-        //     "type": "debugpy",
-        //     "request": "launch",
-        //     "program": "poetry",
-        //     "args": [
-        //         "run pytest"
-        //     ],
-        //     // "env": {
-        //     //     "PYTHONPATH": "${workspaceFolder}"
-        //     // },
-        //     "console": "integratedTerminal"
-        // },
-        {
-            "name": "[Deprecated] Python: Run pytest module (manually select venv of poetry)",
-            "type": "python",
-            "request": "launch",
-            // "pythonPath": "${workspaceFolder}/.venv/Scripts/python.exe",
-            "cwd": "${workspaceFolder}",
-            "module": "pytest",
-            "args": [
-            ],
-            "console": "integratedTerminal",
-            "justMyCode": false,
-        }
-    ]
-}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 9b38853..0000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "python.testing.pytestArgs": [
-        "tests"
-    ],
-    "python.testing.unittestEnabled": false,
-    "python.testing.pytestEnabled": true
-}
\ No newline at end of file
diff --git a/databusclient/cli.py b/databusclient/cli.py
index 3384323..9cd93e5 100644
--- a/databusclient/cli.py
+++ b/databusclient/cli.py
@@ -24,7 +24,7 @@ def deploy(
         "download URL and CV the "
         "key=value pairs (_ separated) content variants of a distribution. filext and compression are optional "
         "and if left out inferred from the path. If the sha256sum:contentlength part is left out it will be "
-        "calcuted by downloading the file.",
+        "calculated by downloading the file.",
     ),
 ):
     typer.echo(version_id)
@@ -36,8 +36,9 @@
 
 @app.command()
 def download(
-    localDir: str = typer.Option(..., help="local databus folder"),
+    local_dir: str = typer.Option(..., help="local databus folder"),
     databus: str = typer.Option(..., help="databus URL"),
-    databusuris: List[str] = typer.Argument(...,help="any kind of these: databus identifier, databus collection identifier, query file")
+    databus_uris: List[str] = typer.Argument(..., help="any of: databus identifier, databus "
+                                                       "collection identifier, query file")
 ):
-    client.download(localDir=localDir,endpoint=databus,databusURIs=databusuris)
+    client.download(local_dir=local_dir, endpoint=databus, databus_uris=databus_uris)
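
For reference, the renamed CLI options map directly onto the Python entry
point that the command calls. A minimal sketch of the equivalent call from
Python (same import cli.py uses; the endpoint URL and collection URI are
illustrative placeholders, not values taken from this patch):

    from databusclient import client

    # Same call the updated CLI command issues, with the new snake_case
    # keyword names; both URLs below are placeholders.
    client.download(
        local_dir="./databus_data",
        endpoint="https://databus.openenergyplatform.org/sparql",
        databus_uris=["https://databus.openenergyplatform.org/<account>/collections/<collection>"],
    )
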
diff --git a/databusclient/client.py b/databusclient/client.py
index 5cb5061..3e2afcc 100644
--- a/databusclient/client.py
+++ b/databusclient/client.py
@@ -180,9 +180,11 @@ def create_distribution(
 ) -> str:
     """Creates the identifier-string for a distribution used as downloadURLs in the createDataset function.
     url: is the URL of the dataset
-    cvs: dict of content variants identifying a certain distribution (needs to be unique for each distribution in the dataset)
+    cvs: dict of content variants identifying a certain distribution (needs to be unique for each distribution in the
+        dataset)
     file_format: identifier for the file format (e.g. json). If set to None client tries to infer it from the path
-    compression: identifier for the compression format (e.g. gzip). If set to None client tries to infer it from the path
+    compression: identifier for the compression format (e.g. gzip). If set to None client tries to infer it from the
+        path
     sha256_length_tuple: sha256sum and content_length of the file in the form of Tuple[shasum, length]. If left out
         file will be downloaded extra and calculated.
     """
@@ -219,14 +221,16 @@ def create_dataset(
     group_description: str = None,
 ) -> Dict[str, Union[List[Dict[str, Union[bool, str, int, float, List]]], str]]:
     """
-    Creates a Databus Dataset as a python dict from distributions and submitted metadata. WARNING: If file stats (sha256sum, content length)
-    were not submitted, the client loads the files and calculates them. This can potentially take a lot of time, depending on the file size.
+    Creates a Databus Dataset as a python dict from distributions and submitted metadata. WARNING: If file stats
+    (sha256sum, content length) were not submitted, the client loads the files and calculates them. This can
+    potentially take a lot of time, depending on the file size.
     The result can be transformed to a JSON-LD by calling json.dumps(dataset).
 
     Parameters
     ----------
     version_id: str
-        The version ID representing the Dataset. Needs to be in the form of $DATABUS_BASE/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION
+        The version ID representing the Dataset. Needs to be in the form of
+        $DATABUS_BASE/$ACCOUNT/$GROUP/$ARTIFACT/$VERSION
     title: str
         The title text of the dataset
     abstract: str
@@ -252,7 +256,7 @@
     _versionId = str(version_id).strip("/")
     _, account_name, group_name, artifact_name, version = _versionId.rsplit("/", 4)
 
-    # could be build from stuff above,
+    # could be built from stuff above,
     # was not sure if there are edge cases BASE=http://databus.example.org/"base"/...
     group_id = _versionId.rsplit("/", 2)[0]
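
As a usage sketch of the two functions whose docstrings change above (all
values are placeholders; the parameter names license_url and distributions
are assumed from the current create_dataset signature and worth
double-checking against it):

    from databusclient import client

    # One distribution identifier; sha256_length_tuple is omitted, so the
    # client will download the file once to compute the stats (see the
    # WARNING in the create_dataset docstring).
    dist = client.create_distribution(
        url="https://example.org/data/file.csv",
        cvs={"type": "example"},
    )

    dataset = client.create_dataset(
        version_id="https://databus.example.org/account/group/artifact/1.0.0",
        title="Example Dataset",
        abstract="A short abstract.",
        description="A longer description.",
        license_url="http://dalicc.net/licenselibrary/AdaptivePublicLicense10",
        distributions=[dist],
    )
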
@@ -359,15 +363,16 @@ def deploy(
     Parameters
     ----------
     dataid: Dict[str, Union[List[Dict[str, Union[bool, str, int, float, List]]], str]]
-        The dataid represented as a python dict. Preferably created by the creaateDataset function
+        The dataid represented as a python dict. Preferably created by the create_dataset function
     api_key: str
         the API key of the user noted in the Dataset identifier
     verify_parts: bool
-        flag of the publish POST request, prevents the databus from checking shasum and content length (is already handled by the client, reduces load on the Databus). Default is False
+        flag of the publish POST request, prevents the databus from checking shasum and content length (is already
+        handled by the client, reduces load on the Databus). Default is False
     log_level: DeployLogLevel
-        log level of the deploy output
+        log level of the deployment output
     debug: bool
-        controls whether output shold be printed to the console (stdout)
+        controls whether output should be printed to the console (stdout)
     """
 
     headers = {"X-API-KEY": f"{api_key}", "Content-Type": "application/json"}
@@ -401,14 +406,14 @@ def __download_file__(url, filename):
     - filename: the local file path where the file should be saved
     """
 
-    print("download "+url)
-    os.makedirs(os.path.dirname(filename), exist_ok=True) # Create the necessary directories
+    print("download "+url)
+    os.makedirs(os.path.dirname(filename), exist_ok=True)  # Create the necessary directories
     response = requests.get(url, stream=True)
-    total_size_in_bytes= int(response.headers.get('content-length', 0))
-    block_size = 1024 # 1 Kibibyte
+    total_size_in_bytes = int(response.headers.get('content-length', 0))
+    block_size = 1024  # 1 Kibibyte
     progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
-    with open(filename, 'wb') as file:
+    with open(filename, 'wb') as file:
         for data in response.iter_content(block_size):
             progress_bar.update(len(data))
             file.write(data)
@@ -417,7 +422,7 @@ def __download_file__(url, filename):
         print("ERROR, something went wrong")
 
 
-def __query_sparql__(endpoint_url, query)-> dict:
+def __query_sparql__(endpoint_url, query) -> dict:
     """
     Query a SPARQL endpoint and return results in JSON format.
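
Continuing the dataset sketch above, the publish step then reduces to a
single deploy call ("YOUR-API-KEY" is a placeholder; verify_parts, log_level,
and debug keep the defaults described in the docstring above):

    from databusclient import client

    # dataset is the dict built by create_dataset in the previous sketch;
    # the key must belong to the account named in the version ID.
    client.deploy(dataset, "YOUR-API-KEY")
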
@@ -437,7 +442,7 @@ def __query_sparql__(endpoint_url, query)-> dict:
 
 
 def __handle__databus_file_query__(endpoint_url, query) -> List[str]:
-    result_dict = __query_sparql__(endpoint_url,query)
+    result_dict = __query_sparql__(endpoint_url, query)
     for binding in result_dict['results']['bindings']:
         if len(binding.keys()) > 1:
             print("Error multiple bindings in query response")
@@ -451,41 +456,41 @@
 
 def wsha256(raw: str):
     return sha256(raw.encode('utf-8')).hexdigest()
 
 
-def __handle_databus_collection__(endpoint, uri: str)-> str:
+def __handle_databus_collection__(endpoint, uri: str) -> str:
     headers = {"Accept": "text/sparql"}
     return requests.get(uri, headers=headers).text
 
 
-def __download_list__(urls: List[str], localDir: str):
+def __download_list__(urls: List[str], local_dir: str):
     for url in urls:
-        __download_file__(url=url,filename=localDir+"/"+wsha256(url))
+        __download_file__(url=url, filename=local_dir+"/"+wsha256(url))
 
 
 def download(
-    localDir: str,
+    local_dir: str,
     endpoint: str,
-    databusURIs: List[str]
+    databus_uris: List[str]
 ) -> None:
     """
     Download datasets to local storage from databus registry
     ------
     localDir: the local directory
-    databusURIs: identifiers to access databus registered datasets
+    databus_uris: identifiers to access databus registered datasets
     """
-    for databusURI in databusURIs:
+    for databus_uri in databus_uris:
         # dataID or databus collection
-        if databusURI.startswith("http://") or databusURI.startswith("https://"):
+        if databus_uri.startswith("http://") or databus_uri.startswith("https://"):
             # databus collection
-            if "/collections/" in databusURI: #TODO "in" is not safe! there could be an artifact named collections, need to check for the correct part position in the URI
-                query = __handle_databus_collection__(endpoint,databusURI)
+            if "/collections/" in databus_uri:  # TODO "in" is not safe! there could be an artifact named collections, need to check for the correct part position in the URI
+                query = __handle_databus_collection__(endpoint, databus_uri)
                 res = __handle__databus_file_query__(endpoint, query)
             else:
-                print("dataId not supported yet") #TODO add support for other DatabusIds here (artifact, group, etc.)
+                print("dataId not supported yet")  # TODO add support for other DatabusIds here (artifact, group, etc.)
         # query in local file
-        elif databusURI.startswith("file://"):
+        elif databus_uri.startswith("file://"):
             print("query in file not supported yet")
         # query as argument
         else:
-            print("QUERY {}", databusURI.replace("\n"," "))
-            res = __handle__databus_file_query__(endpoint,databusURI)
-            __download_list__(res,localDir)
\ No newline at end of file
+            print("QUERY {}".format(databus_uri.replace("\n", " ")))
+            res = __handle__databus_file_query__(endpoint, databus_uri)
+            __download_list__(res, local_dir)
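
One detail worth noting from __download_list__ above: each downloaded file is
stored under the sha256 hex digest of its download URL, so the target path
can be recomputed without the client. A standalone illustration mirroring
wsha256 (URL and directory are placeholders):

    from hashlib import sha256

    url = "https://example.org/data/file.csv"
    # Files land at <local_dir>/<sha256 of the URL>, per wsha256().
    target = "./databus_data/" + sha256(url.encode('utf-8')).hexdigest()
    print(target)
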
diff --git a/databusclient/consume/download.py b/databusclient/consume/download.py
index a1bbd8a..ea91cf1 100644
--- a/databusclient/consume/download.py
+++ b/databusclient/consume/download.py
@@ -1,4 +1,4 @@
-### All kind of download functionalities for Databus ###
+# All kinds of download functionality for the Databus
 class Downloder:
     pass
diff --git a/test_oep.sh b/test_oep.sh
new file mode 100644
index 0000000..78b6ade
--- /dev/null
+++ b/test_oep.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+databusclient deploy \
+    --version-id "https://databus.openenergyplatform.org/koubaa-hmc/active_photovoltaic/testArtifact/1.0-alpha/" \
+    --title "Test Title" \
+    --abstract "Test Abstract" \
+    --description "Test Description" \
+    --license-uri "http://dalicc.net/licenselibrary/AdaptivePublicLicense10" \
+    --apikey "ddac53f3-27e7-4abb-8f22-0f106406c525" \
+    "https://raw.githubusercontent.com/dbpedia/databus/master/server/app/api/swagger.yml|type=swagger"
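
A download counterpart to this deploy test would exercise the
query-as-argument branch of client.download. This is only a sketch: the
endpoint URL and the query shape (a single projected variable bound to
dcat:downloadURL, which is what __handle__databus_file_query__ expects) are
assumptions about the target Databus instance:

    from databusclient import client

    # Single-variable SELECT, as __handle__databus_file_query__ requires;
    # endpoint and query are illustrative assumptions.
    QUERY = """
    PREFIX dcat: <http://www.w3.org/ns/dcat#>
    SELECT ?file WHERE { ?dist dcat:downloadURL ?file . } LIMIT 5
    """

    client.download(
        local_dir="./downloads",
        endpoint="https://databus.openenergyplatform.org/sparql",
        databus_uris=[QUERY],
    )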