Skip to content

Commit

Permalink
Merge pull request #225 from mlrun/0.10.x-dev
Browse files Browse the repository at this point in the history
remove redundant artifact path
  • Loading branch information
aviaIguazio authored Feb 10, 2022
2 parents 21c7f36 + 9996a36 commit ce0e06e
Showing 1 changed file with 25 additions and 30 deletions.
55 changes: 25 additions & 30 deletions howto/spark/spark-mlrun-read-csv.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@
"outputs": [],
"source": [
"def read_csv(context: MLClientCtx, \n",
" dataset: DataItem, \n",
" artifact_path):\n",
" dataset: DataItem):\n",
" \"\"\"\n",
" Read csv while using spark job and mlrun - generate serverless function\n",
" --------------------------------------------------------------------------------------------\n",
Expand All @@ -85,9 +84,6 @@
" the default location will be \"/v3io/projects/<file_name>\" \n",
" which can be changed by using mlrun.mount_v3io later in the function specs\n",
" \n",
" artifact_path : String\n",
" path on which the outout/artifacts of the fucntion will be saved\n",
" \n",
" Returns:\n",
" logged_dataset : mlrun_artifact\n",
" dataset will be logged into mlrun database as dataset artifact\n",
Expand Down Expand Up @@ -115,8 +111,7 @@
" # log final report\n",
" context.log_dataset(\"df_sample\", \n",
" df=df_to_log,\n",
" format=\"csv\", index=False,\n",
" artifact_path=artifact_path)\n",
" format=\"csv\", index=False)\n",
" \n",
" spark.stop()"
]
Expand Down Expand Up @@ -240,20 +235,20 @@
"> 2021-07-12 14:00:09,417 [info] Started building image: .mlrun/func-default-spark-mlrun-read-csv:latest\n",
"E0712 14:00:51.928074 1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.\n",
"\tFor verbose messaging see aws.Config.CredentialsChainVerboseErrors\n",
"\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001b[36mINFO\u001b[0m[0040] Built cross stage deps: map[] \n",
"\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001b[36mINFO\u001b[0m[0040] Executing 0 build triggers \n",
"\u001b[36mINFO\u001b[0m[0040] Unpacking rootfs as cmd RUN pip install matplotlib pyspark requires it. \n",
"\u001b[36mINFO\u001b[0m[0154] RUN pip install matplotlib pyspark \n",
"\u001b[36mINFO\u001b[0m[0154] Taking snapshot of full filesystem... \n",
"\u001b[36mINFO\u001b[0m[0179] cmd: /bin/sh \n",
"\u001b[36mINFO\u001b[0m[0179] args: [-c pip install matplotlib pyspark] \n",
"\u001b[36mINFO\u001b[0m[0179] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n",
"\u001b[36mINFO\u001b[0m[0179] performing slow lookup of group ids for iguazio \n",
"\u001b[36mINFO\u001b[0m[0179] Running: [/bin/sh -c pip install matplotlib pyspark] \n",
"\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001B[36mINFO\u001B[0m[0040] Built cross stage deps: map[] \n",
"\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n",
"\u001B[36mINFO\u001B[0m[0040] Executing 0 build triggers \n",
"\u001B[36mINFO\u001B[0m[0040] Unpacking rootfs as cmd RUN pip install matplotlib pyspark requires it. \n",
"\u001B[36mINFO\u001B[0m[0154] RUN pip install matplotlib pyspark \n",
"\u001B[36mINFO\u001B[0m[0154] Taking snapshot of full filesystem... \n",
"\u001B[36mINFO\u001B[0m[0179] cmd: /bin/sh \n",
"\u001B[36mINFO\u001B[0m[0179] args: [-c pip install matplotlib pyspark] \n",
"\u001B[36mINFO\u001B[0m[0179] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n",
"\u001B[36mINFO\u001B[0m[0179] performing slow lookup of group ids for iguazio \n",
"\u001B[36mINFO\u001B[0m[0179] Running: [/bin/sh -c pip install matplotlib pyspark] \n",
"WARNING: The directory '/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.\n",
"Collecting matplotlib\n",
" Downloading matplotlib-3.4.2-cp37-cp37m-manylinux1_x86_64.whl (10.3 MB)\n",
Expand All @@ -275,13 +270,13 @@
"Requirement already satisfied: six>=1.5 in /conda/lib/python3.7/site-packages (from python-dateutil>=2.7->matplotlib) (1.12.0)\n",
"Installing collected packages: kiwisolver, pillow, python-dateutil, pyparsing, cycler, numpy, matplotlib, py4j\n",
"Successfully installed cycler-0.10.0 kiwisolver-1.3.1 matplotlib-3.4.2 numpy-1.21.0 pillow-8.3.1 py4j-0.10.7 pyparsing-2.4.7 python-dateutil-2.8.1\n",
"\u001b[36mINFO\u001b[0m[0192] Taking snapshot of full filesystem... \n",
"\u001b[36mINFO\u001b[0m[0201] RUN python -m pip install \"mlrun[complete]==0.6.5-rc12\" \n",
"\u001b[36mINFO\u001b[0m[0201] cmd: /bin/sh \n",
"\u001b[36mINFO\u001b[0m[0201] args: [-c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n",
"\u001b[36mINFO\u001b[0m[0201] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n",
"\u001b[36mINFO\u001b[0m[0201] performing slow lookup of group ids for iguazio \n",
"\u001b[36mINFO\u001b[0m[0201] Running: [/bin/sh -c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n",
"\u001B[36mINFO\u001B[0m[0192] Taking snapshot of full filesystem... \n",
"\u001B[36mINFO\u001B[0m[0201] RUN python -m pip install \"mlrun[complete]==0.6.5-rc12\" \n",
"\u001B[36mINFO\u001B[0m[0201] cmd: /bin/sh \n",
"\u001B[36mINFO\u001B[0m[0201] args: [-c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n",
"\u001B[36mINFO\u001B[0m[0201] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n",
"\u001B[36mINFO\u001B[0m[0201] performing slow lookup of group ids for iguazio \n",
"\u001B[36mINFO\u001B[0m[0201] Running: [/bin/sh -c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n",
"WARNING: The directory '/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.\n",
"Collecting mlrun[complete]==0.6.5-rc12\n",
" Downloading mlrun-0.6.5rc12-py3-none-any.whl (537 kB)\n",
Expand Down Expand Up @@ -635,7 +630,7 @@
"google-api-core 1.31.0 requires six>=1.13.0, but you'll have six 1.12.0 which is incompatible.\n",
"aiobotocore 1.3.2 requires botocore<1.20.107,>=1.20.106, but you'll have botocore 1.20.49 which is incompatible.\n",
"Successfully installed Deprecated-1.2.12 GitPython-3.1.18 Mako-1.1.4 MarkupSafe-2.0.1 PyJWT-2.1.0 Send2Trash-1.7.1 adal-1.2.7 adlfs-0.7.7 aiobotocore-1.3.2 aiohttp-3.7.4.post0 aioitertools-0.7.1 alembic-1.5.8 argon2-cffi-20.1.0 async-generator-1.10 async-timeout-3.0.1 attrs-21.2.0 azure-common-1.1.27 azure-core-1.16.0 azure-datalake-store-0.0.52 azure-identity-1.6.0 azure-keyvault-secrets-4.3.0 azure-storage-blob-12.6.0 backcall-0.2.0 bleach-3.3.0 boto3-1.17.49 botocore-1.20.49 cachetools-4.2.2 click-7.1.2 cloudpickle-1.6.0 cryptography-3.3.2 dask-2.30.0 decorator-5.0.9 defusedxml-0.7.1 distributed-2.30.1 entrypoints-0.3 fastapi-0.62.0 fsspec-0.9.0 future-0.18.2 gitdb-4.0.7 google-api-core-1.31.0 google-auth-1.32.1 google-cloud-core-1.7.1 google-cloud-storage-1.40.0 google-crc32c-1.1.2 google-resumable-media-1.3.1 googleapis-common-protos-1.53.0 greenlet-1.1.0 grpcio-1.30.0 grpcio-tools-1.30.0 heapdict-1.0.1 humanfriendly-8.2 importlib-metadata-4.6.1 ipykernel-5.5.5 ipython-7.16.1 ipython-genutils-0.2.0 isodate-0.6.0 jedi-0.18.0 jinja2-3.0.1 jmespath-0.10.0 jsonschema-3.2.0 jupyter-client-6.1.12 jupyter-core-4.7.1 jupyterlab-pygments-0.1.2 kfp-1.0.4 kfp-server-api-1.6.0 kubernetes-11.0.0 mergedeep-1.3.4 mistune-0.8.4 mlrun-0.6.5rc12 msal-1.12.0 msal-extensions-0.3.0 msgpack-1.0.2 msrest-0.6.21 multidict-5.1.0 nbclient-0.5.3 nbconvert-6.1.0 nbformat-5.1.3 nest-asyncio-1.5.1 notebook-6.4.0 nuclio-jupyter-0.8.16 numpy-1.19.5 oauthlib-3.1.1 orjson-3.3.1 packaging-21.0 pandas-1.3.0 pandocfilters-1.4.3 parso-0.8.2 pexpect-4.8.0 pickleshare-0.7.5 portalocker-1.7.1 prometheus-client-0.11.0 prompt-toolkit-3.0.19 protobuf-3.17.3 psutil-5.8.0 ptyprocess-0.7.0 pyarrow-1.0.1 pyasn1-0.4.8 pyasn1-modules-0.2.8 pydantic-1.8.2 pygments-2.9.0 pyrsistent-0.18.0 python-editor-1.0.4 pytz-2021.1 pyyaml-5.4.1 pyzmq-22.1.0 requests-2.25.1 requests-oauthlib-1.3.0 requests-toolbelt-0.9.1 rsa-4.7.2 s3fs-0.6.0 s3transfer-0.3.7 semver-2.13.0 smmap-4.0.0 sortedcontainers-2.4.0 
sqlalchemy-1.4.20 starlette-0.13.6 storey-0.6.10 strip-hints-0.1.9 tabulate-0.8.3 tblib-1.7.0 terminado-0.10.1 testpath-0.5.0 toolz-0.11.1 tornado-6.1 traitlets-5.0.5 typing-extensions-3.10.0.0 ujson-4.0.2 urllib3-1.26.6 v3io-0.5.8 v3io-frames-0.8.15 v3iofs-0.1.7 wcwidth-0.2.5 webencodings-0.5.1 websocket-client-1.1.0 wrapt-1.12.1 yarl-1.6.3 zict-2.0.0 zipp-3.5.0\n",
"\u001b[36mINFO\u001b[0m[0288] Taking snapshot of full filesystem... \n"
"\u001B[36mINFO\u001B[0m[0288] Taking snapshot of full filesystem... \n"
]
},
{
Expand Down Expand Up @@ -949,4 +944,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

0 comments on commit ce0e06e

Please sign in to comment.