diff --git a/howto/spark/spark-mlrun-read-csv.ipynb b/howto/spark/spark-mlrun-read-csv.ipynb index 075bfca9..0da982fa 100644 --- a/howto/spark/spark-mlrun-read-csv.ipynb +++ b/howto/spark/spark-mlrun-read-csv.ipynb @@ -69,8 +69,7 @@ "outputs": [], "source": [ "def read_csv(context: MLClientCtx, \n", - " dataset: DataItem, \n", - " artifact_path):\n", + " dataset: DataItem):\n", " \"\"\"\n", " Read csv while using spark job and mlrun - generate serverless function\n", " --------------------------------------------------------------------------------------------\n", @@ -85,9 +84,6 @@ " the default location will be \"/v3io/projects/ \n", " which can be change by using mlrun.mount_v3io later in the function specs\n", " \n", - " artifact_path : String\n", - " path on which the outout/artifacts of the fucntion will be saved\n", - " \n", " Returns:\n", " logged_dataset : mlrun_artifact\n", " dataset will be logged into mlrun database as dataset artifact\n", @@ -115,8 +111,7 @@ " # log final report\n", " context.log_dataset(\"df_sample\", \n", " df=df_to_log,\n", - " format=\"csv\", index=False,\n", - " artifact_path=artifact_path)\n", + " format=\"csv\", index=False)\n", " \n", " spark.stop()" ] @@ -240,20 +235,20 @@ "> 2021-07-12 14:00:09,417 [info] Started building image: .mlrun/func-default-spark-mlrun-read-csv:latest\n", "E0712 14:00:51.928074 1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.\n", "\tFor verbose messaging see aws.Config.CredentialsChainVerboseErrors\n", - "\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", - "\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", - "\u001b[36mINFO\u001b[0m[0040] Built cross stage deps: map[] \n", - "\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", - "\u001b[36mINFO\u001b[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", - "\u001b[36mINFO\u001b[0m[0040] Executing 0 build triggers \n", - "\u001b[36mINFO\u001b[0m[0040] Unpacking rootfs as cmd RUN pip install matplotlib pyspark requires it. \n", - "\u001b[36mINFO\u001b[0m[0154] RUN pip install matplotlib pyspark \n", - "\u001b[36mINFO\u001b[0m[0154] Taking snapshot of full filesystem... \n", - "\u001b[36mINFO\u001b[0m[0179] cmd: /bin/sh \n", - "\u001b[36mINFO\u001b[0m[0179] args: [-c pip install matplotlib pyspark] \n", - "\u001b[36mINFO\u001b[0m[0179] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n", - "\u001b[36mINFO\u001b[0m[0179] performing slow lookup of group ids for iguazio \n", - "\u001b[36mINFO\u001b[0m[0179] Running: [/bin/sh -c pip install matplotlib pyspark] \n", + "\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", + "\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", + "\u001B[36mINFO\u001B[0m[0040] Built cross stage deps: map[] \n", + "\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", + "\u001B[36mINFO\u001B[0m[0040] Retrieving image manifest datanode-registry.iguazio-platform.app.dev39.lab.iguazeng.com:80/iguazio/shell:3.0_b117_20210510150319 \n", + "\u001B[36mINFO\u001B[0m[0040] Executing 0 build triggers \n", + "\u001B[36mINFO\u001B[0m[0040] Unpacking rootfs as cmd RUN pip install matplotlib pyspark requires it. \n", + "\u001B[36mINFO\u001B[0m[0154] RUN pip install matplotlib pyspark \n", + "\u001B[36mINFO\u001B[0m[0154] Taking snapshot of full filesystem... \n", + "\u001B[36mINFO\u001B[0m[0179] cmd: /bin/sh \n", + "\u001B[36mINFO\u001B[0m[0179] args: [-c pip install matplotlib pyspark] \n", + "\u001B[36mINFO\u001B[0m[0179] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n", + "\u001B[36mINFO\u001B[0m[0179] performing slow lookup of group ids for iguazio \n", + "\u001B[36mINFO\u001B[0m[0179] Running: [/bin/sh -c pip install matplotlib pyspark] \n", "WARNING: The directory '/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.\n", "Collecting matplotlib\n", " Downloading matplotlib-3.4.2-cp37-cp37m-manylinux1_x86_64.whl (10.3 MB)\n", @@ -275,13 +270,13 @@ "Requirement already satisfied: six>=1.5 in /conda/lib/python3.7/site-packages (from python-dateutil>=2.7->matplotlib) (1.12.0)\n", "Installing collected packages: kiwisolver, pillow, python-dateutil, pyparsing, cycler, numpy, matplotlib, py4j\n", "Successfully installed cycler-0.10.0 kiwisolver-1.3.1 matplotlib-3.4.2 numpy-1.21.0 pillow-8.3.1 py4j-0.10.7 pyparsing-2.4.7 python-dateutil-2.8.1\n", - "\u001b[36mINFO\u001b[0m[0192] Taking snapshot of full filesystem... \n", - "\u001b[36mINFO\u001b[0m[0201] RUN python -m pip install \"mlrun[complete]==0.6.5-rc12\" \n", - "\u001b[36mINFO\u001b[0m[0201] cmd: /bin/sh \n", - "\u001b[36mINFO\u001b[0m[0201] args: [-c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n", - "\u001b[36mINFO\u001b[0m[0201] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n", - "\u001b[36mINFO\u001b[0m[0201] performing slow lookup of group ids for iguazio \n", - "\u001b[36mINFO\u001b[0m[0201] Running: [/bin/sh -c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n", + "\u001B[36mINFO\u001B[0m[0192] Taking snapshot of full filesystem... \n", + "\u001B[36mINFO\u001B[0m[0201] RUN python -m pip install \"mlrun[complete]==0.6.5-rc12\" \n", + "\u001B[36mINFO\u001B[0m[0201] cmd: /bin/sh \n", + "\u001B[36mINFO\u001B[0m[0201] args: [-c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n", + "\u001B[36mINFO\u001B[0m[0201] util.Lookup returned: &{Uid:1000 Gid:1000 Username:iguazio Name: HomeDir:/igz} \n", + "\u001B[36mINFO\u001B[0m[0201] performing slow lookup of group ids for iguazio \n", + "\u001B[36mINFO\u001B[0m[0201] Running: [/bin/sh -c python -m pip install \"mlrun[complete]==0.6.5-rc12\"] \n", "WARNING: The directory '/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.\n", "Collecting mlrun[complete]==0.6.5-rc12\n", " Downloading mlrun-0.6.5rc12-py3-none-any.whl (537 kB)\n", @@ -635,7 +630,7 @@ "google-api-core 1.31.0 requires six>=1.13.0, but you'll have six 1.12.0 which is incompatible.\n", "aiobotocore 1.3.2 requires botocore<1.20.107,>=1.20.106, but you'll have botocore 1.20.49 which is incompatible.\n", "Successfully installed Deprecated-1.2.12 GitPython-3.1.18 Mako-1.1.4 MarkupSafe-2.0.1 PyJWT-2.1.0 Send2Trash-1.7.1 adal-1.2.7 adlfs-0.7.7 aiobotocore-1.3.2 aiohttp-3.7.4.post0 aioitertools-0.7.1 alembic-1.5.8 argon2-cffi-20.1.0 async-generator-1.10 async-timeout-3.0.1 attrs-21.2.0 azure-common-1.1.27 azure-core-1.16.0 azure-datalake-store-0.0.52 azure-identity-1.6.0 azure-keyvault-secrets-4.3.0 azure-storage-blob-12.6.0 backcall-0.2.0 bleach-3.3.0 boto3-1.17.49 botocore-1.20.49 cachetools-4.2.2 click-7.1.2 cloudpickle-1.6.0 cryptography-3.3.2 dask-2.30.0 decorator-5.0.9 defusedxml-0.7.1 distributed-2.30.1 entrypoints-0.3 fastapi-0.62.0 fsspec-0.9.0 future-0.18.2 gitdb-4.0.7 google-api-core-1.31.0 google-auth-1.32.1 google-cloud-core-1.7.1 google-cloud-storage-1.40.0 google-crc32c-1.1.2 google-resumable-media-1.3.1 googleapis-common-protos-1.53.0 greenlet-1.1.0 grpcio-1.30.0 grpcio-tools-1.30.0 heapdict-1.0.1 humanfriendly-8.2 importlib-metadata-4.6.1 ipykernel-5.5.5 ipython-7.16.1 ipython-genutils-0.2.0 isodate-0.6.0 jedi-0.18.0 jinja2-3.0.1 jmespath-0.10.0 jsonschema-3.2.0 jupyter-client-6.1.12 jupyter-core-4.7.1 jupyterlab-pygments-0.1.2 kfp-1.0.4 kfp-server-api-1.6.0 kubernetes-11.0.0 mergedeep-1.3.4 mistune-0.8.4 mlrun-0.6.5rc12 msal-1.12.0 msal-extensions-0.3.0 msgpack-1.0.2 msrest-0.6.21 multidict-5.1.0 nbclient-0.5.3 nbconvert-6.1.0 nbformat-5.1.3 nest-asyncio-1.5.1 notebook-6.4.0 nuclio-jupyter-0.8.16 numpy-1.19.5 oauthlib-3.1.1 orjson-3.3.1 packaging-21.0 pandas-1.3.0 pandocfilters-1.4.3 parso-0.8.2 pexpect-4.8.0 pickleshare-0.7.5 portalocker-1.7.1 prometheus-client-0.11.0 prompt-toolkit-3.0.19 protobuf-3.17.3 psutil-5.8.0 ptyprocess-0.7.0 pyarrow-1.0.1 pyasn1-0.4.8 pyasn1-modules-0.2.8 pydantic-1.8.2 pygments-2.9.0 pyrsistent-0.18.0 python-editor-1.0.4 pytz-2021.1 pyyaml-5.4.1 pyzmq-22.1.0 requests-2.25.1 requests-oauthlib-1.3.0 requests-toolbelt-0.9.1 rsa-4.7.2 s3fs-0.6.0 s3transfer-0.3.7 semver-2.13.0 smmap-4.0.0 sortedcontainers-2.4.0 sqlalchemy-1.4.20 starlette-0.13.6 storey-0.6.10 strip-hints-0.1.9 tabulate-0.8.3 tblib-1.7.0 terminado-0.10.1 testpath-0.5.0 toolz-0.11.1 tornado-6.1 traitlets-5.0.5 typing-extensions-3.10.0.0 ujson-4.0.2 urllib3-1.26.6 v3io-0.5.8 v3io-frames-0.8.15 v3iofs-0.1.7 wcwidth-0.2.5 webencodings-0.5.1 websocket-client-1.1.0 wrapt-1.12.1 yarl-1.6.3 zict-2.0.0 zipp-3.5.0\n", - "\u001b[36mINFO\u001b[0m[0288] Taking snapshot of full filesystem... \n" + "\u001B[36mINFO\u001B[0m[0288] Taking snapshot of full filesystem... \n" ] }, { @@ -949,4 +944,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file