diff --git a/warehouse/Dockerfile b/warehouse/Dockerfile
index 9bfccfe581..5d7129b652 100644
--- a/warehouse/Dockerfile
+++ b/warehouse/Dockerfile
@@ -21,6 +21,7 @@ RUN poetry export -f requirements.txt --without-hashes --output requirements.txt
 COPY ./dbt_project.yml /app/dbt_project.yml
 COPY ./packages.yml /app/packages.yml
+COPY ./profiles.yml /app/profiles.yml
 
 RUN dbt deps
 
 COPY . /app
diff --git a/warehouse/models/mart/transit_database/_mart_transit_database.yml b/warehouse/models/mart/transit_database/_mart_transit_database.yml
index 1be1047ecd..cafcabbf16 100644
--- a/warehouse/models/mart/transit_database/_mart_transit_database.yml
+++ b/warehouse/models/mart/transit_database/_mart_transit_database.yml
@@ -386,7 +386,6 @@ models:
       For best results, join with reference to a specific date, and make sure to choose
       a specific output grain (organizations, services, customer-facing vs. not);
       you will likely need to filter or group to get the desired output.
-    tests:
     columns:
       - *key
       - name: service_key
diff --git a/warehouse/models/staging/audit/stg_audit__cloudaudit_googleapis_com_data_access.sql b/warehouse/models/staging/audit/stg_audit__cloudaudit_googleapis_com_data_access.sql
index d195684b66..14118dc821 100644
--- a/warehouse/models/staging/audit/stg_audit__cloudaudit_googleapis_com_data_access.sql
+++ b/warehouse/models/staging/audit/stg_audit__cloudaudit_googleapis_com_data_access.sql
@@ -7,7 +7,6 @@
             'data_type': 'date',
             'granularity': 'day',
         },
-        partitions=['current_date()'],
         cluster_by='job_type',
     )
 }}
@@ -21,13 +20,15 @@ WITH latest AS (
     FROM cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_{{ yesterday.strftime('%Y%m%d') }}
 ),
 
-everything AS (
-    {% set start_date = modules.datetime.date(year=2022, month=4, day=11) %}
-    {% set days = (modules.datetime.date.today() - start_date).days + 1 %}
+everything AS (  -- noqa: ST03
+    -- without this limited lookback, we'd eventually exhaust query resources on full refreshes
+    -- since we might end up unioning hundreds of tables
+    -- technically we have data back to 2022-04-11
+    {% set days = 90 %}
 
-    {% for add in range(days) %}
+    {% for day in range(days) %}
 
-    {% set current = start_date + modules.datetime.timedelta(days=add) %}
+    {% set current = modules.datetime.date.today() - modules.datetime.timedelta(days=day) %}
 
     SELECT *
     FROM cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_{{ current.strftime('%Y%m%d') }}
@@ -79,9 +80,9 @@ stg_audit__cloudaudit_googleapis_com_data_access AS (
             SECOND
         ) AS duration_in_seconds,
         JSON_VALUE_ARRAY(job, '$.jobStats.queryStats.referencedTables') as referenced_tables,
-        CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS INT64) AS total_billed_bytes,
-        5.0 * CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS INT64) / POWER(2, 40) AS estimated_cost_usd, -- $5/TB
-        CAST(JSON_VALUE(job, '$.jobStats.totalSlotMs') AS INT64) / 1000 AS total_slots_seconds,
+        CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS int64) AS total_billed_bytes,
+        5.0 * CAST(JSON_VALUE(job, '$.jobStats.queryStats.totalBilledBytes') AS int64) / POWER(2, 40) AS estimated_cost_usd, -- $5/TB
+        CAST(JSON_VALUE(job, '$.jobStats.totalSlotMs') AS int64) / 1000 AS total_slots_seconds,
         JSON_VALUE(metadata, '$.tableDataRead.jobName') as table_data_read_job_name,
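The rewritten `everything` CTE above trades completeness for bounded cost: instead of unioning every daily shard since 2022-04-11, it walks back a fixed 90 days from today. A minimal Python sketch of the shard enumeration the Jinja loop performs (the helper name is hypothetical, not part of this diff):

    from datetime import date, timedelta

    # Mirror of the Jinja loop: one date-suffixed shard name per day, newest first.
    def audit_shards(days=90):
        prefix = "cal-itp-data-infra.audit.cloudaudit_googleapis_com_data_access_"
        today = date.today()
        return [prefix + (today - timedelta(days=d)).strftime("%Y%m%d") for d in range(days)]

    # The compiled model is essentially these SELECTs glued together; capping
    # `days` keeps a full refresh from scanning hundreds of shards.
    compiled_sql = "\nUNION ALL\n".join("SELECT * FROM `{}`".format(t) for t in audit_shards())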
diff --git a/warehouse/mypy.ini b/warehouse/mypy.ini
new file mode 100644
index 0000000000..4b4f17fb42
--- /dev/null
+++ b/warehouse/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+plugins = pydantic.mypy, sqlmypy
+disable_error_code = assignment
diff --git a/warehouse/poetry.lock b/warehouse/poetry.lock
index 26ea03ff3a..0e1fab5451 100644
--- a/warehouse/poetry.lock
+++ b/warehouse/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
 
 [[package]]
 name = "agate"
@@ -274,24 +274,6 @@ files = [
     {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
 ]
 
-[[package]]
-name = "betterproto"
-version = "1.2.5"
-description = "A better Protobuf / gRPC generator & library"
-category = "main"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "betterproto-1.2.5.tar.gz", hash = "sha256:74a3ab34646054f674d236d1229ba8182dc2eae86feb249b8590ef496ce9803d"},
-]
-
-[package.dependencies]
-grpclib = "*"
-stringcase = "*"
-
-[package.extras]
-compiler = ["black", "jinja2", "protobuf"]
-
 [[package]]
 name = "black"
 version = "22.12.0"
@@ -729,57 +711,58 @@ pyarrow = ">=3.0.0"
 
 [[package]]
 name = "dbt-bigquery"
-version = "1.4.3"
+version = "1.6.0b1"
 description = "The Bigquery adapter plugin for dbt"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "dbt-bigquery-1.4.3.tar.gz", hash = "sha256:af85c41cdf7efd71eee9a460f5b51d8c59ca903efbd2c6974d1e159c3e871489"},
-    {file = "dbt_bigquery-1.4.3-py3-none-any.whl", hash = "sha256:7f6a7bf40492e68c2782410cf8e118321c4f3844c577d2e7c3c0b79acb307add"},
+    {file = "dbt-bigquery-1.6.0b1.tar.gz", hash = "sha256:1481fc4489cb068ac9f021c5b4be7599272c82c64597d5f5d5b2a872dd8beaec"},
+    {file = "dbt_bigquery-1.6.0b1-py3-none-any.whl", hash = "sha256:ded7abdd617b46f6e81f50227c50e8e09a18ca01af655ac7c88e60680421c519"},
 ]
 
 [package.dependencies]
 agate = ">=1.6.3,<1.7.0"
-dbt-core = ">=1.4.0,<1.5.0"
+dbt-core = ">=1.6.0b1,<1.7.0"
 google-cloud-bigquery = ">=3.0,<4.0"
 google-cloud-dataproc = ">=5.0,<6.0"
 google-cloud-storage = ">=2.4,<3.0"
 
 [[package]]
 name = "dbt-core"
-version = "1.4.5"
+version = "1.6.0b1"
 description = "With dbt, data analysts and engineers can build analytics the way engineers build applications."
 category = "main"
 optional = false
 python-versions = ">=3.7.2"
 files = [
-    {file = "dbt-core-1.4.5.tar.gz", hash = "sha256:d32a322c192a84f41c27e8e94758c3062d2796a2e0ef32056ae5fc5d4d5c2298"},
-    {file = "dbt_core-1.4.5-py3-none-any.whl", hash = "sha256:361248e7a629d2746e18c31fa459afb607060fb8cb1e2a76706c36ce5ab9b534"},
+    {file = "dbt-core-1.6.0b1.tar.gz", hash = "sha256:78e0a7b635b7a0cdad9d614672bdb16cd120eeb2743e72766ee026ad75b526ed"},
+    {file = "dbt_core-1.6.0b1-py3-none-any.whl", hash = "sha256:e853ab215111c3143f4071ba17e7cc05b6513fb4b5aaa5da065f532c8f48317f"},
 ]
 
 [package.dependencies]
 agate = ">=1.6,<1.7.1"
-betterproto = "1.2.5"
 cffi = ">=1.9,<2.0.0"
 click = ">=7.0,<9"
 colorama = ">=0.3.9,<0.4.7"
 dbt-extractor = ">=0.4.1,<0.5.0"
-hologram = ">=0.0.14,<=0.0.15"
+hologram = ">=0.0.14,<=0.0.16"
 idna = ">=2.5,<4"
 isodate = ">=0.6,<0.7"
 Jinja2 = "3.1.2"
 logbook = ">=1.5,<1.6"
-mashumaro = {version = "3.3.1", extras = ["msgpack"]}
+mashumaro = {version = "3.6", extras = ["msgpack"]}
 minimal-snowplow-tracker = "0.0.2"
 networkx = {version = ">=2.3,<3", markers = "python_version >= \"3.8\""}
 packaging = ">20.9"
-pathspec = ">=0.9,<0.11"
+pathspec = ">=0.9,<0.12"
+protobuf = ">=4.0.0"
 pytz = ">=2015.7"
 pyyaml = ">=6.0"
 requests = "<3.0.0"
-sqlparse = ">=0.2.3,<0.5"
+sqlparse = ">=0.2.3,<0.4.4"
 typing-extensions = ">=3.7.4"
+urllib3 = ">=1.0,<2.0"
 werkzeug = ">=1,<3"
 
 [[package]]
@@ -1340,14 +1323,14 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)"]
 
 [[package]]
 name = "google-cloud-dataproc"
-version = "5.4.0"
+version = "5.4.1"
 description = "Google Cloud Dataproc API client library"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "google-cloud-dataproc-5.4.0.tar.gz", hash = "sha256:a0c5fe5cc8259a0bb6b1f4445714aafc4f5fa047f8f54cc7133914ba97856f5e"},
-    {file = "google_cloud_dataproc-5.4.0-py2.py3-none-any.whl", hash = "sha256:6c0e8e80189ab7cf1a792e82269efd63bb80fbeaa19276847f96f53df5a13ee8"},
+    {file = "google-cloud-dataproc-5.4.1.tar.gz", hash = "sha256:1896e14f63c121a3f1e2c221287cc7fd00650d2a73f8b38f82040ae6d5aab7bf"},
+    {file = "google_cloud_dataproc-5.4.1-py2.py3-none-any.whl", hash = "sha256:dfc6db122b38330779882652b10aa85ace45547433ca4af020c4c262660a4074"},
 ]
 
 [package.dependencies]
@@ -1589,40 +1572,6 @@ googleapis-common-protos = ">=1.5.5"
 grpcio = ">=1.51.3"
 protobuf = ">=4.21.6"
 
-[[package]]
-name = "grpclib"
-version = "0.4.3"
-description = "Pure-Python gRPC implementation for asyncio"
-category = "main"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "grpclib-0.4.3.tar.gz", hash = "sha256:eadf2002fc5a25158b707c0338a6c0b96dd7fbdc6df66f7e515e7f041d56a940"},
-]
-
-[package.dependencies]
-h2 = ">=3.1.0,<5"
-multidict = "*"
-
-[package.extras]
-protobuf = ["protobuf (>=3.15.0)"]
-
-[[package]]
-name = "h2"
-version = "4.1.0"
-description = "HTTP/2 State-Machine based protocol implementation"
-category = "main"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
-    {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"},
-]
-
-[package.dependencies]
-hpack = ">=4.0,<5"
-hyperframe = ">=6.0,<7"
-
 [[package]]
 name = "hologram"
 version = "0.0.15"
@@ -1639,18 +1588,6 @@ files = [
 jsonschema = ">=3.0,<4.0"
 python-dateutil = ">=2.8,<2.9"
 
-[[package]]
-name = "hpack"
-version = "4.0.0"
-description = "Pure-Python HPACK header compression"
-category = "main"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
-    {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"},
-]
-
 [[package]]
 name = "humanize"
 version = "4.6.0"
@@ -1666,18 +1603,6 @@ files = [
 
 [package.extras]
 tests = ["freezegun", "pytest", "pytest-cov"]
 
-[[package]]
-name = "hyperframe"
-version = "6.0.1"
-description = "HTTP/2 framing layer for Python"
-category = "main"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
-    {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
-]
-
 [[package]]
 name = "idna"
 version = "3.4"
@@ -2131,14 +2056,14 @@ files = [
 
 [[package]]
 name = "mashumaro"
-version = "3.3.1"
-description = "Fast serialization framework on top of dataclasses"
+version = "3.6"
+description = "Fast serialization library on top of dataclasses"
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "mashumaro-3.3.1-py3-none-any.whl", hash = "sha256:74ae7704e4ac8813ff701909aa8d96a405156dc2e1e3fd34ac07db7f0823a54a"},
-    {file = "mashumaro-3.3.1.tar.gz", hash = "sha256:997ed0a4ce64967b96ff65f5ca76b8e5e459a4ec7a6a0f73625a067004a801c9"},
+    {file = "mashumaro-3.6-py3-none-any.whl", hash = "sha256:77403e3e2ecd0a7d0e22d472c08e33282460e48726eabe356c5163efbdf9c7ee"},
+    {file = "mashumaro-3.6.tar.gz", hash = "sha256:ceb3de53029219bbbb0385ca600b59348dcd14e0c68523986c6d51889ad338f5"},
 ]
 
 [package.dependencies]
@@ -2434,46 +2359,42 @@ yaml = ["PyYAML (>=5.1.0)"]
 
 [[package]]
 name = "mypy"
-version = "0.991"
+version = "1.2.0"
 description = "Optional static typing for Python"
 category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"},
-    {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"},
-    {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"},
-    {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"},
-    {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"},
-    {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"},
-    {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"},
-    {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"},
-    {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"},
-    {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"},
-    {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"},
-    {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"},
-    {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"},
-    {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"},
-    {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"},
-    {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"},
-    {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"},
-    {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"},
-    {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"},
-    {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"},
-    {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"},
-    {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"},
-    {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"},
-    {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"},
-    {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"},
-    {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"},
-    {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"},
-    {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"},
-    {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"},
-    {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"},
+    {file = "mypy-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:701189408b460a2ff42b984e6bd45c3f41f0ac9f5f58b8873bbedc511900086d"},
+    {file = "mypy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fe91be1c51c90e2afe6827601ca14353bbf3953f343c2129fa1e247d55fd95ba"},
+    {file = "mypy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d26b513225ffd3eacece727f4387bdce6469192ef029ca9dd469940158bc89e"},
+    {file = "mypy-1.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3a2d219775a120581a0ae8ca392b31f238d452729adbcb6892fa89688cb8306a"},
+    {file = "mypy-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:2e93a8a553e0394b26c4ca683923b85a69f7ccdc0139e6acd1354cc884fe0128"},
+    {file = "mypy-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3efde4af6f2d3ccf58ae825495dbb8d74abd6d176ee686ce2ab19bd025273f41"},
+    {file = "mypy-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:695c45cea7e8abb6f088a34a6034b1d273122e5530aeebb9c09626cea6dca4cb"},
+    {file = "mypy-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0e9464a0af6715852267bf29c9553e4555b61f5904a4fc538547a4d67617937"},
+    {file = "mypy-1.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8293a216e902ac12779eb7a08f2bc39ec6c878d7c6025aa59464e0c4c16f7eb9"},
+    {file = "mypy-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:f46af8d162f3d470d8ffc997aaf7a269996d205f9d746124a179d3abe05ac602"},
+    {file = "mypy-1.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:031fc69c9a7e12bcc5660b74122ed84b3f1c505e762cc4296884096c6d8ee140"},
+    {file = "mypy-1.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:390bc685ec209ada4e9d35068ac6988c60160b2b703072d2850457b62499e336"},
+    {file = "mypy-1.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4b41412df69ec06ab141808d12e0bf2823717b1c363bd77b4c0820feaa37249e"},
+    {file = "mypy-1.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4e4a682b3f2489d218751981639cffc4e281d548f9d517addfd5a2917ac78119"},
+    {file = "mypy-1.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a197ad3a774f8e74f21e428f0de7f60ad26a8d23437b69638aac2764d1e06a6a"},
+    {file = "mypy-1.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c9a084bce1061e55cdc0493a2ad890375af359c766b8ac311ac8120d3a472950"},
+    {file = "mypy-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaeaa0888b7f3ccb7bcd40b50497ca30923dba14f385bde4af78fac713d6d6f6"},
+    {file = "mypy-1.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bea55fc25b96c53affab852ad94bf111a3083bc1d8b0c76a61dd101d8a388cf5"},
+    {file = "mypy-1.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:4c8d8c6b80aa4a1689f2a179d31d86ae1367ea4a12855cc13aa3ba24bb36b2d8"},
+    {file = "mypy-1.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:70894c5345bea98321a2fe84df35f43ee7bb0feec117a71420c60459fc3e1eed"},
+    {file = "mypy-1.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4a99fe1768925e4a139aace8f3fb66db3576ee1c30b9c0f70f744ead7e329c9f"},
+    {file = "mypy-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:023fe9e618182ca6317ae89833ba422c411469156b690fde6a315ad10695a521"},
+    {file = "mypy-1.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4d19f1a239d59f10fdc31263d48b7937c585810288376671eaf75380b074f238"},
+    {file = "mypy-1.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:2de7babe398cb7a85ac7f1fd5c42f396c215ab3eff731b4d761d68d0f6a80f48"},
+    {file = "mypy-1.2.0-py3-none-any.whl", hash = "sha256:d8e9187bfcd5ffedbe87403195e1fc340189a68463903c39e2b63307c9fa0394"},
+    {file = "mypy-1.2.0.tar.gz", hash = "sha256:f70a40410d774ae23fcb4afbbeca652905a04de7948eaf0b1789c8d1426b72d1"},
 ]
 
 [package.dependencies]
-mypy-extensions = ">=0.4.3"
+mypy-extensions = ">=1.0.0"
 tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
 typing-extensions = ">=3.10"
@@ -4043,21 +3964,16 @@ sqlfluff = "2.0.2"
 
 [[package]]
 name = "sqlparse"
-version = "0.4.4"
+version = "0.4.3"
 description = "A non-validating SQL parser."
 category = "main"
 optional = false
 python-versions = ">=3.5"
 files = [
-    {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"},
-    {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"},
+    {file = "sqlparse-0.4.3-py3-none-any.whl", hash = "sha256:0323c0ec29cd52bceabc1b4d9d579e311f3e4961b98d174201d5622a23b85e34"},
+    {file = "sqlparse-0.4.3.tar.gz", hash = "sha256:69ca804846bb114d2ec380e4360a8a340db83f0ccf3afceeb1404df028f57268"},
 ]
 
-[package.extras]
-dev = ["build", "flake8"]
-doc = ["sphinx"]
-test = ["pytest", "pytest-cov"]
-
 [[package]]
 name = "stack-data"
 version = "0.6.2"
@@ -4078,17 +3994,6 @@ pure-eval = "*"
 
 [package.extras]
 tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
 
-[[package]]
-name = "stringcase"
-version = "1.2.0"
-description = "String case converter."
-category = "main"
-optional = false
-python-versions = "*"
-files = [
-    {file = "stringcase-1.2.0.tar.gz", hash = "sha256:48a06980661908efe8d9d34eab2b6c13aefa2163b3ced26972902e3bdfd87008"},
-]
-
 [[package]]
 name = "tblib"
 version = "1.7.0"
@@ -4470,4 +4375,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.9"
-content-hash = "36177951daad9a87cde34c0ee0eebd2ee5efe59b59c477b2d170246257e7ad8b"
+content-hash = "39aa47475f90b7a0bc4122fa83871b01f9a33e71a13b1b64f168ced023e21dea"
diff --git a/warehouse/pyproject.toml b/warehouse/pyproject.toml
index 937cb4fdfd..5348379b61 100644
--- a/warehouse/pyproject.toml
+++ b/warehouse/pyproject.toml
@@ -29,12 +29,11 @@ networkx = {version = "<3", extras = ["default"]}
 # export CFLAGS="-I $(brew --prefix graphviz)/include"
 # export LDFLAGS="-L $(brew --prefix graphviz)/lib"
 pygraphviz = "^1.10"
-dbt-bigquery = "^1.4.3"
 palettable = "^3.3.0"
+dbt-bigquery = "1.6.0b1"
 
 [tool.poetry.group.dev.dependencies]
 black = "^22.12.0"
-mypy = "^0.991"
 isort = "^5.11.4"
 types-tqdm = "^4.64.7"
 types-requests = "^2.28.11"
@@ -46,10 +45,8 @@ datamodel-code-generator = "^0.17.1"
 sqlfluff = "^2.0.2"
 sqlfluff-templater-dbt = "^2.0.2"
 ipdb = "^0.13.13"
+mypy = "^1.2.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
-
-[tool.mypy]
-plugins = "sqlmypy"
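The dbt_artifacts rewrite in the next hunk replaces hand-maintained pydantic models with classes generated from dbt's published JSON schemas, then layers project-specific behavior on through multiple inheritance rather than editing generated code. A reduced sketch of that pattern (class bodies simplified; pydantic v1 assumed, matching this project's pins):

    from typing import ClassVar, Dict
    from pydantic import BaseModel

    class BaseModelNode(BaseModel):  # stand-in for datamodel-codegen output
        unique_id: str
        name: str

    class NodeModelMixin(BaseModel):
        # registry of every node parsed, keyed by unique_id
        _instances: ClassVar[Dict[str, "NodeModelMixin"]] = {}

        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self._instances[self.unique_id] = self

    class ModelNode(BaseModelNode, NodeModelMixin):  # generated fields + local behavior
        pass

Regenerating the schema models then never clobbers the hand-written layer, at the cost of a few `# type: ignore` hints where the two disagree.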
diff --git a/warehouse/scripts/dbt_artifacts.py b/warehouse/scripts/dbt_artifacts/__init__.py
similarity index 54%
rename from warehouse/scripts/dbt_artifacts.py
rename to warehouse/scripts/dbt_artifacts/__init__.py
index 0dd2c7d04e..40f39d9683 100644
--- a/warehouse/scripts/dbt_artifacts.py
+++ b/warehouse/scripts/dbt_artifacts/__init__.py
@@ -2,23 +2,37 @@
 Built off the starting point of https://guitton.co/posts/dbt-artifacts
 """
 import abc
-import json
 import os
-from datetime import datetime
 from enum import Enum
-from pathlib import Path
 from typing import Annotated, Any, ClassVar, Dict, List, Literal, Optional, Union
 
 import humanize
 import pendulum
-import yaml
-from catalog import Catalog, CatalogTable
 from palettable.scientific.sequential import LaJolla_6  # type: ignore
-from pydantic import BaseModel, Field, constr, validator
+from pydantic import BaseModel, Field, constr, root_validator
 from slugify import slugify
 from sqlalchemy import MetaData, Table, create_engine, select
 from sqlalchemy.sql import Select
 
+from .catalog import CatalogTable
+from .catalog import Model as Catalog
+from .manifest import AnalysisNode as BaseAnalysisNode
+from .manifest import ColumnInfo, DependsOn
+from .manifest import Exposure as BaseExposure
+from .manifest import GenericTestNode as BaseGenericTestNode
+from .manifest import HookNode as BaseHookNode
+from .manifest import Model as BaseManifest
+from .manifest import ModelNode as BaseModelNode
+from .manifest import RPCNode as BaseRPCNode
+from .manifest import SeedNode as BaseSeedNode
+from .manifest import SingularTestNode as BaseSingularTestNode
+from .manifest import SnapshotNode as BaseSnapshotNode
+from .manifest import SourceDefinition as BaseSourceDefinition
+from .manifest import SqlNode as BaseSqlNode
+from .run_results import Model as BaseRunResults
+from .run_results import RunResultOutput as BaseRunResultOutput
+from .sources import Model as Sources
+
 
 # Taken from the calitp repo which we can't install because of deps issue
 def get_engine(project, max_bytes=None):
@@ -33,118 +47,48 @@
     )
 
 
-class FileFormat(str, Enum):
-    csv = "csv"
-    geojson = "geojson"
-    geojsonl = "geojsonl"
-    json = "json"
-    jsonl = "jsonl"
-
-
-class TileFormat(str, Enum):
-    mbtiles = "mbtiles"
-    pbf = "pbf"
-
-
-class DbtResourceType(str, Enum):
-    model = "model"
-    analysis = "analysis"
-    test = "test"
-    operation = "operation"
-    seed = "seed"
-    source = "source"
+# Monkey patches
 
-
-class DbtMaterializationType(str, Enum):
-    table = "table"
-    view = "view"
-    incremental = "incremental"
-    ephemeral = "ephemeral"
-    seed = "seed"
-    test = "test"
 
+def num_bytes(self) -> Optional[int]:
+    if "num_bytes" in self.stats:
+        value = self.stats["num_bytes"].value
+        # for some reason, 0 gets parsed as bool by pydantic
+        # maybe because it's first in the union?
+        if isinstance(value, bool):
+            return 0
+        assert isinstance(value, (float, str))
+        return int(float(value))
+    return None
 
-class NodeDeps(BaseModel):
-    macros: List[str]
-    nodes: Optional[List[str]]  # does not exist on seeds
-
-    @property
-    def resolved_nodes(self) -> List["BaseNode"]:
-        return [BaseNode._instances[node] for node in self.nodes] if self.nodes else []
-
-
-class NodeConfig(BaseModel):
-    alias: Optional[str]
-    schema_: str = Field(None, alias="schema")
-    materialized: Optional[DbtMaterializationType]
-
-
-class Column(BaseModel):
-    name: str
-    description: Optional[str]
-    meta: Dict[str, Any] = {}
-    parent: Optional[
-        "BaseNode"
-    ] = None  # this is set after the fact; it's Optional to make mypy happy
-
-    @property
-    def publish(self) -> bool:
-        return not self.meta.get("publish.ignore", False)
 
+CatalogTable.num_bytes = property(num_bytes)  # type: ignore[attr-defined]
 
-    @property
-    def tests(self) -> List[str]:
-        # this has a lot of stuff to make mypy happy
-        return [
-            node.name
-            for name, node in BaseNode._instances.items()
-            if node.resource_type == DbtResourceType.test
-            and isinstance(node, Test)
-            and node.depends_on
-            and node.depends_on.nodes is not None
-            and self.parent
-            and self.parent.unique_id in node.depends_on.nodes
-            and node.test_metadata
-            and self.name == node.test_metadata.kwargs.get("column_name")
-        ]
+DependsOn.resolved_nodes = property(  # type: ignore[attr-defined]
+    lambda self: [NodeModelMixin._instances[node] for node in self.nodes]
+    if self.nodes
+    else []
+)
+ColumnInfo.publish = property(lambda self: not self.meta.get("publish.ignore", False))  # type: ignore[attr-defined]
 
-    def docblock(self, prefix="") -> str:
-        return f"""
-{{% docs {prefix}{self.name} %}}
-{self.description}
-{{% enddocs %}}
-"""
+# End monkey patches
 
-    def yaml(self, include_description=True, extras={}) -> str:
-        include = {
-            "name",
-        }
-        if include_description:
-            include.add("description")
-        return yaml.dump([{**self.dict(include=include), **extras}], sort_keys=False)
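The num_bytes patch above guards against a pydantic v1 quirk: union members are tried left to right with coercion, so an integer 0 arriving at a field whose union lists bool first validates as False. A minimal reproduction (model name hypothetical; pydantic v1 assumed):

    from typing import Union
    from pydantic import BaseModel

    class StatsValue(BaseModel):  # stand-in for the generated StatsItem
        value: Union[bool, str, float]

    # 0 is coerced by the first union member and arrives as False, not 0.0;
    # hence the isinstance(value, bool) check before int(float(value)).
    assert StatsValue(value=0).value is False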
 
+class NodeModelMixin(BaseModel):
+    _instances: ClassVar[Dict[str, "NodeModelMixin"]] = {}
+    catalog_entry: Optional[CatalogTable]
 
-class BaseNode(BaseModel):
-    _instances: ClassVar[Dict[str, "BaseNode"]] = {}
+    # TODO: can we avoid re-defining these here?
     unique_id: str
     fqn: List[str]
-    path: Path
-    original_file_path: Path
-    database: str
-    schema_: str = Field(None, alias="schema")
     name: str
-    resource_type: DbtResourceType
-    description: str
-    depends_on: Optional[NodeDeps]
-    config: NodeConfig
-    columns: Dict[str, Column]
-    meta: Dict = {}
-    catalog_entry: Optional[CatalogTable]
+    schema_: str
+    database: Optional[str]
+    columns: Optional[Dict[str, ColumnInfo]] = {}
 
     def __init__(self, **kwargs):
-        super(BaseNode, self).__init__(**kwargs)
+        super(NodeModelMixin, self).__init__(**kwargs)
         self._instances[self.unique_id] = self
-        for column in self.columns.values():
-            column.parent = self
 
     @property
     def strfqn(self) -> str:
@@ -152,7 +96,7 @@ def strfqn(self) -> str:
 
     @property
     def table_name(self):
-        return self.config.alias or self.name
+        return self.config.alias or self.name  # type: ignore[attr-defined]
 
     @property
     def schema_table(self):
@@ -167,7 +111,7 @@ def select(self) -> Select:
         columns = [
             c
             for c in self.sqlalchemy_table(engine).columns
-            if c.name not in self.columns or self.columns[c.name].publish
+            if not self.columns or c.name not in self.columns or self.columns[c.name].publish  # type: ignore[attr-defined]
         ]
         return select(columns=columns)
 
@@ -178,7 +122,7 @@ def gvrepr(self) -> str:
         """
         return "\n".join(
             [
-                self.config.materialized or self.resource_type.value,
+                self.config.materialized or self.resource_type.value,  # type: ignore[attr-defined]
                 self.name,
             ]
         )
@@ -193,119 +137,118 @@ def gvattrs(self) -> Dict[str, Any]:
         }
 
 
-class Seed(BaseNode):
-    resource_type: Literal[DbtResourceType.seed]
-
-    @property
-    def gvattrs(self) -> Dict[str, Any]:
-        return {
-            "fillcolor": "green",
-        }
-
+class AnalysisNode(BaseAnalysisNode, NodeModelMixin):
+    pass
 
-class Source(BaseNode):
-    resource_type: Literal[DbtResourceType.source]
 
-    @property
-    def gvattrs(self) -> Dict[str, Any]:
-        return {
-            "fillcolor": "blue",
-        }
+class SingularTestNode(BaseSingularTestNode, NodeModelMixin):
+    pass
 
 
-# TODO: this should be a discriminated type based on materialization
-class Model(BaseNode):
-    resource_type: Literal[DbtResourceType.model]
-    depends_on: NodeDeps
+class HookNode(BaseHookNode, NodeModelMixin):
+    pass
 
-    @property
-    def children(self) -> List["Model"]:
-        children = []
-        for unique_id, node in BaseNode._instances.items():
-            if (
-                isinstance(node, Model)
-                and node.depends_on.nodes
-                and self.unique_id in node.depends_on.nodes
-            ):
-                children.append(node)
-        return children
 
+class ModelNode(BaseModelNode, NodeModelMixin):
     @property
     def gvrepr(self) -> str:
         if (
-            self.config.materialized
-            in (DbtMaterializationType.table, DbtMaterializationType.incremental)
+            self.config
+            and self.config.materialized in ("table", "incremental")
             and self.catalog_entry
-            and self.catalog_entry.num_bytes
+            and self.catalog_entry.num_bytes  # type: ignore[attr-defined]
         ):
             return "\n".join(
                 [
-                    super(Model, self).gvrepr,
-                    f"Storage: {humanize.naturalsize(self.catalog_entry.num_bytes)}",
+                    super(ModelNode, self).gvrepr,
+                    f"Storage: {humanize.naturalsize(self.catalog_entry.num_bytes)}",  # type: ignore[attr-defined]
                 ]
             )
-        return super(Model, self).gvrepr
+        return super(ModelNode, self).gvrepr
 
     @property
     def gvattrs(self) -> Dict[str, Any]:
         fillcolor = "white"
-        if self.config.materialized in (
-            DbtMaterializationType.table,
-            DbtMaterializationType.incremental,
-        ):
+        if self.config and self.config.materialized in ("table", "incremental"):
            fillcolor = "aquamarine"
         if (
             self.catalog_entry
-            and self.catalog_entry.num_bytes
-            and self.catalog_entry.num_bytes > 100_000_000_000
+            and self.catalog_entry.num_bytes  # type: ignore[attr-defined]
+            and self.catalog_entry.num_bytes > 100_000_000_000  # type: ignore[attr-defined]
             and "clustering_fields" not in self.catalog_entry.stats
             and "partitioning_type" not in self.catalog_entry.stats
         ):
             fillcolor = "red"
-        if (
-            self.config.materialized == DbtMaterializationType.view
-            and len(self.children) > 1
-        ):
-            fillcolor = "pink"
+        # TODO: bring me back
+        # if self.config.materialized == "view" and len(self.children) > 1:
+        #     fillcolor = "pink"
 
         return {
             "fillcolor": fillcolor,
         }
 
 
-class TestMetadata(BaseModel):
-    name: str
-    kwargs: Dict[str, Union[str, List, Dict]]
+class RPCNode(BaseRPCNode, NodeModelMixin):
+    pass
+
+
+class SqlNode(BaseSqlNode, NodeModelMixin):
+    pass
 
 
+class GenericTestNode(BaseGenericTestNode, NodeModelMixin):
+    pass
 
-class Test(BaseNode):
-    resource_type: Literal[DbtResourceType.test]
-    # test_metadata is optional because singular tests (custom defined) do not have test_metadata attribute
-    # for example: https://github.com/dbt-labs/dbt-docs/blob/main/src/app/services/graph.service.js#L355
-    # ^ singular test is specifically identified by not having the test_metadata attribute
-    test_metadata: Optional[TestMetadata]
 
+class SnapshotNode(BaseSnapshotNode, NodeModelMixin):
+    pass
+
+
+class SeedNode(BaseSeedNode, NodeModelMixin):
+    @property
+    def gvattrs(self):
+        return {
+            "fillcolor": "green",
+        }
 
-Node = Annotated[
-    Union[Seed, Source, Model, Test],
-    Field(discriminator="resource_type"),
+
+DbtNode = Union[
+    AnalysisNode,
+    SingularTestNode,
+    HookNode,
+    ModelNode,
+    RPCNode,
+    SqlNode,
+    GenericTestNode,
+    SnapshotNode,
+    SeedNode,
 ]
 
 
-class ExposureType(str, Enum):
-    dashboard = "dashboard"
-    notebook = "notebook"
-    analysis = "analysis"
-    ml = "ml"
-    application = "application"
+class SourceDefinition(BaseSourceDefinition, NodeModelMixin):
+    @property
+    def gvattrs(self) -> Dict:
+        return {
+            "fillcolor": "blue",
+        }
+
+    pass
 
-class Owner(BaseModel):
-    name: Optional[str]
-    email: str
+
+class FileFormat(str, Enum):
+    csv = "csv"
+    geojson = "geojson"
+    geojsonl = "geojsonl"
+    json = "json"
+    jsonl = "jsonl"
+
+
+class TileFormat(str, Enum):
+    mbtiles = "mbtiles"
+    pbf = "pbf"
 
 
 class BaseDestination(BaseModel, abc.ABC):
@@ -399,42 +342,35 @@ class ExposureMeta(BaseModel):
     destinations: List[Destination] = []
 
 
-class Exposure(BaseModel):
-    fqn: List[str]
-    unique_id: str
-    package_name: str
-    path: Path
-    name: str
-    description: str
-    type: ExposureType
-    url: Optional[str]
+class Exposure(BaseExposure):
     # TODO: we should validate that model names do not conflict with
     # file format names since they are used as entity names in hive partitions
-    depends_on: NodeDeps
     meta: Optional[ExposureMeta]
 
-    @validator("meta")
-    def must_provide_layer_names_if_tiles(cls, v, values):
-        if v:
-            for dest in v.destinations:
+    @root_validator
+    def must_provide_layer_names_if_tiles(cls, values):
+        if values["meta"]:
+            for dest in values["meta"].destinations:
                 if isinstance(dest, TilesDestination):
                     assert len(dest.layer_names) == len(
                         values["depends_on"].nodes
                     ), "must provide one layer name per depends_on"
-        return v
+        return values
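The validator above also changed shape: a pydantic v1 @validator("meta") only receives fields declared before the validated field in `values`, whereas @root_validator sees the whole field mapping, which is safer now that field order comes from the generated base class. A reduced sketch of the same check (field names hypothetical; pydantic v1):

    from typing import List
    from pydantic import BaseModel, root_validator

    class TilesExposure(BaseModel):
        depends_on_nodes: List[str]
        layer_names: List[str]

        @root_validator
        def one_layer_per_node(cls, values):
            # root validators receive every field, regardless of declaration order
            assert len(values["layer_names"]) == len(values["depends_on_nodes"]), \
                "must provide one layer name per depends_on"
            return values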
 
 
-class Manifest(BaseModel):
-    nodes: Dict[str, Node]
-    sources: Dict[str, Source]
-    metrics: Dict
-    exposures: Dict[str, Exposure]
-    macros: Dict
-    docs: Dict
-    parent_map: Dict[str, List[str]]
-    child_map: Dict[str, List[str]]
-    selectors: Dict
-    disabled: Dict  # should be Dict[str, Node] but they lack the resource_type
+class Manifest(BaseManifest):
+    nodes: Dict[
+        str,
+        DbtNode,
+    ] = Field(
+        ..., description="The nodes defined in the dbt project and its dependencies"
+    )
+    exposures: Dict[str, Exposure] = Field(
+        ..., description="The exposures defined in the dbt project and its dependencies"
+    )
+    sources: Dict[str, SourceDefinition] = Field(
+        ..., description="The sources defined in the dbt project and its dependencies"
+    )
 
     # https://github.com/pydantic/pydantic/issues/1577#issuecomment-803171322
     def set_catalog(self, c: Catalog):
@@ -444,36 +380,23 @@
         )
 
 
-class TimingInfo(BaseModel):
-    name: str
-    started_at: Optional[datetime]
-    completed_at: Optional[datetime]
-
-
-# TODO: it'd be nice to be able to distinguish between models, tests, and freshness checks
-class RunResultStatus(str, Enum):
-    _pass = "pass"
+class RunResultStatus(Enum):
     success = "success"
     error = "error"
+    skipped = "skipped"
+    pass_ = "pass"
     fail = "fail"
     warn = "warn"
-    skipped = "skipped"
     runtime_error = "runtime error"
 
 
-class RunResult(BaseModel):
+class RunResultOutput(BaseRunResultOutput):
     status: RunResultStatus
-    timing: List[TimingInfo]
-    thread_id: str
-    execution_time: int  # seconds
-    adapter_response: Dict
-    message: Optional[str]
-    failures: Optional[int]
-    unique_id: str
     manifest: Optional[Manifest]
 
+    # TODO: bring me back
     @property
-    def node(self) -> Node:
+    def node(self) -> DbtNode:
         if not self.manifest:
             raise ValueError("must set manifest before calling node")
         return self.manifest.nodes[self.unique_id]
@@ -493,6 +416,7 @@ def gvattrs(self) -> Dict[str, Any]:
         """
         Returns a string representation intended for graphviz labels
         """
+        assert self.node is not None
         # TODO: do an actual linear transform on this
         # the top colors are too dark to use as a background
         white, yellow, orange, red, _, _ = LaJolla_6.hex_colors
@@ -517,9 +441,8 @@
         }
 
 
-class RunResults(BaseModel):
-    metadata: Dict
-    results: List[RunResult]
+class RunResults(BaseRunResults):
+    results: List[RunResultOutput]  # type: ignore[assignment]
     manifest: Optional[Manifest]
 
     # https://github.com/pydantic/pydantic/issues/1577#issuecomment-803171322
     def set_manifest(self, m: Manifest):
@@ -529,14 +452,19 @@
             result.manifest = m
 
 
-# mainly just to test that these models work
-if __name__ == "__main__":
-    paths = [
-        ("./target/manifest.json", Manifest),
-        ("./target/run_results.json", RunResults),
-    ]
-
-    for path, model in paths:
-        with open(path) as f:
-            model(**json.load(f))
-        print(f"{path} is a valid {model.__name__}!", flush=True)
+__all__ = [
+    "Catalog",
+    "DbtNode",
+    "GenericTestNode",
+    "Manifest",
+    "RunResultOutput",
+    "RunResults",
+    "SeedNode",
+    "SourceDefinition",
+    "Sources",
+    "Exposure",
+    "CkanDestination",
+    "NodeModelMixin",
+    "TilesDestination",
+    "TileFormat",
+]
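A run_results.json row carries only a unique_id, so a manifest has to be attached before `.node` can resolve; the `__main__` module added in the next hunk exercises exactly this wiring. A usage sketch, assuming ./target/ artifacts from a prior dbt invocation (import path assumed):

    import json
    from dbt_artifacts import Manifest, RunResults

    with open("./target/manifest.json") as f:
        manifest = Manifest(**json.load(f))
    with open("./target/run_results.json") as f:
        run_results = RunResults(**json.load(f))

    run_results.set_manifest(manifest)  # fans the manifest out to each result
    for result in run_results.results:
        print(result.unique_id, result.status)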
diff --git a/warehouse/scripts/dbt_artifacts/__main__.py b/warehouse/scripts/dbt_artifacts/__main__.py
new file mode 100644
index 0000000000..7465547d7d
--- /dev/null
+++ b/warehouse/scripts/dbt_artifacts/__main__.py
@@ -0,0 +1,15 @@
+# mainly just to test that these models work
+import json
+
+from . import Catalog, Manifest, RunResults
+
+with open("./target/manifest.json") as f:
+    manifest = Manifest(**json.load(f))
+
+with open("./target/run_results.json") as f:
+    run_results = RunResults(**json.load(f))
+
+with open("./target/catalog.json") as f:
+    catalog = Catalog(**json.load(f))
+
+manifest.set_catalog(c=catalog)
diff --git a/warehouse/scripts/catalog.py b/warehouse/scripts/dbt_artifacts/catalog.py
similarity index 66%
rename from warehouse/scripts/catalog.py
rename to warehouse/scripts/dbt_artifacts/catalog.py
index d09c579480..3d17ca0017 100644
--- a/warehouse/scripts/catalog.py
+++ b/warehouse/scripts/dbt_artifacts/catalog.py
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
-#   filename:  v1.json
-#   timestamp: 2023-03-06T19:42:51+00:00
+#   filename:  https://schemas.getdbt.com/dbt/catalog/v1.json
+#   timestamp: 2023-05-15T18:21:32+00:00
 
 from __future__ import annotations
 
@@ -12,18 +12,18 @@
 
 class CatalogMetadata(BaseModel):
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
 
     dbt_schema_version: Optional[str] = "https://schemas.getdbt.com/dbt/catalog/v1.json"
     dbt_version: Optional[str] = "0.20.0rc1"
-    generated_at: Optional[datetime] = "2021-06-07T14:49:01.098234Z"  # type: ignore
+    generated_at: Optional[datetime] = "2021-06-07T14:49:01.098234Z"
     invocation_id: Optional[str] = None
     env: Optional[Dict[str, str]] = {}
 
 
 class TableMetadata(BaseModel):
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
 
     type: str
     schema_: str = Field(..., alias="schema")
@@ -35,7 +35,7 @@ class Config:
 
 class ColumnMetadata(BaseModel):
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
 
     type: str
     index: int
@@ -45,7 +45,7 @@ class Config:
 
 class StatsItem(BaseModel):
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
 
     id: str
     label: str
@@ -56,29 +56,17 @@ class Config:
 
 class CatalogTable(BaseModel):
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
 
     metadata: TableMetadata
     columns: Dict[str, ColumnMetadata]
     stats: Dict[str, StatsItem]
     unique_id: Optional[str] = None
 
-    @property
-    def num_bytes(self) -> Optional[int]:
-        if "num_bytes" in self.stats:
-            value = self.stats["num_bytes"].value
-            # for some reason, 0 gets parsed as bool by pydantic
-            # maybe because it's first in the union?
-            if isinstance(value, bool):
-                return 0
-            assert isinstance(value, (float, str))
-            return int(float(value))
-        return None
-
 
-class Catalog(BaseModel):
+class Model(BaseModel):
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
 
     metadata: CatalogMetadata
     nodes: Dict[str, CatalogTable]
diff --git a/warehouse/scripts/dbt_artifacts/gen_artifacts.sh b/warehouse/scripts/dbt_artifacts/gen_artifacts.sh
new file mode 100755
index 0000000000..633905a663
--- /dev/null
+++ b/warehouse/scripts/dbt_artifacts/gen_artifacts.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+poetry run datamodel-codegen --url https://schemas.getdbt.com/dbt/catalog/v1.json --output "$SCRIPT_DIR"/catalog.py --field-constraints --use-double-quotes --allow-extra-fields
+poetry run datamodel-codegen --url https://schemas.getdbt.com/dbt/manifest/v9.json --output "$SCRIPT_DIR"/manifest.py --field-constraints --use-double-quotes --allow-extra-fields
+poetry run datamodel-codegen --url https://schemas.getdbt.com/dbt/run-results/v4.json --output "$SCRIPT_DIR"/run_results.py --field-constraints --use-double-quotes --allow-extra-fields
+poetry run datamodel-codegen --url https://schemas.getdbt.com/dbt/sources/v3.json --output "$SCRIPT_DIR"/sources.py --field-constraints --use-double-quotes --allow-extra-fields
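Note the --allow-extra-fields flag in gen_artifacts.sh: it is what turns every generated Config from Extra.forbid into Extra.allow, so artifacts written by a newer dbt that adds keys still parse instead of failing validation. A minimal illustration (pydantic v1):

    from pydantic import BaseModel, Extra, ValidationError

    class Lenient(BaseModel):
        class Config:
            extra = Extra.allow
        name: str

    class Strict(BaseModel):
        class Config:
            extra = Extra.forbid
        name: str

    payload = {"name": "stg_foo", "field_added_in_future_dbt": 1}
    Lenient(**payload)  # parses; the unknown key is kept on the instance
    try:
        Strict(**payload)
    except ValidationError:
        print("extra fields not permitted")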
diff --git a/warehouse/scripts/dbt_artifacts/manifest.py b/warehouse/scripts/dbt_artifacts/manifest.py
new file mode 100644
index 0000000000..4b3a2f7959
--- /dev/null
+++ b/warehouse/scripts/dbt_artifacts/manifest.py
@@ -0,0 +1,1441 @@
+# generated by datamodel-codegen:
+#   filename:  https://schemas.getdbt.com/dbt/manifest/v9.json
+#   timestamp: 2023-05-15T18:21:34+00:00
+
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, Extra, Field
+
+
+class UserIdItem(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    __root__: str = Field(
+        ..., regex="[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+    )
+
+
+class ManifestMetadata(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    dbt_schema_version: Optional[
+        str
+    ] = "https://schemas.getdbt.com/dbt/manifest/v9.json"
+    dbt_version: Optional[str] = "1.6.0a1"
+    generated_at: Optional[datetime] = "2023-04-21T11:09:06.496436Z"
+    invocation_id: Optional[str] = "c4b245be-8edb-4ad7-ba54-9337ce594f5d"
+    env: Optional[Dict[str, str]] = {}
+    project_id: Optional[str] = Field(
+        None, description="A unique identifier for the project"
+    )
+    user_id: Optional[UserIdItem] = Field(
+        None, description="A unique identifier for the user"
+    )
+    send_anonymous_usage_stats: Optional[bool] = Field(
+        None, description="Whether dbt is configured to send anonymous usage statistics"
+    )
+    adapter_type: Optional[str] = Field(
+        None, description="The type name of the adapter"
+    )
+
+
+class ResourceType(Enum):
+    analysis = "analysis"
+
+
+class FileHash(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    checksum: str
+
+
+class Hook(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    sql: str
+    transaction: Optional[bool] = True
+    index: Optional[int] = None
+
+
+class Docs(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    show: Optional[bool] = True
+    node_color: Optional[str] = None
+
+
+class ContractConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enforced: Optional[bool] = False
+
+
+class Type(Enum):
+    check = "check"
+    not_null = "not_null"
+    unique = "unique"
+    primary_key = "primary_key"
+    foreign_key = "foreign_key"
+    custom = "custom"
+
+
+class ColumnLevelConstraint(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    type: Type
+    name: Optional[str] = None
+    expression: Optional[str] = None
+    warn_unenforced: Optional[bool] = True
+    warn_unsupported: Optional[bool] = True
+
+
+class RefArgs(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    package: Optional[str] = None
+    version: Optional[Union[str, float]] = None
+
+
+class DependsOn(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    macros: Optional[List[str]] = []
+    nodes: Optional[List[str]] = []
+
+
+class InjectedCTE(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    id: str
+    sql: str
+
+
+class Contract(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enforced: Optional[bool] = False
+    checksum: Optional[str] = None
+
+
+class ResourceType1(Enum):
+    test = "test"
+
+
+class TestConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+    alias: Optional[str] = None
+    schema_: Optional[str] = Field("dbt_test__audit", alias="schema")
+    database: Optional[str] = None
+    tags: Optional[Union[List[str], str]] = []
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    materialized: Optional[str] = "test"
+    severity: Optional[str] = Field(
+        "ERROR", regex="^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$"
+    )
+    store_failures: Optional[bool] = None
+    where: Optional[str] = None
+    limit: Optional[int] = None
+    fail_calc: Optional[str] = "count(*)"
+    warn_if: Optional[str] = "!= 0"
+    error_if: Optional[str] = "!= 0"
+
+
+class ResourceType2(Enum):
+    operation = "operation"
+
+
+class ResourceType3(Enum):
+    model = "model"
+
+
+class Access(Enum):
+    protected = "protected"
+    private = "private"
+    public = "public"
+
+
+class ModelLevelConstraint(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    type: Type
+    name: Optional[str] = None
+    expression: Optional[str] = None
+    warn_unenforced: Optional[bool] = True
+    warn_unsupported: Optional[bool] = True
+    columns: Optional[List[str]] = []
+
+
+class ResourceType4(Enum):
+    rpc = "rpc"
+
+
+class ResourceType5(Enum):
+    sql_operation = "sql operation"
+
+
+class ResourceType6(Enum):
+    test = "test"
+
+
+class TestMetadata(BaseModel):  # type: ignore[no-redef]
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    kwargs: Optional[Dict[str, Any]] = {}
+    namespace: Optional[str] = None
+
+
+class ResourceType7(Enum):
+    snapshot = "snapshot"
+
+
+class SnapshotConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+    alias: Optional[str] = None
+    schema_: Optional[str] = Field(None, alias="schema")
+    database: Optional[str] = None
+    tags: Optional[Union[List[str], str]] = []
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    materialized: Optional[str] = "snapshot"
+    incremental_strategy: Optional[str] = None
+    persist_docs: Optional[Dict[str, Any]] = {}
+    post_hook: Optional[List[Hook]] = Field([], alias="post-hook")
+    pre_hook: Optional[List[Hook]] = Field([], alias="pre-hook")
+    quoting: Optional[Dict[str, Any]] = {}
+    column_types: Optional[Dict[str, Any]] = {}
+    full_refresh: Optional[bool] = None
+    unique_key: Optional[str] = None
+    on_schema_change: Optional[str] = "ignore"
+    grants: Optional[Dict[str, Any]] = {}
+    packages: Optional[List[str]] = []
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    contract: Optional[ContractConfig] = Field(
+        default_factory=lambda: ContractConfig.parse_obj({"enforced": False})
+    )
+    strategy: Optional[str] = None
+    target_schema: Optional[str] = None
+    target_database: Optional[str] = None
+    updated_at: Optional[str] = None
+    check_cols: Optional[Union[str, List[str]]] = None
+
+
+class ResourceType8(Enum):
+    seed = "seed"
+
+
+class SeedConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+    alias: Optional[str] = None
+    schema_: Optional[str] = Field(None, alias="schema")
+    database: Optional[str] = None
+    tags: Optional[Union[List[str], str]] = []
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    materialized: Optional[str] = "seed"
+    incremental_strategy: Optional[str] = None
+    persist_docs: Optional[Dict[str, Any]] = {}
+    post_hook: Optional[List[Hook]] = Field([], alias="post-hook")
+    pre_hook: Optional[List[Hook]] = Field([], alias="pre-hook")
+    quoting: Optional[Dict[str, Any]] = {}
+    column_types: Optional[Dict[str, Any]] = {}
+    full_refresh: Optional[bool] = None
+    unique_key: Optional[Union[str, List[str]]] = None
+    on_schema_change: Optional[str] = "ignore"
+    grants: Optional[Dict[str, Any]] = {}
+    packages: Optional[List[str]] = []
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    contract: Optional[ContractConfig] = Field(
+        default_factory=lambda: ContractConfig.parse_obj({"enforced": False})
+    )
+    quote_columns: Optional[bool] = None
+
+
+class MacroDependsOn(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    macros: Optional[List[str]] = []
+
+
+class ResourceType9(Enum):
+    source = "source"
+
+
+class Quoting(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    database: Optional[bool] = None
+    schema_: Optional[bool] = Field(None, alias="schema")
+    identifier: Optional[bool] = None
+    column: Optional[bool] = None
+
+
+class FreshnessMetadata(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    dbt_schema_version: Optional[str] = "https://schemas.getdbt.com/dbt/sources/v3.json"
+    dbt_version: Optional[str] = "1.6.0a1"
+    generated_at: Optional[datetime] = "2023-04-21T11:09:06.494387Z"
+    invocation_id: Optional[str] = "c4b245be-8edb-4ad7-ba54-9337ce594f5d"
+    env: Optional[Dict[str, str]] = {}
+
+
+class Status(Enum):
+    runtime_error = "runtime error"
+
+
+class SourceFreshnessRuntimeError(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    unique_id: str
+    error: Optional[Union[str, int]] = None
+    status: Status
+
+
+class Status1(Enum):
+    pass_ = "pass"
+    warn = "warn"
+    error = "error"
+    runtime_error = "runtime error"
+
+
+class PeriodEnum(Enum):
+    minute = "minute"
+    hour = "hour"
+    day = "day"
+
+
+class Time(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    count: Optional[int] = None
+    period: Optional[PeriodEnum] = None
+
+
+class TimingInfo(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+
+
+class ExternalPartition(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: Optional[str] = ""
+    description: Optional[str] = ""
+    data_type: Optional[str] = ""
+    meta: Optional[Dict[str, Any]] = {}
+
+
+class SourceConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+
+
+class ResourceType10(Enum):
+    macro = "macro"
+
+
+class SupportedLanguage(Enum):
+    python = "python"
+    sql = "sql"
+
+
+class MacroArgument(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    type: Optional[str] = None
+    description: Optional[str] = ""
+
+
+class ResourceType11(Enum):
+    doc = "doc"
+
+
+class Documentation(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    resource_type: ResourceType11
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    block_contents: str
+
+
+class ResourceType12(Enum):
+    exposure = "exposure"
+
+
+class Type2(Enum):
+    dashboard = "dashboard"
+    notebook = "notebook"
+    analysis = "analysis"
+    ml = "ml"
+    application = "application"
+
+
+class MaturityEnum(Enum):
+    low = "low"
+    medium = "medium"
+    high = "high"
+
+
+class Owner(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    email: Optional[str] = None
+    name: Optional[str] = None
+
+
+class ExposureConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+
+
+class ResourceType13(Enum):
+    metric = "metric"
+
+
+class MetricFilter(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    field: str
+    operator: str
+    value: str
+
+
+class PeriodEnum1(Enum):
+    day = "day"
+    week = "week"
+    month = "month"
+    year = "year"
+
+
+class MetricTime(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    count: Optional[int] = None
+    period: Optional[PeriodEnum1] = None
+
+
+class MetricConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+    group: Optional[str] = None
+
+
+class ResourceType14(Enum):
+    group = "group"
+
+
+class Group(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    resource_type: ResourceType14
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    owner: Owner
+
+
+class NodeConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    enabled: Optional[bool] = True
+    alias: Optional[str] = None
+    schema_: Optional[str] = Field(None, alias="schema")
+    database: Optional[str] = None
+    tags: Optional[Union[List[str], str]] = []
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    materialized: Optional[str] = "view"
+    incremental_strategy: Optional[str] = None
+    persist_docs: Optional[Dict[str, Any]] = {}
+    post_hook: Optional[List[Hook]] = Field([], alias="post-hook")
+    pre_hook: Optional[List[Hook]] = Field([], alias="pre-hook")
+    quoting: Optional[Dict[str, Any]] = {}
+    column_types: Optional[Dict[str, Any]] = {}
+    full_refresh: Optional[bool] = None
+    unique_key: Optional[Union[str, List[str]]] = None
+    on_schema_change: Optional[str] = "ignore"
+    grants: Optional[Dict[str, Any]] = {}
+    packages: Optional[List[str]] = []
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    contract: Optional[ContractConfig] = Field(
+        default_factory=lambda: ContractConfig.parse_obj({"enforced": False})
+    )
+
+
+class ColumnInfo(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    description: Optional[str] = ""
+    meta: Optional[Dict[str, Any]] = {}
+    data_type: Optional[str] = None
+    constraints: Optional[List[ColumnLevelConstraint]] = []
+    quote: Optional[bool] = None
+    tags: Optional[List[str]] = []
+
+
+class SingularTestNode(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    database: Optional[str] = None
+    schema_: str = Field(..., alias="schema")
+    name: str
+    resource_type: ResourceType1
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    fqn: List[str]
+    alias: str
+    checksum: FileHash
+    config: Optional[TestConfig] = Field(
+        default_factory=lambda: TestConfig.parse_obj(
+            {
+                "enabled": True,
+                "alias": None,
+                "schema": "dbt_test__audit",
+                "database": None,
+                "tags": [],
+                "meta": {},
+                "group": None,
+                "materialized": "test",
+                "severity": "ERROR",
+                "store_failures": None,
+                "where": None,
+                "limit": None,
+                "fail_calc": "count(*)",
+                "warn_if": "!= 0",
+                "error_if": "!= 0",
+            }
+        )
+    )
+    tags: Optional[List[str]] = []
+    description: Optional[str] = ""
+    columns: Optional[Dict[str, ColumnInfo]] = {}
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    patch_path: Optional[str] = None
+    build_path: Optional[str] = None
+    deferred: Optional[bool] = False
+    unrendered_config: Optional[Dict[str, Any]] = {}
+    created_at: Optional[float] = 1682075346.499299
+    config_call_dict: Optional[Dict[str, Any]] = {}
+    relation_name: Optional[str] = None
+    raw_code: Optional[str] = ""
+    language: Optional[str] = "sql"
+    refs: Optional[List[RefArgs]] = []
+    sources: Optional[List[List[str]]] = []
+    metrics: Optional[List[List[str]]] = []
+    depends_on: Optional[DependsOn] = Field(
+        default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []})
+    )
+    compiled_path: Optional[str] = None
+    compiled: Optional[bool] = False
+    compiled_code: Optional[str] = None
+    extra_ctes_injected: Optional[bool] = False
+    extra_ctes: Optional[List[InjectedCTE]] = []
+    contract: Optional[Contract] = Field(
+        default_factory=lambda: Contract.parse_obj(
+            {"enforced": False, "checksum": None}
+        )
+    )
+
+
+class HookNode(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    database: Optional[str] = None
+    schema_: str = Field(..., alias="schema")
+    name: str
+    resource_type: ResourceType2
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    fqn: List[str]
+    alias: str
+    checksum: FileHash
+    config: Optional[NodeConfig] = Field(
+        default_factory=lambda: NodeConfig.parse_obj(
+            {
+                "enabled": True,
+                "alias": None,
+                "schema": None,
+                "database": None,
+                "tags": [],
+                "meta": {},
+                "group": None,
+                "materialized": "view",
+                "incremental_strategy": None,
+                "persist_docs": {},
+                "quoting": {},
+                "column_types": {},
+                "full_refresh": None,
+                "unique_key": None,
+                "on_schema_change": "ignore",
+                "grants": {},
+                "packages": [],
+                "docs": {"show": True, "node_color": None},
+                "contract": {"enforced": False},
+                "post-hook": [],
+                "pre-hook": [],
+            }
+        )
+    )
+    tags: Optional[List[str]] = []
+    description: Optional[str] = ""
+    columns: Optional[Dict[str, ColumnInfo]] = {}
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    patch_path: Optional[str] = None
+    build_path: Optional[str] = None
+    deferred: Optional[bool] = False
+    unrendered_config: Optional[Dict[str, Any]] = {}
+    created_at: Optional[float] = 1682075346.4999008
+    config_call_dict: Optional[Dict[str, Any]] = {}
+    relation_name: Optional[str] = None
+    raw_code: Optional[str] = ""
+    language: Optional[str] = "sql"
+    refs: Optional[List[RefArgs]] = []
+    sources: Optional[List[List[str]]] = []
+    metrics: Optional[List[List[str]]] = []
+    depends_on: Optional[DependsOn] = Field(
+        default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []})
+    )
+    compiled_path: Optional[str] = None
+    compiled: Optional[bool] = False
+    compiled_code: Optional[str] = None
+    extra_ctes_injected: Optional[bool] = False
+    extra_ctes: Optional[List[InjectedCTE]] = []
+    contract: Optional[Contract] = Field(
+        default_factory=lambda: Contract.parse_obj(
+            {"enforced": False, "checksum": None}
+        )
+    )
+    index: Optional[int] = None
+
+
+class ModelNode(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    database: Optional[str] = None
+    schema_: str = Field(..., alias="schema")
+    name: str
+    resource_type: ResourceType3
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    fqn: List[str]
+    alias: str
+    checksum: FileHash
+    config: Optional[NodeConfig] = Field(
+        default_factory=lambda: NodeConfig.parse_obj(
+            {
+                "enabled": True,
+                "alias": None,
+                "schema": None,
+                "database": None,
+                "tags": [],
+                "meta": {},
+                "group": None,
+                "materialized": "view",
+                "incremental_strategy": None,
+                "persist_docs": {},
+                "quoting": {},
+                "column_types": {},
+                "full_refresh": None,
+                "unique_key": None,
+                "on_schema_change": "ignore",
+                "grants": {},
+                "packages": [],
+                "docs": {"show": True, "node_color": None},
+                "contract": {"enforced": False},
+                "post-hook": [],
+                "pre-hook": [],
+            }
+        )
+    )
+    tags: Optional[List[str]] = []
+    description: Optional[str] = ""
+    columns: Optional[Dict[str, ColumnInfo]] = {}
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    patch_path: Optional[str] = None
+    build_path: Optional[str] = None
+    deferred: Optional[bool] = False
+    unrendered_config: Optional[Dict[str, Any]] = {}
+    created_at: Optional[float] = 1682075346.500488
+    config_call_dict: Optional[Dict[str, Any]] = {}
+    relation_name: Optional[str] = None
+    raw_code: Optional[str] = ""
+    language: Optional[str] = "sql"
+    refs: Optional[List[RefArgs]] = []
+    sources: Optional[List[List[str]]] = []
+    metrics: Optional[List[List[str]]] = []
+    depends_on: Optional[DependsOn] = Field(
+        default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []})
+    )
+    compiled_path: Optional[str] = None
+    compiled: Optional[bool] = False
+    compiled_code: Optional[str] = None
+    extra_ctes_injected: Optional[bool] = False
+    extra_ctes: Optional[List[InjectedCTE]] = []
+    contract: Optional[Contract] = Field(
+        default_factory=lambda: Contract.parse_obj(
+            {"enforced": False, "checksum": None}
+        )
+    )
+    access: Optional[Access] = "protected"
+    constraints: Optional[List[ModelLevelConstraint]] = []
+    version: Optional[Union[str, float]] = None
+    latest_version: Optional[Union[str, float]] = None
+
+
+class RPCNode(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    database: Optional[str] = None
+    schema_: str = Field(..., alias="schema")
+    name: str
+    resource_type: ResourceType4
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    fqn: List[str]
+    alias: str
+    checksum: FileHash
+    config: Optional[NodeConfig] = Field(
+        default_factory=lambda: NodeConfig.parse_obj(
+            {
+                "enabled": True,
+                "alias": None,
+                "schema": None,
+                "database": None,
+                "tags": [],
+                "meta": {},
+                "group": None,
+                "materialized": "view",
+                "incremental_strategy": None,
+                "persist_docs": {},
+                "quoting": {},
+                "column_types": {},
+                "full_refresh": None,
+                "unique_key": None,
+                "on_schema_change": "ignore",
+                "grants": {},
+                "packages": [],
+                "docs": {"show": True, "node_color": None},
+                "contract": {"enforced": False},
+                "post-hook": [],
+                "pre-hook": [],
+            }
+        )
+    )
+    tags: Optional[List[str]] = []
+    description: Optional[str] = ""
+    columns: Optional[Dict[str, ColumnInfo]] = {}
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    patch_path: Optional[str] = None
+    build_path: Optional[str] = None
+    deferred: Optional[bool] = False
+    unrendered_config: Optional[Dict[str, Any]] = {}
+    created_at: Optional[float] = 1682075346.50125
+    config_call_dict: Optional[Dict[str, Any]] = {}
+    relation_name: Optional[str] = None
+    raw_code: Optional[str] = ""
+    language: Optional[str] = "sql"
+    refs: Optional[List[RefArgs]] = []
+    sources: Optional[List[List[str]]] = []
+    metrics: Optional[List[List[str]]] = []
+    depends_on: Optional[DependsOn] = Field(
+        default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []})
+    )
+    compiled_path: Optional[str] = None
+    compiled: Optional[bool] = False
+    compiled_code: Optional[str] = None
+    extra_ctes_injected: Optional[bool] = False
+    extra_ctes: Optional[List[InjectedCTE]] = []
+    contract: Optional[Contract] = Field(
+        default_factory=lambda: Contract.parse_obj(
+            {"enforced": False, "checksum": None}
+        )
+    )
+
+
+class SqlNode(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    database: Optional[str] = None
+    schema_: str = Field(..., alias="schema")
+    name: str
+    resource_type: ResourceType5
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    fqn: List[str]
+    alias: str
+    checksum: FileHash
+    config: Optional[NodeConfig] = Field(
+        default_factory=lambda: NodeConfig.parse_obj(
+            {
+                "enabled": True,
+                "alias": None,
+                "schema": None,
+                "database": None,
+                "tags": [],
+                "meta": {},
+                "group": None,
+                "materialized": "view",
+                "incremental_strategy": None,
+                "persist_docs": {},
+                "quoting": {},
+                "column_types": {},
+                "full_refresh": None,
+                "unique_key": None,
+                "on_schema_change": "ignore",
+                "grants": {},
+                "packages": [],
+                "docs": {"show": True, "node_color": None},
+                "contract": {"enforced": False},
+                "post-hook": [],
+                "pre-hook": [],
+            }
+        )
+    )
+    tags: Optional[List[str]] = []
+    description: Optional[str] = ""
+    columns: Optional[Dict[str, ColumnInfo]] = {}
+    meta: Optional[Dict[str, Any]] = {}
+    group: Optional[str] = None
+    docs: Optional[Docs] = Field(
+        default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None})
+    )
+    patch_path: Optional[str] = None
+    build_path: Optional[str] = None
+    deferred: Optional[bool] = False
+    unrendered_config: Optional[Dict[str, Any]] = {}
+    created_at: Optional[float] = 1682075346.501826
+    config_call_dict: Optional[Dict[str, Any]] = {}
+    relation_name: Optional[str] = None
+    raw_code: Optional[str] = ""
+    language: Optional[str] = "sql"
+    refs: Optional[List[RefArgs]] = []
+    sources: Optional[List[List[str]]] = []
+    metrics: Optional[List[List[str]]] = []
+    depends_on: Optional[DependsOn] = Field(
+        default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []})
+    )
+    compiled_path: Optional[str] = None
+    compiled: Optional[bool] = False
+    compiled_code: Optional[str] = None
+    extra_ctes_injected: Optional[bool] = False
+    extra_ctes: Optional[List[InjectedCTE]] = []
+    contract: Optional[Contract] = Field(
+        default_factory=lambda: Contract.parse_obj(
+            {"enforced": False, "checksum": None}
+        )
+    )
+
+
+class GenericTestNode(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    test_metadata: TestMetadata
+    database: Optional[str] = None
+    schema_: str = Field(..., alias="schema")
+    name: str
+    resource_type: ResourceType6
+    package_name: str
+    path: str
+    original_file_path: str
+    unique_id: str
+    fqn: List[str]
+    alias: str
+    checksum: FileHash
+    config: Optional[TestConfig] = Field(
+        default_factory=lambda:
TestConfig.parse_obj( + { + "enabled": True, + "alias": None, + "schema": "dbt_test__audit", + "database": None, + "tags": [], + "meta": {}, + "group": None, + "materialized": "test", + "severity": "ERROR", + "store_failures": None, + "where": None, + "limit": None, + "fail_calc": "count(*)", + "warn_if": "!= 0", + "error_if": "!= 0", + } + ) + ) + tags: Optional[List[str]] = [] + description: Optional[str] = "" + columns: Optional[Dict[str, ColumnInfo]] = {} + meta: Optional[Dict[str, Any]] = {} + group: Optional[str] = None + docs: Optional[Docs] = Field( + default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None}) + ) + patch_path: Optional[str] = None + build_path: Optional[str] = None + deferred: Optional[bool] = False + unrendered_config: Optional[Dict[str, Any]] = {} + created_at: Optional[float] = 1682075346.502492 + config_call_dict: Optional[Dict[str, Any]] = {} + relation_name: Optional[str] = None + raw_code: Optional[str] = "" + language: Optional[str] = "sql" + refs: Optional[List[RefArgs]] = [] + sources: Optional[List[List[str]]] = [] + metrics: Optional[List[List[str]]] = [] + depends_on: Optional[DependsOn] = Field( + default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []}) + ) + compiled_path: Optional[str] = None + compiled: Optional[bool] = False + compiled_code: Optional[str] = None + extra_ctes_injected: Optional[bool] = False + extra_ctes: Optional[List[InjectedCTE]] = [] + contract: Optional[Contract] = Field( + default_factory=lambda: Contract.parse_obj( + {"enforced": False, "checksum": None} + ) + ) + column_name: Optional[str] = None + file_key_name: Optional[str] = None + attached_node: Optional[str] = None + + +class SnapshotNode(BaseModel): + class Config: + extra = Extra.allow + + database: Optional[str] = None + schema_: str = Field(..., alias="schema") + name: str + resource_type: ResourceType7 + package_name: str + path: str + original_file_path: str + unique_id: str + fqn: List[str] + alias: str + checksum: FileHash + config: SnapshotConfig + tags: Optional[List[str]] = [] + description: Optional[str] = "" + columns: Optional[Dict[str, ColumnInfo]] = {} + meta: Optional[Dict[str, Any]] = {} + group: Optional[str] = None + docs: Optional[Docs] = Field( + default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None}) + ) + patch_path: Optional[str] = None + build_path: Optional[str] = None + deferred: Optional[bool] = False + unrendered_config: Optional[Dict[str, Any]] = {} + created_at: Optional[float] = 1682075346.503581 + config_call_dict: Optional[Dict[str, Any]] = {} + relation_name: Optional[str] = None + raw_code: Optional[str] = "" + language: Optional[str] = "sql" + refs: Optional[List[RefArgs]] = [] + sources: Optional[List[List[str]]] = [] + metrics: Optional[List[List[str]]] = [] + depends_on: Optional[DependsOn] = Field( + default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []}) + ) + compiled_path: Optional[str] = None + compiled: Optional[bool] = False + compiled_code: Optional[str] = None + extra_ctes_injected: Optional[bool] = False + extra_ctes: Optional[List[InjectedCTE]] = [] + contract: Optional[Contract] = Field( + default_factory=lambda: Contract.parse_obj( + {"enforced": False, "checksum": None} + ) + ) + + +class SeedNode(BaseModel): + class Config: + extra = Extra.allow + + database: Optional[str] = None + schema_: str = Field(..., alias="schema") + name: str + resource_type: ResourceType8 + package_name: str + path: str + original_file_path: str + unique_id: str + fqn: 
List[str] + alias: str + checksum: FileHash + config: Optional[SeedConfig] = Field( + default_factory=lambda: SeedConfig.parse_obj( + { + "enabled": True, + "alias": None, + "schema": None, + "database": None, + "tags": [], + "meta": {}, + "group": None, + "materialized": "seed", + "incremental_strategy": None, + "persist_docs": {}, + "quoting": {}, + "column_types": {}, + "full_refresh": None, + "unique_key": None, + "on_schema_change": "ignore", + "grants": {}, + "packages": [], + "docs": {"show": True, "node_color": None}, + "contract": {"enforced": False}, + "quote_columns": None, + "post-hook": [], + "pre-hook": [], + } + ) + ) + tags: Optional[List[str]] = [] + description: Optional[str] = "" + columns: Optional[Dict[str, ColumnInfo]] = {} + meta: Optional[Dict[str, Any]] = {} + group: Optional[str] = None + docs: Optional[Docs] = Field( + default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None}) + ) + patch_path: Optional[str] = None + build_path: Optional[str] = None + deferred: Optional[bool] = False + unrendered_config: Optional[Dict[str, Any]] = {} + created_at: Optional[float] = 1682075346.504591 + config_call_dict: Optional[Dict[str, Any]] = {} + relation_name: Optional[str] = None + raw_code: Optional[str] = "" + root_path: Optional[str] = None + depends_on: Optional[MacroDependsOn] = Field( + default_factory=lambda: MacroDependsOn.parse_obj({"macros": []}) + ) + + +class FreshnessThreshold(BaseModel): + class Config: + extra = Extra.allow + + warn_after: Optional[Time] = Field( + default_factory=lambda: Time.parse_obj({"count": None, "period": None}) + ) + error_after: Optional[Time] = Field( + default_factory=lambda: Time.parse_obj({"count": None, "period": None}) + ) + filter: Optional[str] = None + + +class SourceFreshnessOutput(BaseModel): + class Config: + extra = Extra.allow + + unique_id: str + max_loaded_at: datetime + snapshotted_at: datetime + max_loaded_at_time_ago_in_s: float + status: Status1 + criteria: FreshnessThreshold + adapter_response: Dict[str, Any] + timing: List[TimingInfo] + thread_id: str + execution_time: float + + +class ExternalTable(BaseModel): + class Config: + extra = Extra.allow + + location: Optional[str] = None + file_format: Optional[str] = None + row_format: Optional[str] = None + tbl_properties: Optional[str] = None + partitions: Optional[Union[List[str], List[ExternalPartition]]] = None + + +class Macro(BaseModel): + class Config: + extra = Extra.allow + + name: str + resource_type: ResourceType10 + package_name: str + path: str + original_file_path: str + unique_id: str + macro_sql: str + depends_on: Optional[MacroDependsOn] = Field( + default_factory=lambda: MacroDependsOn.parse_obj({"macros": []}) + ) + description: Optional[str] = "" + meta: Optional[Dict[str, Any]] = {} + docs: Optional[Docs] = Field( + default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None}) + ) + patch_path: Optional[str] = None + arguments: Optional[List[MacroArgument]] = [] + created_at: Optional[float] = 1682075346.505871 + supported_languages: Optional[List[SupportedLanguage]] = None + + +class Exposure(BaseModel): + class Config: + extra = Extra.allow + + name: str + resource_type: ResourceType12 + package_name: str + path: str + original_file_path: str + unique_id: str + fqn: List[str] + type: Type2 + owner: Owner + description: Optional[str] = "" + label: Optional[str] = None + maturity: Optional[MaturityEnum] = None + meta: Optional[Dict[str, Any]] = {} + tags: Optional[List[str]] = [] + config: Optional[ExposureConfig] = 
Field( + default_factory=lambda: ExposureConfig.parse_obj({"enabled": True}) + ) + unrendered_config: Optional[Dict[str, Any]] = {} + url: Optional[str] = None + depends_on: Optional[DependsOn] = Field( + default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []}) + ) + refs: Optional[List[RefArgs]] = [] + sources: Optional[List[List[str]]] = [] + metrics: Optional[List[List[str]]] = [] + created_at: Optional[float] = 1682075346.50651 + + +class Metric(BaseModel): + class Config: + extra = Extra.allow + + name: str + resource_type: ResourceType13 + package_name: str + path: str + original_file_path: str + unique_id: str + fqn: List[str] + description: str + label: str + calculation_method: str + expression: str + filters: List[MetricFilter] + time_grains: List[str] + dimensions: List[str] + timestamp: Optional[str] = None + window: Optional[MetricTime] = None + model: Optional[str] = None + model_unique_id: Optional[str] = None + meta: Optional[Dict[str, Any]] = {} + tags: Optional[List[str]] = [] + config: Optional[MetricConfig] = Field( + default_factory=lambda: MetricConfig.parse_obj({"enabled": True, "group": None}) + ) + unrendered_config: Optional[Dict[str, Any]] = {} + sources: Optional[List[List[str]]] = [] + depends_on: Optional[DependsOn] = Field( + default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []}) + ) + refs: Optional[List[RefArgs]] = [] + metrics: Optional[List[List[str]]] = [] + created_at: Optional[float] = 1682075346.507186 + group: Optional[str] = None + + +class AnalysisNode(BaseModel): + class Config: + extra = Extra.allow + + database: Optional[str] = None + schema_: str = Field(..., alias="schema") + name: str + resource_type: ResourceType + package_name: str + path: str + original_file_path: str + unique_id: str + fqn: List[str] + alias: str + checksum: FileHash + config: Optional[NodeConfig] = Field( + default_factory=lambda: NodeConfig.parse_obj( + { + "enabled": True, + "alias": None, + "schema": None, + "database": None, + "tags": [], + "meta": {}, + "group": None, + "materialized": "view", + "incremental_strategy": None, + "persist_docs": {}, + "quoting": {}, + "column_types": {}, + "full_refresh": None, + "unique_key": None, + "on_schema_change": "ignore", + "grants": {}, + "packages": [], + "docs": {"show": True, "node_color": None}, + "contract": {"enforced": False}, + "post-hook": [], + "pre-hook": [], + } + ) + ) + tags: Optional[List[str]] = [] + description: Optional[str] = "" + columns: Optional[Dict[str, ColumnInfo]] = {} + meta: Optional[Dict[str, Any]] = {} + group: Optional[str] = None + docs: Optional[Docs] = Field( + default_factory=lambda: Docs.parse_obj({"show": True, "node_color": None}) + ) + patch_path: Optional[str] = None + build_path: Optional[str] = None + deferred: Optional[bool] = False + unrendered_config: Optional[Dict[str, Any]] = {} + created_at: Optional[float] = 1682075346.49809 + config_call_dict: Optional[Dict[str, Any]] = {} + relation_name: Optional[str] = None + raw_code: Optional[str] = "" + language: Optional[str] = "sql" + refs: Optional[List[RefArgs]] = [] + sources: Optional[List[List[str]]] = [] + metrics: Optional[List[List[str]]] = [] + depends_on: Optional[DependsOn] = Field( + default_factory=lambda: DependsOn.parse_obj({"macros": [], "nodes": []}) + ) + compiled_path: Optional[str] = None + compiled: Optional[bool] = False + compiled_code: Optional[str] = None + extra_ctes_injected: Optional[bool] = False + extra_ctes: Optional[List[InjectedCTE]] = [] + contract: Optional[Contract] 
= Field( + default_factory=lambda: Contract.parse_obj( + {"enforced": False, "checksum": None} + ) + ) + + +class SourceDefinition(BaseModel): + class Config: + extra = Extra.allow + + database: Optional[str] = None + schema_: str = Field(..., alias="schema") + name: str + resource_type: ResourceType9 + package_name: str + path: str + original_file_path: str + unique_id: str + fqn: List[str] + source_name: str + source_description: str + loader: str + identifier: str + quoting: Optional[Quoting] = Field( + default_factory=lambda: Quoting.parse_obj( + {"database": None, "schema": None, "identifier": None, "column": None} + ) + ) + loaded_at_field: Optional[str] = None + freshness: Optional[FreshnessThreshold] = None + external: Optional[ExternalTable] = None + description: Optional[str] = "" + columns: Optional[Dict[str, ColumnInfo]] = {} + meta: Optional[Dict[str, Any]] = {} + source_meta: Optional[Dict[str, Any]] = {} + tags: Optional[List[str]] = [] + config: Optional[SourceConfig] = Field( + default_factory=lambda: SourceConfig.parse_obj({"enabled": True}) + ) + patch_path: Optional[str] = None + unrendered_config: Optional[Dict[str, Any]] = {} + relation_name: Optional[str] = None + created_at: Optional[float] = 1682075346.505582 + + +class Model(BaseModel): + class Config: + extra = Extra.allow + + metadata: ManifestMetadata = Field(..., description="Metadata about the manifest") + nodes: Dict[ + str, + Union[ + AnalysisNode, + SingularTestNode, + HookNode, + ModelNode, + RPCNode, + SqlNode, + GenericTestNode, + SnapshotNode, + SeedNode, + ], + ] = Field( + ..., description="The nodes defined in the dbt project and its dependencies" + ) + sources: Dict[str, SourceDefinition] = Field( + ..., description="The sources defined in the dbt project and its dependencies" + ) + macros: Dict[str, Macro] = Field( + ..., description="The macros defined in the dbt project and its dependencies" + ) + docs: Dict[str, Documentation] = Field( + ..., description="The docs defined in the dbt project and its dependencies" + ) + exposures: Dict[str, Exposure] = Field( + ..., description="The exposures defined in the dbt project and its dependencies" + ) + metrics: Dict[str, Metric] = Field( + ..., description="The metrics defined in the dbt project and its dependencies" + ) + groups: Dict[str, Group] = Field( + ..., description="The groups defined in the dbt project" + ) + selectors: Dict[str, Any] = Field( + ..., description="The selectors defined in selectors.yml" + ) + disabled: Optional[ + Dict[ + str, + List[ + Union[ + AnalysisNode, + SingularTestNode, + HookNode, + ModelNode, + RPCNode, + SqlNode, + GenericTestNode, + SnapshotNode, + SeedNode, + SourceDefinition, + Exposure, + Metric, + ] + ], + ] + ] = Field(None, description="A mapping of the disabled nodes in the target") + parent_map: Optional[Dict[str, List[str]]] = Field( + None, description="A mapping from\xa0child nodes to their dependencies" + ) + child_map: Optional[Dict[str, List[str]]] = Field( + None, description="A mapping from parent nodes to their dependents" + ) + group_map: Optional[Dict[str, List[str]]] = Field( + None, description="A mapping from group names to their nodes" + ) diff --git a/warehouse/scripts/dbt_artifacts/run_results.py b/warehouse/scripts/dbt_artifacts/run_results.py new file mode 100644 index 0000000000..72297aea2c --- /dev/null +++ b/warehouse/scripts/dbt_artifacts/run_results.py @@ -0,0 +1,150 @@ +# generated by datamodel-codegen: +# filename: https://schemas.getdbt.com/dbt/run-results/v4.json +# timestamp: 
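For orientation, here is a minimal sketch (not part of the diff) of loading a manifest with the generated models above; the `dbt_artifacts.manifest` import path and the `target/manifest.json` location are assumptions based on this PR's file layout and dbt's default output directory.

# Hypothetical usage of the generated manifest models (pydantic v1 API)
from dbt_artifacts.manifest import Model as ManifestModel  # assumed import path

manifest = ManifestModel.parse_file("target/manifest.json")
# nodes is a Dict[str, Union[...]]; pydantic picks the matching node class per entry
models = [n for n in manifest.nodes.values() if type(n).__name__ == "ModelNode"]
print(f"{len(models)} models, {len(manifest.sources)} sources")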
diff --git a/warehouse/scripts/dbt_artifacts/run_results.py b/warehouse/scripts/dbt_artifacts/run_results.py
new file mode 100644
index 0000000000..72297aea2c
--- /dev/null
+++ b/warehouse/scripts/dbt_artifacts/run_results.py
@@ -0,0 +1,150 @@
+# generated by datamodel-codegen:
+#   filename:  https://schemas.getdbt.com/dbt/run-results/v4.json
+#   timestamp: 2023-05-15T18:21:36+00:00
+
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, Extra, Field
+
+
+class BaseArtifactMetadata(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    dbt_schema_version: str
+    dbt_version: Optional[str] = "1.0.0b2"
+    generated_at: Optional[datetime] = "2021-11-02T20:18:06.799863Z"
+    invocation_id: Optional[str] = None
+    env: Optional[Dict[str, str]] = {}
+
+
+class StatusEnum(Enum):
+    success = "success"
+    error = "error"
+    skipped = "skipped"
+
+
+class StatusEnum1(Enum):
+    pass_ = "pass"
+    error = "error"
+    fail = "fail"
+    warn = "warn"
+    skipped = "skipped"
+
+
+class StatusEnum2(Enum):
+    pass_ = "pass"
+    warn = "warn"
+    error = "error"
+    runtime_error = "runtime error"
+
+
+class TimingInfo(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+
+
+class FreshnessMetadata(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    dbt_schema_version: Optional[
+        str
+    ] = "https://schemas.getdbt.com/dbt/sources/v3.json"
+    dbt_version: Optional[str] = "1.0.0b2"
+    generated_at: Optional[datetime] = "2021-11-02T20:18:06.796684Z"
+    invocation_id: Optional[str] = None
+    env: Optional[Dict[str, str]] = {}
+
+
+class Status(Enum):
+    runtime_error = "runtime error"
+
+
+class SourceFreshnessRuntimeError(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    unique_id: str
+    error: Optional[Union[str, int]] = None
+    status: Status
+
+
+class Status1(Enum):
+    pass_ = "pass"
+    warn = "warn"
+    error = "error"
+    runtime_error = "runtime error"
+
+
+class PeriodEnum(Enum):
+    minute = "minute"
+    hour = "hour"
+    day = "day"
+
+
+class Time(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    count: Optional[int] = None
+    period: Optional[PeriodEnum] = None
+
+
+class RunResultOutput(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    status: Union[StatusEnum, StatusEnum1, StatusEnum2]
+    timing: List[TimingInfo]
+    thread_id: str
+    execution_time: float
+    adapter_response: Dict[str, Any]
+    message: Optional[str] = None
+    failures: Optional[int] = None
+    unique_id: str
+
+
+class FreshnessThreshold(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    warn_after: Optional[Time] = Field(
+        default_factory=lambda: Time.parse_obj({"count": None, "period": None})
+    )
+    error_after: Optional[Time] = Field(
+        default_factory=lambda: Time.parse_obj({"count": None, "period": None})
+    )
+    filter: Optional[str] = None
+
+
+class Model(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    metadata: BaseArtifactMetadata
+    results: List[RunResultOutput]
+    elapsed_time: float
+    args: Optional[Dict[str, Any]] = {}
+
+
+class SourceFreshnessOutput(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    unique_id: str
+    max_loaded_at: datetime
+    snapshotted_at: datetime
+    max_loaded_at_time_ago_in_s: float
+    status: Status1
+    criteria: FreshnessThreshold
+    adapter_response: Dict[str, Any]
+    timing: List[TimingInfo]
+    thread_id: str
+    execution_time: float
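A short, hedged example of consuming the run-results models above; the class names mirror the generated code, while the module name and the `target/run_results.json` path are assumptions.

# Hypothetical: summarize non-successful results from a dbt invocation
from run_results import Model as RunResults  # assumed module name

results = RunResults.parse_file("target/run_results.json")
# status is a Union of three enums; .value yields the raw string for any of them
bad = [r for r in results.results if r.status.value in ("error", "fail", "warn")]
for r in bad:
    print(r.unique_id, r.status.value, r.message or "")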
diff --git a/warehouse/scripts/dbt_artifacts/sources.py b/warehouse/scripts/dbt_artifacts/sources.py
new file mode 100644
index 0000000000..dff0fa4e1b
--- /dev/null
+++ b/warehouse/scripts/dbt_artifacts/sources.py
@@ -0,0 +1,103 @@
+# generated by datamodel-codegen:
+#   filename:  https://schemas.getdbt.com/dbt/sources/v3.json
+#   timestamp: 2023-05-15T18:21:37+00:00
+
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, Extra, Field
+
+
+class FreshnessMetadata(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    dbt_schema_version: Optional[
+        str
+    ] = "https://schemas.getdbt.com/dbt/sources/v3.json"
+    dbt_version: Optional[str] = "1.0.0b2"
+    generated_at: Optional[datetime] = "2021-11-02T20:18:06.796684Z"
+    invocation_id: Optional[str] = None
+    env: Optional[Dict[str, str]] = {}
+
+
+class Status(Enum):
+    runtime_error = "runtime error"
+
+
+class SourceFreshnessRuntimeError(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    unique_id: str
+    error: Optional[Union[str, int]] = None
+    status: Status
+
+
+class Status1(Enum):
+    pass_ = "pass"
+    warn = "warn"
+    error = "error"
+    runtime_error = "runtime error"
+
+
+class PeriodEnum(Enum):
+    minute = "minute"
+    hour = "hour"
+    day = "day"
+
+
+class Time(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    count: Optional[int] = None
+    period: Optional[PeriodEnum] = None
+
+
+class TimingInfo(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    name: str
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+
+
+class FreshnessThreshold(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    warn_after: Optional[Time] = Field(
+        default_factory=lambda: Time.parse_obj({"count": None, "period": None})
+    )
+    error_after: Optional[Time] = Field(
+        default_factory=lambda: Time.parse_obj({"count": None, "period": None})
+    )
+    filter: Optional[str] = None
+
+
+class SourceFreshnessOutput(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    unique_id: str
+    max_loaded_at: datetime
+    snapshotted_at: datetime
+    max_loaded_at_time_ago_in_s: float
+    status: Status1
+    criteria: FreshnessThreshold
+    adapter_response: Dict[str, Any]
+    timing: List[TimingInfo]
+    thread_id: str
+    execution_time: float
+
+
+class Model(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    metadata: FreshnessMetadata
+    results: List[Union[SourceFreshnessRuntimeError, SourceFreshnessOutput]]
+    elapsed_time: float
diff --git a/warehouse/scripts/json_to_docblocks.py b/warehouse/scripts/json_to_docblocks.py
old mode 100644
new mode 100755
index ddd740c3ee..5de79ce114
--- a/warehouse/scripts/json_to_docblocks.py
+++ b/warehouse/scripts/json_to_docblocks.py
@@ -1,7 +1,11 @@
+# type: ignore
 """
 Super useful with https://www.convertjson.com/html-table-to-json.htm

 Originally used to produce dbt docs from https://gtfs.org/reference/static#field-definitions
+
+NOTE: This won't work without updates as the artifacts have changed in https://github.com/cal-itp/data-infra/pull/2598.
+You will need to re-create missing functionality on top of the new generated types.
 """
 import json
 import sys
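Since the NOTE above says this script needs rework against the new generated types, here is a hedged sketch of the core transformation it performs (JSON rows to dbt doc blocks); the row shape ({"name": ..., "description": ...}) is an assumption for illustration, not taken from the script itself.

import json
import sys


def to_docblock(row: dict) -> str:
    # render one dbt {% docs %} block per input row (hypothetical row shape)
    return "{{% docs {name} %}}\n{description}\n{{% enddocs %}}".format(
        name=row["name"], description=row.get("description", "")
    )


if __name__ == "__main__":
    rows = json.load(sys.stdin)  # e.g. a JSON array from convertjson.com
    print("\n\n".join(to_docblock(row) for row in rows))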
""" import json import sys diff --git a/warehouse/scripts/publish.py b/warehouse/scripts/publish.py old mode 100644 new mode 100755 index 213db87f00..8c926ea872 --- a/warehouse/scripts/publish.py +++ b/warehouse/scripts/publish.py @@ -25,10 +25,10 @@ import shapely.wkt # type: ignore import typer from dbt_artifacts import ( - BaseNode, CkanDestination, Exposure, Manifest, + NodeModelMixin, TileFormat, TilesDestination, ) @@ -45,7 +45,7 @@ MANIFEST_DEFAULT = f"{DBT_ARTIFACTS_BUCKET}/latest/manifest.json" PUBLISH_BUCKET = os.environ["CALITP_BUCKET__PUBLISH"] -app = typer.Typer() +app = typer.Typer(pretty_exceptions_enable=False) WGS84 = "EPSG:4326" # "standard" lat/lon coordinate system CHUNK_SIZE = ( @@ -266,7 +266,11 @@ def ckan_request(action: str, data: Dict) -> Response: def _generate_exposure_documentation( exposure: Exposure, ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: - assert exposure.meta and exposure.meta.destinations is not None + assert ( + exposure.meta + and exposure.meta.destinations is not None + and exposure.depends_on is not None + ) try: resources = next( dest @@ -281,7 +285,7 @@ def _generate_exposure_documentation( metadata_rows: List[Dict[str, Any]] = [] dictionary_rows: List[Dict[str, Any]] = [] - for node in exposure.depends_on.resolved_nodes: + for node in exposure.depends_on.resolved_nodes: # type: ignore[attr-defined] assert exposure.meta is not None name = strip_modelname(node.name) @@ -375,7 +379,11 @@ def _publish_exposure( bucket: str, exposure: Exposure, publish: bool, model: Optional[str] = None ): ts = pendulum.now() - assert exposure.meta is not None and exposure.depends_on.nodes is not None + assert ( + exposure.meta is not None + and exposure.depends_on is not None + and exposure.depends_on.nodes is not None + ) for destination in exposure.meta.destinations: with tempfile.TemporaryDirectory() as tmpdir: if isinstance(destination, CkanDestination): @@ -402,7 +410,7 @@ def _publish_exposure( ) with fs.open(hive_path, "w", newline="") as f: writer = csv.DictWriter( - f, fieldnames=[key.upper() for key in cls.__fields__.keys()] + f, fieldnames=[key.upper() for key in cls.__fields__.keys()] # type: ignore[attr-defined] ) writer.writeheader() for row in rows: @@ -420,7 +428,9 @@ def _publish_exposure( typer.secho( f"handling {model_name} {resource.id}", fg=typer.colors.MAGENTA ) - node = BaseNode._instances[f"model.calitp_warehouse.{model_name}"] + node = NodeModelMixin._instances[ + f"model.calitp_warehouse.{model_name}" + ] fpath = strip_modelname( os.path.join(tmpdir, destination.filename(model_name)) @@ -434,7 +444,9 @@ def _publish_exposure( precisions = {} + assert node.columns is not None for name, column in node.columns.items(): + assert column.meta is not None ckan_precision = column.meta.get("ckan.precision") if ckan_precision: assert isinstance(ckan_precision, (str, int)) @@ -482,7 +494,7 @@ def _publish_exposure( elif isinstance(destination, TilesDestination): layer_geojson_paths: Dict[str, Path] = {} for model in exposure.depends_on.nodes: - node = BaseNode._instances[model] + node = NodeModelMixin._instances[model] geojsonl_fpath = Path( os.path.join(tmpdir, f"{strip_modelname(node.name)}.geojsonl") @@ -523,7 +535,7 @@ def _publish_exposure( fg=typer.colors.GREEN, ) fs = gcsfs.GCSFileSystem(token="google_default") - fs.put(geojsonl_fpath, hive_path) + fs.put(str(geojsonl_fpath), hive_path) if destination.tile_format == TileFormat.mbtiles: mbtiles_path = os.path.join(tmpdir, "tiles.mbtiles") @@ -594,7 +606,7 @@ def document_exposure( 
 ):
     with opener(file, "w", newline="") as f:
         writer = csv.DictWriter(
-            f, fieldnames=[key.upper() for key in cls.__fields__.keys()]
+            f, fieldnames=[key.upper() for key in cls.__fields__.keys()]  # type: ignore[attr-defined]
         )
         writer.writeheader()
         for row in rows:
diff --git a/warehouse/scripts/run_and_upload.py b/warehouse/scripts/run_and_upload.py
index b45656ee30..f672401dbc 100755
--- a/warehouse/scripts/run_and_upload.py
+++ b/warehouse/scripts/run_and_upload.py
@@ -13,12 +13,15 @@
 import sentry_sdk
 import typer
 from dbt_artifacts import (
-    DbtResourceType,
+    DbtNode,
+    DependsOn,
+    GenericTestNode,
     Manifest,
-    Node,
-    RunResult,
+    ModelNode,
+    RunResultOutput,
     RunResults,
     RunResultStatus,
+    SeedNode,
 )

 CALITP_BUCKET__DBT_ARTIFACTS = os.getenv("CALITP_BUCKET__DBT_ARTIFACTS")
@@ -60,14 +63,14 @@ class DbtMetabaseSyncFailure(Exception):
     pass

-def get_failure_context(failure: RunResult, node: Node) -> Dict[str, Any]:
+def get_failure_context(failure: RunResultOutput, node: DbtNode) -> Dict[str, Any]:
     context: Dict[str, Any] = {
         "unique_id": failure.unique_id,
         "path": str(node.original_file_path),
     }
     if failure.unique_id.startswith("test"):
-        if node.depends_on:
-            context["models"] = node.depends_on.nodes
+        assert isinstance(node.depends_on, DependsOn)
+        context["models"] = node.depends_on.nodes
     return context
@@ -88,16 +91,16 @@ def report_failures_to_sentry(
     ]
     for failure in failures:
         node = manifest.nodes[failure.unique_id]
-        fingerprint = [failure.status, failure.unique_id]
+        fingerprint = [str(failure.status), failure.unique_id]
         # this is awkward and manual; maybe could do dynamically
         exc_types = {
-            (DbtResourceType.seed, RunResultStatus.error): DbtSeedError,
-            (DbtResourceType.model, RunResultStatus.error): DbtModelError,
-            (DbtResourceType.test, RunResultStatus.error): DbtTestError,
-            (DbtResourceType.test, RunResultStatus.fail): DbtTestFail,
-            (DbtResourceType.test, RunResultStatus.warn): DbtTestWarn,
+            (SeedNode, RunResultStatus.error): DbtSeedError,
+            (ModelNode, RunResultStatus.error): DbtModelError,
+            (GenericTestNode, RunResultStatus.error): DbtTestError,
+            (GenericTestNode, RunResultStatus.fail): DbtTestFail,
+            (GenericTestNode, RunResultStatus.warn): DbtTestWarn,
         }
-        exc_type = exc_types.get((node.resource_type, failure.status), DbtException)
+        exc_type = exc_types.get((type(node), failure.status), DbtException)
         if verbose:
             typer.secho(
                 f"reporting failure of {node.resource_type} with fingerprint {fingerprint}",
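The exc_types change above swaps a resource-type enum for the node's concrete class as the dispatch key. A toy illustration (all names local to this sketch, standing in for the real classes) of why keying on type(node) works there, and its one caveat:

from enum import Enum


class RunResultStatus(str, Enum):  # assumption: mirrors the real enum's values
    error = "error"
    fail = "fail"
    warn = "warn"


class SeedNode: ...


class DbtException(Exception): ...


class DbtSeedError(DbtException): ...


exc_types = {(SeedNode, RunResultStatus.error): DbtSeedError}

node = SeedNode()
exc_type = exc_types.get((type(node), RunResultStatus.error), DbtException)
assert exc_type is DbtSeedError
# note: the lookup is by exact class, so a subclass of SeedNode would fall
# back to DbtException rather than matching the SeedNode entry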
diff --git a/warehouse/scripts/visualize.py b/warehouse/scripts/visualize.py
old mode 100644
new mode 100755
index f1f0ea1087..f27c0371b2
--- a/warehouse/scripts/visualize.py
+++ b/warehouse/scripts/visualize.py
@@ -9,8 +9,16 @@
 import gcsfs  # type: ignore
 import networkx as nx  # type: ignore
 import typer
-from catalog import Catalog
-from dbt_artifacts import BaseNode, Manifest, RunResult, RunResults, Seed, Source, Test
+from dbt_artifacts import (
+    Catalog,
+    DbtNode,
+    GenericTestNode,
+    Manifest,
+    RunResultOutput,
+    RunResults,
+    SeedNode,
+    SourceDefinition,
+)

 app = typer.Typer(pretty_exceptions_enable=False)
@@ -41,7 +49,7 @@ def read_artifacts_folder(
 def should_display(
-    node: BaseNode,
+    node: DbtNode,
     analyses: bool = False,
     models: bool = True,
     seeds: bool = False,
@@ -51,11 +59,11 @@
     include: Optional[List[str]] = None,
     exclude: Optional[List[str]] = None,
 ) -> bool:
-    if isinstance(node, Seed) and not seeds:
+    if isinstance(node, SeedNode) and not seeds:
         return False
-    if isinstance(node, Test) and not tests:
+    if isinstance(node, GenericTestNode) and not tests:
         return False
-    if isinstance(node, Source) and not sources:
+    if isinstance(node, SourceDefinition) and not sources:
         return False
     if include:
         return any(
@@ -88,11 +96,11 @@ def build_graph(
     # Add all nodes first in case we're visualizing RunResults
     # We want to be able to add immediate parents of RunResult
     # nodes as dashed
-    node_or_result: Union[BaseNode, RunResult]
+    node_or_result: Union[DbtNode, RunResultOutput]
     for node_or_result in nodes:
-        node: BaseNode = (
+        node: DbtNode = (
             node_or_result.node
-            if isinstance(node_or_result, RunResult)
+            if isinstance(node_or_result, RunResultOutput)
             else node_or_result
         )
@@ -114,7 +122,7 @@
         G.add_node(node_or_result.gvrepr, **node_or_result.gvattrs, style="filled")

     for node_or_result in nodes:
-        node: BaseNode = node_or_result.node if isinstance(node_or_result, RunResult) else node_or_result  # type: ignore[no-redef]
+        node: DbtNode = node_or_result.node if isinstance(node_or_result, RunResultOutput) else node_or_result  # type: ignore[no-redef]
         if not should_display(
             node,
             analyses,
@@ -129,8 +137,8 @@
             if verbose:
                 typer.secho(f"skipping {node.name}")
             continue
-        if node.depends_on and node.depends_on.nodes:
-            for dep in node.depends_on.resolved_nodes:
+        if node.depends_on and node.depends_on.nodes:  # type: ignore[union-attr]
+            for dep in node.depends_on.resolved_nodes:  # type: ignore[union-attr]
                 if not should_display(
                     dep,
                     analyses,