From 6973427fa7894073255155b35c0c5566bb953cdf Mon Sep 17 00:00:00 2001
From: Saar Cohen <66667568+theSaarco@users.noreply.github.com>
Date: Tue, 23 Aug 2022 09:15:22 +0000
Subject: [PATCH] [Feature store] Changed fset status targets to have partitioning related fields (#2285)

---
 mlrun/datastore/targets.py | 9 +++++++++
 mlrun/model.py             | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/mlrun/datastore/targets.py b/mlrun/datastore/targets.py
index d6b7f9bd1c6..cb900b10e8e 100644
--- a/mlrun/datastore/targets.py
+++ b/mlrun/datastore/targets.py
@@ -610,6 +610,15 @@ def update_resource_status(self, status="", producer=None, size=None):
         target.updated = now_date().isoformat()
         target.size = size
         target.producer = producer or target.producer
+        # Copy partitioning-related fields to the status, since these are needed if reading the actual data that
+        # is related to the specific target.
+        # TODO - instead of adding more fields to the status targets, we should consider changing any functionality
+        # that depends on "spec-fields" to use a merge between the status and the spec targets. One such place
+        # is the fset.to_dataframe() function.
+        target.partitioned = self.partitioned
+        target.key_bucketing_number = self.key_bucketing_number
+        target.partition_cols = self.partition_cols
+        target.time_partitioning_granularity = self.time_partitioning_granularity
         self._resource.status.update_target(target)
         return target
 
diff --git a/mlrun/model.py b/mlrun/model.py
index 0658955c0ec..aa2d2deab8b 100644
--- a/mlrun/model.py
+++ b/mlrun/model.py
@@ -1234,6 +1234,10 @@ class DataTarget(DataTargetBase):
         "size",
         "last_written",
         "run_id",
+        "partitioned",
+        "key_bucketing_number",
+        "partition_cols",
+        "time_partitioning_granularity",
     ]
 
     def __init__(