support PG array dimensionality #411

Merged (7 commits) on Jan 3, 2024
Changes from 1 commit
23 changes: 23 additions & 0 deletions recap/clients/postgresql.py
@@ -5,6 +5,7 @@

from recap.clients.dbapi import Connection, DbapiClient
from recap.converters.postgresql import PostgresqlConverter
from recap.types import StructType

PSYCOPG2_CONNECT_ARGS = {
"host",
@@ -78,3 +79,25 @@ def ls_catalogs(self) -> list[str]:
            """
        )
        return [row[0] for row in cursor.fetchall()]

    def schema(self, catalog: str, schema: str, table: str) -> StructType:
        cursor = self.connection.cursor()
        cursor.execute(
            f"""
            SELECT
                information_schema.columns.*,
                pg_attribute.attndims
            FROM information_schema.columns
            JOIN pg_attribute ON information_schema.columns.column_name = pg_attribute.attname
Contributor:

I don't think this is enough. I think you need to join on table, schema, and column. It appears pg_attribute doesn't have those:

               Table "pg_catalog.pg_attribute"
     Column     |   Type    | Collation | Nullable | Default 
----------------+-----------+-----------+----------+---------
 attrelid       | oid       |           | not null | 
 attname        | name      |           | not null | 
 atttypid       | oid       |           | not null | 
 attstattarget  | integer   |           | not null | 
 attlen         | smallint  |           | not null | 
 attnum         | smallint  |           | not null | 
 attndims       | integer   |           | not null | 
 attcacheoff    | integer   |           | not null | 
 atttypmod      | integer   |           | not null | 
 attbyval       | boolean   |           | not null | 
 attalign       | "char"    |           | not null | 
 attstorage     | "char"    |           | not null | 
 attcompression | "char"    |           | not null | 
 attnotnull     | boolean   |           | not null | 
 atthasdef      | boolean   |           | not null | 
 atthasmissing  | boolean   |           | not null | 
 attidentity    | "char"    |           | not null | 
 attgenerated   | "char"    |           | not null | 
 attisdropped   | boolean   |           | not null | 
 attislocal     | boolean   |           | not null | 
 attinhcount    | integer   |           | not null | 
 attcollation   | oid       |           | not null | 
 attacl         | aclitem[] |           |          | 
 attoptions     | text[]    | C         |          | 
 attfdwoptions  | text[]    | C         |          | 
 attmissingval  | anyarray  |           |          | 

So perhaps we need another join here as well?

Per ChatGPT:

[Screenshot of a ChatGPT response, 2023-12-23 11:52 AM]
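
A rough sketch of what those extra joins could look like, going through pg_class and pg_namespace (illustrative only; not necessarily the query this PR lands on):

-- Sketch: tie pg_attribute rows to one specific table by joining through
-- pg_class (relations) and pg_namespace (schemas), rather than matching
-- on column name alone.
SELECT
    information_schema.columns.*,
    pg_attribute.attndims
FROM information_schema.columns
JOIN pg_namespace
    ON pg_namespace.nspname = information_schema.columns.table_schema
JOIN pg_class
    ON pg_class.relnamespace = pg_namespace.oid
   AND pg_class.relname = information_schema.columns.table_name
JOIN pg_attribute
    ON pg_attribute.attrelid = pg_class.oid
   AND pg_attribute.attname = information_schema.columns.column_name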

Contributor Author:

Ah, nice catch. I'll add in the other joins.

            WHERE table_name = {self.param_style}
                AND table_schema = {self.param_style}
                AND table_catalog = {self.param_style}
            ORDER BY ordinal_position ASC
            """,
            (table, schema, catalog),
        )
        names = [name[0].upper() for name in cursor.description]
        return self.converter.to_recap(
            # Make each row be a dict with the column names as keys
            [dict(zip(names, row)) for row in cursor.fetchall()]
        )
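
For context, calling the new method looks something like this (hypothetical snippet; assumes a PostgresqlClient already wired to a live connection):

# Hypothetical usage of the new schema() method:
struct = client.schema("testdb", "public", "test_types")
print(struct)  # a StructType with one field per column, arrays included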
48 changes: 15 additions & 33 deletions recap/converters/postgresql.py
@@ -8,32 +8,22 @@
    FloatType,
    IntType,
    ListType,
    ProxyType,
Contributor:

Niiiice. Took me a sec to grok why we didn't need this anymore. Fully walking the n_dimensions means we don't need self-references. Awesome.

One question/nuance here: the PG dimensions are just a suggestion.

The current implementation does not enforce the declared number of dimensions either. Arrays of a particular element type are all considered to be of the same type, regardless of size or number of dimensions. So, declaring the array size or number of dimensions in CREATE TABLE is simply documentation; it does not affect run-time behavior.

https://www.postgresql.org/docs/current/arrays.html

So the question is: do we want Recap to reflect the DB's data or its schema? My implementation (with ProxyType) reflected the data. Yours changes it to reflect the schema. Perhaps we want it configurable, with one as the default? WDYT?

Contributor Author:

I like to think the schema is the beacon of truth for what the user intends for the column. If users are leveraging the column differently than the schema's representation, they should fix the schema. But I could see past mistakes leading to a situation where this isn't true, which would then lead to Recap constructing a false narrative about the data. I think making it configurable makes sense. Maybe default to ProxyType since that's the safer assumption? Would we want to add config params to the PostgresqlConverter constructor?

Contributor:

Ya, can you add a param to the init to configure it? Defaulting to proxy is safer, as you say.
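
One hypothetical shape for that constructor param (the `enforce_array_dimensions` name is illustrative, not something this PR commits to):

# Sketch of a configurable converter; the flag name is hypothetical.
class PostgresqlConverter(DbapiConverter):
    def __init__(
        self,
        enforce_array_dimensions: bool = False,  # False = keep ProxyType (data-oriented) behavior
        namespace: str = DEFAULT_NAMESPACE,
    ) -> None:
        self.enforce_array_dimensions = enforce_array_dimensions
        self.namespace = namespace
        self.registry = RecapTypeRegistry()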

    NullType,
    RecapType,
    RecapTypeRegistry,
    StringType,
    UnionType,
)

MAX_FIELD_SIZE = 1073741824

DEFAULT_NAMESPACE = "_root"
"""
Namespace to use when no namespace is specified in the schema.
"""


class PostgresqlConverter(DbapiConverter):
    def __init__(self, namespace: str = DEFAULT_NAMESPACE) -> None:
        self.namespace = namespace
        self.registry = RecapTypeRegistry()

    def _parse_type(self, column_props: dict[str, Any]) -> RecapType:
        column_name = column_props["COLUMN_NAME"]
        data_type = column_props["DATA_TYPE"].lower()
        octet_length = column_props["CHARACTER_OCTET_LENGTH"]
        max_length = column_props["CHARACTER_MAXIMUM_LENGTH"]
        udt_name = (column_props["UDT_NAME"] or "").lower()
        ndims = column_props["ATTNDIMS"]

        if data_type in ["bigint", "int8", "bigserial", "serial8"]:
            base_type = IntType(bits=64, signed=True)
@@ -90,7 +80,6 @@ def _parse_type(self, column_props: dict[str, Any]) -> RecapType:
            # lengths, etc. Thus, we only set DATA_TYPE here. Sigh.
            value_type = self._parse_type(
                {
                    "COLUMN_NAME": None,
                    "DATA_TYPE": nested_data_type,
                    # Default strings, bits, etc. to the max field size since
                    # information_schema doesn't contain lengths for array
@@ -102,29 +91,22 @@ def _parse_type(self, column_props: dict[str, Any]) -> RecapType:
                    # * 8 because bit columns use bits not bytes.
                    "CHARACTER_MAXIMUM_LENGTH": MAX_FIELD_SIZE * 8,
                    "UDT_NAME": None,
                    "ATTNDIMS": 0,
                }
            )
            column_name_without_periods = column_name.replace(".", "_")
            base_type_alias = f"{self.namespace}.{column_name_without_periods}"
            # Construct a self-referencing list comprised of the array's value
            # type and a proxy to the list itself. This allows arrays to be an
            # arbitrary number of dimensions, which is how PostgreSQL treats
            # lists. See https://github.com/recap-build/recap/issues/264 for
            # more details.
            base_type = ListType(
                alias=base_type_alias,
                values=UnionType(
                    types=[
                        value_type,
                        ProxyType(
                            alias=base_type_alias,
                            registry=self.registry,
                        ),
                    ],
                ),
            )
            self.registry.register_alias(base_type)
            base_type = self._create_n_dimension_list(value_type, ndims)
        else:
            raise ValueError(f"Unknown data type: {data_type}")

        return base_type

    def _create_n_dimension_list(self, base_type: RecapType, ndims: int) -> RecapType:
        """
        Build a list type with `ndims` dimensions containing nullable
        `base_type` as the innermost value type.
        """
        if ndims == 0:
            return UnionType(types=[NullType(), base_type])
Contributor:

I'm curious about this one. It seems right, but I'm not 100% sure. As I read it, there are a few things:

  1. DbapiConverter handles root-level NULLABLE fields (https://github.com/recap-build/recap/blob/main/recap/converters/dbapi.py#L15-L16)
  2. This code here handles NULLABLE items in a PG ARRAY field.

I think this is the right behavior. But I'm curious: do PG arrays always allow NULLs in their dimensional values? I couldn't find good docs on this, and I haven't tested it out.

Contributor Author:

I did some testing and digging, and afaict the answer is yes: the innermost value can always be null. Enforcing non-nulls requires adding some sort of CHECK validation, per https://stackoverflow.com/a/59421233, which seems like a pretty challenging rabbit hole of digging through information_schema.check_constraints.
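
For instance, a quick session along these lines (illustrative, not from the PR) shows both behaviors:

-- PostgreSQL ignores declared dimensionality and allows NULL elements;
-- both inserts succeed.
CREATE TABLE dims_test (vals INTEGER[][]);
INSERT INTO dims_test VALUES (ARRAY[[1, NULL], [3, 4]]);  -- NULL element: accepted
INSERT INTO dims_test VALUES ('{1,2,3}');                 -- 1-D value in a 2-D column: accepted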

        else:
            return ListType(
                values=self._create_n_dimension_list(base_type, ndims - 1),
            )
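
To make the recursion concrete, here is a small sketch (not part of the diff) of how a 2-D INTEGER[][] column unrolls, assuming Recap types compare by structural equality:

from recap.converters.postgresql import PostgresqlConverter
from recap.types import IntType, ListType, NullType, UnionType

converter = PostgresqlConverter()

# ndims == 0 base case: nullable element type
inner = UnionType(types=[NullType(), IntType(bits=32)])
# each recursive step wraps one more ListType
one_d = ListType(values=inner)   # ndims == 1
two_d = ListType(values=one_d)   # ndims == 2

assert converter._create_n_dimension_list(IntType(bits=32), 2) == two_d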
61 changes: 44 additions & 17 deletions tests/integration/clients/test_postgresql.py
@@ -10,7 +10,6 @@
    IntType,
    ListType,
    NullType,
    ProxyType,
    StringType,
    StructType,
    UnionType,
@@ -51,7 +50,9 @@ def setup_class(cls):
                test_default INTEGER DEFAULT 2,
                test_int_array INTEGER[],
                test_varchar_array VARCHAR(255)[] DEFAULT '{"Hello", "World"}',
                test_bit_array BIT(8)[],
                test_int_array_2d INTEGER[][],
                test_text_array_3d TEXT[][][]
Contributor:

Do you mind adding a NOT NULL array as well? I realized we haven't tested that.
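
For reference, a column along those lines might look like this (hypothetical; note that NOT NULL constrains the array value itself, while elements inside it can still be NULL):

-- Hypothetical addition to the CREATE TABLE above:
test_not_null_array INTEGER[] NOT NULL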

            );
            """
        )
@@ -164,14 +165,10 @@ def test_struct_method(self):
                types=[
                    NullType(),
                    ListType(
                        alias="_root.test_int_array",
                        values=UnionType(
                            types=[
                                NullType(),
                                IntType(bits=32),
                                ProxyType(
                                    alias="_root.test_int_array",
                                    registry=client.converter.registry,  # type: ignore
                                ),
                            ]
                        ),
                    ),
@@ -183,14 +180,10 @@ def test_struct_method(self):
                types=[
                    NullType(),
                    ListType(
                        alias="_root.test_varchar_array",
                        values=UnionType(
                            types=[
                                NullType(),
                                StringType(bytes_=MAX_FIELD_SIZE),
                                ProxyType(
                                    alias="_root.test_varchar_array",
                                    registry=client.converter.registry,  # type: ignore
                                ),
                            ]
                        ),
                    ),
@@ -202,19 +195,53 @@ def test_struct_method(self):
                types=[
                    NullType(),
                    ListType(
                        alias="_root.test_bit_array",
                        values=UnionType(
                            types=[
                                NullType(),
                                BytesType(bytes_=MAX_FIELD_SIZE, variable=False),
                                ProxyType(
                                    alias="_root.test_bit_array",
                                    registry=client.converter.registry,  # type: ignore
                                ),
                            ]
                        ),
                    ),
                ],
            ),
            UnionType(
                default=None,
                name="test_int_array_2d",
                types=[
                    NullType(),
                    ListType(
                        values=ListType(
                            values=UnionType(
                                types=[
                                    NullType(),
                                    IntType(bits=32),
                                ]
                            )
                        ),
                    ),
                ],
            ),
            UnionType(
                default=None,
                name="test_text_array_3d",
                types=[
                    NullType(),
                    ListType(
                        values=ListType(
                            values=ListType(
                                values=UnionType(
                                    types=[
                                        NullType(),
                                        StringType(
                                            bytes_=MAX_FIELD_SIZE, variable=True
                                        ),
                                    ]
                                )
                            )
                        ),
                    ),
                ],
            ),
        ]

        # Going field by field to make debugging easier when test fails
25 changes: 19 additions & 6 deletions tests/unit/converters/test_postgresql.py
@@ -7,7 +7,7 @@
    FloatType,
    IntType,
    ListType,
    ProxyType,
    NullType,
    StringType,
    UnionType,
)
@@ -25,6 +25,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            IntType(bits=64, signed=True),
        ),
@@ -37,6 +38,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            IntType(bits=32, signed=True),
        ),
@@ -49,6 +51,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            IntType(bits=16, signed=True),
        ),
@@ -61,6 +64,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            FloatType(bits=64),
        ),
@@ -73,6 +77,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            FloatType(bits=32),
        ),
@@ -85,6 +90,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            BoolType(),
        ),
@@ -97,6 +103,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            StringType(bytes_=65536, variable=True),
        ),
@@ -109,6 +116,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            StringType(bytes_=255, variable=True),
        ),
@@ -121,6 +129,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            StringType(bytes_=255, variable=False),
        ),
@@ -133,6 +142,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            BytesType(bytes_=MAX_FIELD_SIZE),
        ),
@@ -145,6 +155,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            BytesType(bytes_=1, variable=False),
        ),
@@ -157,6 +168,7 @@
                "NUMERIC_PRECISION": None,
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            BytesType(bytes_=3, variable=False),
        ),
@@ -170,6 +182,7 @@
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "DATETIME_PRECISION": 3,
                "ATTNDIMS": 0,
            },
            IntType(bits=64, logical="build.recap.Timestamp", unit="millisecond"),
        ),
@@ -183,6 +196,7 @@
                "NUMERIC_SCALE": None,
                "UDT_NAME": None,
                "DATETIME_PRECISION": 3,
                "ATTNDIMS": 0,
            },
            IntType(bits=64, logical="build.recap.Timestamp", unit="millisecond"),
        ),
@@ -195,6 +209,7 @@
                "NUMERIC_PRECISION": 10,
                "NUMERIC_SCALE": 2,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            BytesType(
                logical="build.recap.Decimal",
@@ -213,6 +228,7 @@
                "NUMERIC_PRECISION": 5,
                "NUMERIC_SCALE": 0,
                "UDT_NAME": None,
                "ATTNDIMS": 0,
            },
            BytesType(
                logical="build.recap.Decimal",
@@ -239,16 +255,13 @@ def test_postgresql_converter_array():
"NUMERIC_PRECISION": 5,
"NUMERIC_SCALE": 0,
"UDT_NAME": "_int4",
"ATTNDIMS": 1,
}
expected = ListType(
alias="_root.test_column",
values=UnionType(
types=[
NullType(),
IntType(bits=32, signed=True),
ProxyType(
alias="_root.test_column",
registry=converter.registry,
),
],
),
)