Skip to content

Commit

Permalink
fix(urls): standardize pass-through of parsed query parameters (#9482)
Browse files Browse the repository at this point in the history
This PR cleans up our URL handling a bit.

1. We were doing a bunch of unnecessary url parse -> url unparse, only
to
immediately again parse the URL as the first call in every `_from_url`
   implementation.
1. Each backend was separately handling converting query parameters to
single
   scalars where possible, due to the unparsing
1. Query parameters were sometimes passed into `kwargs` and sometimes
parsed
   again to then be used as kwargs later


So, I did the following:

1. Pass the `ParseResult` into the `_from_url` implementation.
1. Convert query parameters into single values where possible _before_
calling `backend._from_url`
1. Pass all query parameters as `**kwargs` into the `_from_url` call.

Fixes #9456.
  • Loading branch information
cpcloud authored Jul 1, 2024
1 parent 8b9f26e commit 87cba01
Show file tree
Hide file tree
Showing 15 changed files with 71 additions and 165 deletions.
42 changes: 11 additions & 31 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import urllib.parse
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar
from urllib.parse import parse_qs, urlparse

import ibis
import ibis.common.exceptions as exc
Expand All @@ -21,6 +20,7 @@

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, MutableMapping
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -1352,6 +1352,11 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend:
orig_kwargs = kwargs.copy()
kwargs = dict(urllib.parse.parse_qsl(parsed.query))

# convert single parameter lists value to single values
for name, value in kwargs.items():
if len(value) == 1:
kwargs[name] = value[0]

if scheme == "file":
path = parsed.netloc + parsed.path
# Merge explicit kwargs with query string, explicit kwargs
Expand All @@ -1369,35 +1374,21 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend:
else:
raise ValueError(f"Don't know how to connect to {resource!r}")

if kwargs:
# If there are kwargs (either explicit or from the query string),
# re-add them to the parsed URL
query = urllib.parse.urlencode(kwargs)
parsed = parsed._replace(query=query)

if scheme in ("postgres", "postgresql"):
# Treat `postgres://` and `postgresql://` the same
scheme = "postgres"

# Convert all arguments back to a single URL string
url = parsed.geturl()
if "://" not in url:
# urllib may roundtrip `duckdb://` to `duckdb:`. Here we re-add the
# missing `//`.
url = url.replace(":", "://", 1)
# Treat `postgres://` and `postgresql://` the same
scheme = scheme.replace("postgresql", "postgres")

try:
backend = getattr(ibis, scheme)
except AttributeError:
raise ValueError(f"Don't know how to connect to {resource!r}") from None

return backend._from_url(url, **orig_kwargs)
return backend._from_url(parsed, **kwargs)


class UrlFromPath:
__slots__ = ()

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs: Any) -> BaseBackend:
"""Connect to a backend using a URL `url`.
Parameters
Expand All @@ -1413,7 +1404,6 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
A backend instance
"""
url = urlparse(url)
netloc = url.netloc
parts = list(filter(None, (netloc, url.path[bool(netloc) :])))
database = Path(*parts) if parts and parts != [":memory:"] else ":memory:"
Expand All @@ -1424,16 +1414,6 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
elif isinstance(database, Path):
database = database.absolute()

query_params = parse_qs(url.query)

for name, value in query_params.items():
if len(value) > 1:
kwargs[name] = value
elif len(value) == 1:
kwargs[name] = value[0]
else:
raise exc.IbisError(f"Invalid URL parameter: {name}")

self._convert_kwargs(kwargs)
return self.connect(database=database, **kwargs)

Expand All @@ -1443,7 +1423,7 @@ class NoUrl:

name: str

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs) -> BaseBackend:
"""Connect to the backend with empty url.
Parameters
Expand Down
10 changes: 4 additions & 6 deletions ibis/backends/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
import re
from typing import TYPE_CHECKING, Any, Optional
from urllib.parse import parse_qs, urlparse

import google.api_core.exceptions
import google.auth.credentials
Expand Down Expand Up @@ -41,6 +40,7 @@
if TYPE_CHECKING:
from collections.abc import Iterable, Mapping
from pathlib import Path
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -332,12 +332,10 @@ def read_json(
)
return self._read_file(path, table_name=table_name, job_config=job_config)

def _from_url(self, url: str, **kwargs):
result = urlparse(url)
params = parse_qs(result.query)
def _from_url(self, url: ParseResult, **kwargs):
return self.connect(
project_id=result.netloc or params.get("project_id", [""])[0],
dataset_id=result.path[1:] or params.get("dataset_id", [""])[0],
project_id=url.netloc or kwargs.get("project_id", [""])[0],
dataset_id=url.path[1:] or kwargs.get("dataset_id", [""])[0],
**kwargs,
)

Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/bigquery/tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import collections
import datetime
import decimal
from urllib.parse import urlparse

import pandas as pd
import pandas.testing as tm
Expand Down Expand Up @@ -428,3 +429,10 @@ def test_table_suffix():
expr = t.filter(t._TABLE_SUFFIX == "1929", t.max != 9999.9).head(1)
result = expr.execute()
assert not result.empty


def test_parameters_in_url_connect(mocker):
spy = mocker.spy(ibis.bigquery, "_from_url")
parsed = urlparse("bigquery://ibis-gbq?location=us-east1")
ibis.connect("bigquery://ibis-gbq?location=us-east1")
spy.assert_called_once_with(parsed, location="us-east1")
16 changes: 4 additions & 12 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from contextlib import closing
from functools import partial
from typing import TYPE_CHECKING, Any, Literal
from urllib.parse import parse_qs, unquote_plus, urlparse
from urllib.parse import unquote_plus

import clickhouse_connect as cc
import pyarrow as pa
Expand All @@ -32,6 +32,7 @@
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping
from pathlib import Path
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -60,7 +61,7 @@ class Options(ibis.config.Config):

bool_type: Literal["Bool", "UInt8", "Int8"] = "Bool"

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs) -> BaseBackend:
"""Connect to a backend using a URL `url`.
Parameters
Expand All @@ -76,25 +77,16 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
A backend instance
"""
url = urlparse(url)
database = url.path[1:]
query_params = parse_qs(url.query)

connect_args = {
"user": url.username,
"password": unquote_plus(url.password or ""),
"host": url.hostname,
"database": database or "",
**kwargs,
}

for name, value in query_params.items():
if len(value) > 1:
connect_args[name] = value
elif len(value) == 1:
connect_args[name] = value[0]
else:
raise com.IbisError(f"Invalid URL parameter: {name}")

kwargs.update(connect_args)
self._convert_kwargs(kwargs)

Expand Down
15 changes: 7 additions & 8 deletions ibis/backends/clickhouse/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,10 @@ def test_create_table_no_syntax_error(con):
def test_password_with_bracket():
password = f'{os.environ.get("IBIS_TEST_CLICKHOUSE_PASSWORD", "")}['
quoted_pass = quote_plus(password)
with pytest.raises(cc.driver.exceptions.DatabaseError) as e:
ibis.clickhouse.connect(
host=os.environ.get("IBIS_TEST_CLICKHOUSE_HOST", "localhost"),
user=os.environ.get("IBIS_TEST_CLICKHOUSE_USER", "default"),
port=int(os.environ.get("IBIS_TEST_CLICKHOUSE_PORT", 8123)),
password=quoted_pass,
)
assert "password is incorrect" in str(e.value)
host = os.environ.get("IBIS_TEST_CLICKHOUSE_HOST", "localhost")
user = os.environ.get("IBIS_TEST_CLICKHOUSE_USER", "default")
port = int(os.environ.get("IBIS_TEST_CLICKHOUSE_PORT", 8123))
with pytest.raises(
cc.driver.exceptions.DatabaseError, match="password is incorrect"
):
ibis.clickhouse.connect(host=host, user=user, port=port, password=quoted_pass)
20 changes: 5 additions & 15 deletions ibis/backends/druid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
import contextlib
import json
from typing import TYPE_CHECKING, Any
from urllib.parse import parse_qs, unquote_plus, urlparse
from urllib.parse import unquote_plus

import pydruid.db
import sqlglot as sg

import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch
from ibis.backends.druid.compiler import DruidCompiler
Expand All @@ -20,6 +19,7 @@

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping
from urllib.parse import ParseResult

import pandas as pd
import pyarrow as pa
Expand All @@ -40,7 +40,7 @@ def version(self) -> str:
[(version,)] = result.fetchall()
return version

def _from_url(self, url: str, **kwargs):
def _from_url(self, url: ParseResult, **kwargs):
"""Connect to a backend using a URL `url`.
Parameters
Expand All @@ -56,9 +56,6 @@ def _from_url(self, url: str, **kwargs):
A backend instance
"""

url = urlparse(url)
query_params = parse_qs(url.query)
kwargs = {
"user": url.username,
"password": unquote_plus(url.password)
Expand All @@ -67,15 +64,8 @@ def _from_url(self, url: str, **kwargs):
"host": url.hostname,
"path": url.path,
"port": url.port,
} | kwargs

for name, value in query_params.items():
if len(value) > 1:
kwargs[name] = value
elif len(value) == 1:
kwargs[name] = value[0]
else:
raise com.IbisError(f"Invalid URL parameter: {name}")
**kwargs,
}

self._convert_kwargs(kwargs)

Expand Down
18 changes: 5 additions & 13 deletions ibis/backends/exasol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import datetime
import re
from typing import TYPE_CHECKING, Any
from urllib.parse import parse_qs, unquote_plus, urlparse
from urllib.parse import unquote_plus

import pyexasol
import sqlglot as sg
Expand All @@ -25,6 +25,7 @@

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -99,10 +100,8 @@ def do_connect(
with self.begin() as con:
con.execute(f"ALTER SESSION SET TIME_ZONE = {timezone!r}")

def _from_url(self, url: str, **kwargs) -> BaseBackend:
def _from_url(self, url: ParseResult, **kwargs) -> BaseBackend:
"""Construct an ibis backend from a URL."""
url = urlparse(url)
query_params = parse_qs(url.query)
kwargs = {
"user": url.username,
"password": unquote_plus(url.password)
Expand All @@ -111,15 +110,8 @@ def _from_url(self, url: str, **kwargs) -> BaseBackend:
"schema": url.path[1:] or None,
"host": url.hostname,
"port": url.port,
} | kwargs

for name, value in query_params.items():
if len(value) > 1:
kwargs[name] = value
elif len(value) == 1:
kwargs[name] = value[0]
else:
raise com.IbisError(f"Invalid URL parameter: {name}")
**kwargs,
}

self._convert_kwargs(kwargs)

Expand Down
16 changes: 2 additions & 14 deletions ibis/backends/impala/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import os
from functools import cached_property
from typing import TYPE_CHECKING, Any, Literal
from urllib.parse import parse_qs, urlparse

import impala.dbapi as impyla
import sqlglot as sg
Expand Down Expand Up @@ -44,6 +43,7 @@
if TYPE_CHECKING:
from collections.abc import Mapping
from pathlib import Path
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
Expand All @@ -67,7 +67,7 @@ class Backend(SQLBackend):

supports_in_memory_tables = True

def _from_url(self, url: str, **kwargs: Any) -> Backend:
def _from_url(self, url: ParseResult, **kwargs: Any) -> Backend:
"""Connect to a backend using a URL `url`.
Parameters
Expand All @@ -83,8 +83,6 @@ def _from_url(self, url: str, **kwargs: Any) -> Backend:
A backend instance
"""
url = urlparse(url)

for name in ("username", "hostname", "port", "password"):
if value := (
getattr(url, name, None)
Expand All @@ -99,16 +97,6 @@ def _from_url(self, url: str, **kwargs: Any) -> Backend:
if database:
kwargs["database"] = database

query_params = parse_qs(url.query)

for name, value in query_params.items():
if len(value) > 1:
kwargs[name] = value
elif len(value) == 1:
kwargs[name] = value[0]
else:
raise com.IbisError(f"Invalid URL parameter: {name}")

self._convert_kwargs(kwargs)
return self.connect(**kwargs)

Expand Down
Loading

0 comments on commit 87cba01

Please sign in to comment.