Skip to content

Commit

Permalink
0.0.162
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Jun 22, 2024
1 parent fab84bf commit 0f7f070
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 19 deletions.
22 changes: 8 additions & 14 deletions orso/compute/compiled.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,21 @@
# limitations under the License.

from cpython.bytes cimport PyBytes_AsString, PyBytes_GET_SIZE
from cpython.object cimport PyObject_Str
from cython cimport int
from datetime import datetime
from ormsgpack import unpackb
from orso.exceptions import DataError
from typing import Dict, Any, Tuple
from libc.stdlib cimport malloc, free

import numpy as np
cimport cython
cimport numpy as cnp
from numpy cimport ndarray

cnp.import_array()



HEADER_PREFIX = b"\x10\x00"
MAXIMUM_RECORD_SIZE = 8 * 1024 * 1024
Expand Down Expand Up @@ -78,13 +85,6 @@ cpdef tuple extract_dict_columns(dict data, tuple fields):
return tuple(sorted_data) # Convert list to tuple




from libc.stdlib cimport malloc, free
import numpy as np
cimport cython
cimport numpy as cnp

@cython.boundscheck(False)
@cython.wraparound(False)
def collect_cython(list rows, cnp.ndarray[cnp.int32_t, ndim=1] columns, int limit=-1, int single=False) -> list:
Expand All @@ -106,8 +106,6 @@ def collect_cython(list rows, cnp.ndarray[cnp.int32_t, ndim=1] columns, int limi

return result[0] if single else result

from cpython.object cimport PyObject_Str

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef int calculate_data_width(list column_values):
Expand All @@ -124,10 +122,6 @@ cpdef int calculate_data_width(list column_values):
return max_width



import numpy as np
from numpy cimport ndarray

def process_table(table, row_factory, int max_chunksize):
cdef list[] batches = table.to_batches(max_chunksize)
cdef list rows = []
Expand Down
3 changes: 2 additions & 1 deletion orso/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def __init__(self, columns):
self.columns = columns
message = (
f"Data did not pass validation checks; "
f"Additional fields were present in the record - " + ", ".join(columns)
f"Additional fields, not defined in the schema, were present in the record - "
+ ", ".join(columns)
)
super().__init__(message)

Expand Down
13 changes: 12 additions & 1 deletion orso/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ class FlatColumn:
"""

name: str
default: Optional[Any] = None
type: OrsoTypes = OrsoTypes._MISSING_TYPE
description: Optional[str] = None
disposition: Optional[ColumnDisposition] = None
Expand Down Expand Up @@ -197,6 +198,15 @@ def __init__(self, **kwargs):
if self.type == OrsoTypes.DECIMAL and self.scale is None:
self.scale = int(0.75 * self.precision)

# if we have a default value, parse it to the correct type and fail if we can't
if self.default:
try:
self.default = self.type.parse(self.default)
except Exception:
raise ValueError(
f"Column '{self.name}' default value not compatible with '{self.type}'."
)

def __str__(self):
return self.identity

Expand Down Expand Up @@ -245,6 +255,7 @@ def to_flatcolumn(self) -> "FlatColumn":
"""
return FlatColumn(
name=str(self.name),
default=self.default,
description=self.description,
aliases=self.aliases,
identity=self.identity,
Expand Down Expand Up @@ -632,7 +643,7 @@ def validate(self, data: MutableMapping) -> bool:

for column in self.columns:
if column.name not in data:
errors["Column Missing"].append(column.name)
errors["Column in Schema Not Found in Record"].append(column.name)

else:
value = data[column.name]
Expand Down
2 changes: 1 addition & 1 deletion orso/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__: str = "0.0.161"
__version__: str = "0.0.162"
__author__: str = "@joocer"
15 changes: 13 additions & 2 deletions tests/test_schema_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ def test_columns_with_unknown_parameters():
FlatColumn(name="athena", type=OrsoTypes.INTEGER, alpha="betty")
FunctionColumn(name="aries", type=OrsoTypes.DATE, binding=datetime.date.today, sketty="yum")

def test_column_with_valid_default():
    """A default given as text or as a native int ends up equal to the integer 1."""
    # Both spellings of the default must compare equal to 1 after construction.
    for raw_default in ("1", 1):
        column = FlatColumn(name="valid", type=OrsoTypes.INTEGER, default=raw_default)
        assert column.default == 1

def test_column_with_invalid_default():
    """Building an INTEGER column with the default "green" must raise ValueError."""
    bad_column_spec = {"name": "invalid", "type": OrsoTypes.INTEGER, "default": "green"}
    with pytest.raises(ValueError):
        FlatColumn(**bad_column_spec)

def test_flat_column_from_arrow():
field_name = "test_field"
Expand Down Expand Up @@ -128,11 +138,13 @@ def test_column_type_mapping():
FlatColumn(name="able", type="LEFT")


def test_missing_columns():
def test_missing_column_missing_name():
with pytest.raises(ColumnDefinitionError):
FlatColumn()




def test_type_checks():
from decimal import Decimal
from orso.schema import RelationSchema
Expand Down Expand Up @@ -410,5 +422,4 @@ def test_arrow_conversion():
if __name__ == "__main__": # pragma: no cover
from tests import run_tests

test_arrow_conversion()
run_tests()

0 comments on commit 0f7f070

Please sign in to comment.