From bd5cab46eff879638076d4313de892c7bffd5569 Mon Sep 17 00:00:00 2001 From: Oleksii Vykaliuk Date: Fri, 20 Dec 2024 11:41:18 +0100 Subject: [PATCH 1/5] feat(cli): Add support for geoparquet reading --- Cargo.toml | 1 + geozero-cli/Cargo.toml | 2 ++ geozero-cli/src/main.rs | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cdbe2665..d4346848 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ gdal = { version = "0.16", default-features = false } gdal-sys = "0.9" geo = "0.26.0" geo-types = { version = "0.7.11", default-features = false } +geoarrow = { version = "0.3.0" } geojson = { version = "0.24.1", default-features = false } geos = "9.0" gpx = { version = "0.9", default-features = false } diff --git a/geozero-cli/Cargo.toml b/geozero-cli/Cargo.toml index f585ec60..509f651c 100644 --- a/geozero-cli/Cargo.toml +++ b/geozero-cli/Cargo.toml @@ -21,3 +21,5 @@ env_logger.workspace = true flatgeobuf.workspace = true geozero = { workspace = true, default-features = true, features = ["with-csv"] } tokio = { workspace = true, default-features = true, features = ["full"] } +geo.workspace = true +geoarrow = { version = "0.4.0-beta.2", features = ["parquet_compression"] } diff --git a/geozero-cli/src/main.rs b/geozero-cli/src/main.rs index 130fa02c..f7d64e6a 100644 --- a/geozero-cli/src/main.rs +++ b/geozero-cli/src/main.rs @@ -1,5 +1,8 @@ use clap::Parser; use flatgeobuf::{FgbReader, FgbWriter, GeometryType, HttpFgbReader}; +use geo::Rect; +use geoarrow::io::parquet::{GeoParquetReaderOptions, GeoParquetRecordBatchReaderBuilder}; +use geoarrow::io::RecordBatchReader; use geozero::csv::{CsvReader, CsvWriter}; use geozero::error::{GeozeroError, Result}; use geozero::geojson::{GeoJsonLineReader, GeoJsonReader, GeoJsonWriter}; @@ -24,7 +27,7 @@ struct Cli { #[arg(short, long, value_parser = parse_extent)] extent: Option, - /// The path or URL to the FlatGeobuf file to read + /// The path or URL to the input file to read input: String, /// The path to the file to write @@ -88,6 +91,26 @@ async fn transform(args: Cli, processor: &mut P) -> Result< Some("jsonl") | Some("geojsonl") => { GeozeroDatasource::process(&mut GeoJsonLineReader::new(filein), processor) } + Some("parquet") | Some("geoparquet") => { + let mut geo_options = GeoParquetReaderOptions::default(); + if let Some(bbox) = &args.extent { + geo_options = geo_options.with_bbox( + Rect::new((bbox.minx, bbox.miny), (bbox.maxx, bbox.maxy)), + None, + ); + } + let reader = GeoParquetRecordBatchReaderBuilder::try_new_with_options( + File::open(path_in)?, + Default::default(), + geo_options, + ) + .map_err(arrow_to_geozero_err)? + .build() + .map_err(arrow_to_geozero_err)?; + + let mut wrapper = RecordBatchReader::new(Box::new(reader)); + wrapper.process(processor) + } Some("fgb") => { let ds = FgbReader::open(&mut filein).map_err(fgb_to_geozero_err)?; let mut ds = if let Some(bbox) = &args.extent { @@ -127,6 +150,7 @@ async fn process(args: Cli) -> Result<()> { } Ok(()) } + fn set_dimensions(processor: &mut SvgWriter<&mut BufWriter>, extent: Option) { if let Some(extent) = extent { processor.set_dimensions(extent.minx, extent.miny, extent.maxx, extent.maxy, 800, 600); @@ -136,6 +160,13 @@ fn set_dimensions(processor: &mut SvgWriter<&mut BufWriter>, extent: Optio } } +fn arrow_to_geozero_err(parquet_err: geoarrow::error::GeoArrowError) -> GeozeroError { + match parquet_err { + geoarrow::error::GeoArrowError::IOError(e) => GeozeroError::IoError(e), + err => GeozeroError::Dataset(format!("Unknown GeoArrow error: {err:?}")), + } +} + fn fgb_to_geozero_err(fgb_err: flatgeobuf::Error) -> GeozeroError { match fgb_err { flatgeobuf::Error::MissingMagicBytes => { From c4fedff0c98a053bafcfb5685bf62840ad8cb278 Mon Sep 17 00:00:00 2001 From: Oleksii Vykaliuk Date: Fri, 20 Dec 2024 18:10:43 +0100 Subject: [PATCH 2/5] fix: move geoarrow to CLI crate dep and use latest stable --- Cargo.toml | 1 - geozero-cli/Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d4346848..cdbe2665 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ gdal = { version = "0.16", default-features = false } gdal-sys = "0.9" geo = "0.26.0" geo-types = { version = "0.7.11", default-features = false } -geoarrow = { version = "0.3.0" } geojson = { version = "0.24.1", default-features = false } geos = "9.0" gpx = { version = "0.9", default-features = false } diff --git a/geozero-cli/Cargo.toml b/geozero-cli/Cargo.toml index 509f651c..353dba37 100644 --- a/geozero-cli/Cargo.toml +++ b/geozero-cli/Cargo.toml @@ -22,4 +22,4 @@ flatgeobuf.workspace = true geozero = { workspace = true, default-features = true, features = ["with-csv"] } tokio = { workspace = true, default-features = true, features = ["full"] } geo.workspace = true -geoarrow = { version = "0.4.0-beta.2", features = ["parquet_compression"] } +geoarrow = { version = "0.3.0", features = ["parquet_compression"] } From 7471d1a42f29149b5168bcbe8dc79963b1f58496 Mon Sep 17 00:00:00 2001 From: Oleksii Vykaliuk Date: Fri, 20 Dec 2024 18:26:12 +0100 Subject: [PATCH 3/5] fix: update geoarrow and add more help. --- geozero-cli/Cargo.toml | 2 +- geozero-cli/src/main.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/geozero-cli/Cargo.toml b/geozero-cli/Cargo.toml index 353dba37..509f651c 100644 --- a/geozero-cli/Cargo.toml +++ b/geozero-cli/Cargo.toml @@ -22,4 +22,4 @@ flatgeobuf.workspace = true geozero = { workspace = true, default-features = true, features = ["with-csv"] } tokio = { workspace = true, default-features = true, features = ["full"] } geo.workspace = true -geoarrow = { version = "0.3.0", features = ["parquet_compression"] } +geoarrow = { version = "0.4.0-beta.2", features = ["parquet_compression"] } diff --git a/geozero-cli/src/main.rs b/geozero-cli/src/main.rs index f7d64e6a..ce0a1a91 100644 --- a/geozero-cli/src/main.rs +++ b/geozero-cli/src/main.rs @@ -28,9 +28,13 @@ struct Cli { extent: Option, /// The path or URL to the input file to read + /// + /// Supported formats: CSV, GeoJSON, GeoJSON Lines, FGB, WKT, Parquet input: String, /// The path to the file to write + /// + /// Supported formats: CSV, GeoJSON, GeoJSON Lines, FGB, WKT, SVG dest: PathBuf, } From 1e3b2e11f0558bbf0c8c74379059fb573cf1e00f Mon Sep 17 00:00:00 2001 From: Oleksii Vykaliuk Date: Fri, 20 Dec 2024 18:39:08 +0100 Subject: [PATCH 4/5] fix: remove long help messages --- geozero-cli/src/main.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/geozero-cli/src/main.rs b/geozero-cli/src/main.rs index ce0a1a91..f7d64e6a 100644 --- a/geozero-cli/src/main.rs +++ b/geozero-cli/src/main.rs @@ -28,13 +28,9 @@ struct Cli { extent: Option, /// The path or URL to the input file to read - /// - /// Supported formats: CSV, GeoJSON, GeoJSON Lines, FGB, WKT, Parquet input: String, /// The path to the file to write - /// - /// Supported formats: CSV, GeoJSON, GeoJSON Lines, FGB, WKT, SVG dest: PathBuf, } From 9535a6e729c54b2914163d23200b06b9cb14b3e8 Mon Sep 17 00:00:00 2001 From: Oleksii Vykaliuk Date: Fri, 20 Dec 2024 18:40:42 +0100 Subject: [PATCH 5/5] fix: wording in input param help --- geozero-cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geozero-cli/src/main.rs b/geozero-cli/src/main.rs index f7d64e6a..53405b96 100644 --- a/geozero-cli/src/main.rs +++ b/geozero-cli/src/main.rs @@ -27,7 +27,7 @@ struct Cli { #[arg(short, long, value_parser = parse_extent)] extent: Option, - /// The path or URL to the input file to read + /// The path to the input file, or the URL for remote FlatGeobuf files input: String, /// The path to the file to write