diff --git a/geozero-cli/Cargo.toml b/geozero-cli/Cargo.toml index f585ec60..509f651c 100644 --- a/geozero-cli/Cargo.toml +++ b/geozero-cli/Cargo.toml @@ -21,3 +21,5 @@ env_logger.workspace = true flatgeobuf.workspace = true geozero = { workspace = true, default-features = true, features = ["with-csv"] } tokio = { workspace = true, default-features = true, features = ["full"] } +geo.workspace = true +geoarrow = { version = "0.4.0-beta.2", features = ["parquet_compression"] } diff --git a/geozero-cli/src/main.rs b/geozero-cli/src/main.rs index 130fa02c..53405b96 100644 --- a/geozero-cli/src/main.rs +++ b/geozero-cli/src/main.rs @@ -1,5 +1,8 @@ use clap::Parser; use flatgeobuf::{FgbReader, FgbWriter, GeometryType, HttpFgbReader}; +use geo::Rect; +use geoarrow::io::parquet::{GeoParquetReaderOptions, GeoParquetRecordBatchReaderBuilder}; +use geoarrow::io::RecordBatchReader; use geozero::csv::{CsvReader, CsvWriter}; use geozero::error::{GeozeroError, Result}; use geozero::geojson::{GeoJsonLineReader, GeoJsonReader, GeoJsonWriter}; @@ -24,7 +27,7 @@ struct Cli { #[arg(short, long, value_parser = parse_extent)] extent: Option, - /// The path or URL to the FlatGeobuf file to read + /// The path to the input file, or the URL for remote FlatGeobuf files input: String, /// The path to the file to write @@ -88,6 +91,26 @@ async fn transform(args: Cli, processor: &mut P) -> Result< Some("jsonl") | Some("geojsonl") => { GeozeroDatasource::process(&mut GeoJsonLineReader::new(filein), processor) } + Some("parquet") | Some("geoparquet") => { + let mut geo_options = GeoParquetReaderOptions::default(); + if let Some(bbox) = &args.extent { + geo_options = geo_options.with_bbox( + Rect::new((bbox.minx, bbox.miny), (bbox.maxx, bbox.maxy)), + None, + ); + } + let reader = GeoParquetRecordBatchReaderBuilder::try_new_with_options( + File::open(path_in)?, + Default::default(), + geo_options, + ) + .map_err(arrow_to_geozero_err)? + .build() + .map_err(arrow_to_geozero_err)?; + + let mut wrapper = RecordBatchReader::new(Box::new(reader)); + wrapper.process(processor) + } Some("fgb") => { let ds = FgbReader::open(&mut filein).map_err(fgb_to_geozero_err)?; let mut ds = if let Some(bbox) = &args.extent { @@ -127,6 +150,7 @@ async fn process(args: Cli) -> Result<()> { } Ok(()) } + fn set_dimensions(processor: &mut SvgWriter<&mut BufWriter>, extent: Option) { if let Some(extent) = extent { processor.set_dimensions(extent.minx, extent.miny, extent.maxx, extent.maxy, 800, 600); @@ -136,6 +160,13 @@ fn set_dimensions(processor: &mut SvgWriter<&mut BufWriter>, extent: Optio } } +fn arrow_to_geozero_err(parquet_err: geoarrow::error::GeoArrowError) -> GeozeroError { + match parquet_err { + geoarrow::error::GeoArrowError::IOError(e) => GeozeroError::IoError(e), + err => GeozeroError::Dataset(format!("Unknown GeoArrow error: {err:?}")), + } +} + fn fgb_to_geozero_err(fgb_err: flatgeobuf::Error) -> GeozeroError { match fgb_err { flatgeobuf::Error::MissingMagicBytes => {