From 353e71d61f86557e79aea0e07067b4bd6b7cc5ee Mon Sep 17 00:00:00 2001 From: Faisal Dosani Date: Thu, 9 Jan 2025 14:17:50 -0500 Subject: [PATCH] house cleaning. Fixes #356 --- CONTRIBUTORS | 4 +++- README.md | 18 +++++++++++++----- ROADMAP.rst | 10 ++-------- docs/source/pandas_usage.rst | 4 ++-- pyproject.toml | 6 ++++-- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index e59e6454..1ec09cae 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -4,4 +4,6 @@ - Mark Zhou - Ian Whitestone - Faisal Dosani -- Lorenzo Mercado \ No newline at end of file +- Lorenzo Mercado +- Jacob Dawang +- Raymond Haffar diff --git a/README.md b/README.md index 7ac5f43b..0796f696 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,19 @@ ![PyPI - Downloads](https://img.shields.io/pypi/dm/datacompy) -DataComPy is a package to compare two Pandas DataFrames. Originally started to -be something of a replacement for SAS's ``PROC COMPARE`` for Pandas DataFrames -with some more functionality than just ``Pandas.DataFrame.equals(Pandas.DataFrame)`` -(in that it prints out some stats, and lets you tweak how accurate matches have to be). -Then extended to carry that functionality over to Spark Dataframes. +DataComPy is a package to compare two DataFrames (or tables) such as Pandas, Spark, Polars, and +even Snowflake. Originally it was created to be something of a replacement +for SAS's ``PROC COMPARE`` for Pandas DataFrames with some more functionality than +just ``Pandas.DataFrame.equals(Pandas.DataFrame)`` (in that it prints out some stats, +and lets you tweak how accurate matches have to be). Supported types include: + +- Pandas +- Polars +- Spark +- Snowflake (via snowpark) +- Dask (via Fugue) +- DuckDB (via Fugue) + ## Quick Installation diff --git a/ROADMAP.rst b/ROADMAP.rst index 9e9d1209..7e05ef00 100644 --- a/ROADMAP.rst +++ b/ROADMAP.rst @@ -2,13 +2,7 @@ datacompy Roadmap ----------------- At this current time ``datacompy`` is in a stable state. 
We are planning on continuing to -add features and functionality as the community of users asks for them, but there are no +add features and functionality as the community of users asks for them, but there are no pressing issues which we are looking to add in immediately. -There are some longer term issues which are open for people to work on, and some which are more of a nice to have. -We are looking for contributors and also maintaners to help with the project. - -- Add in docs how to change the number of mismatches in report `#6 `_ -- Make duplicate handling better `#7 `_ -- Refactor Spark datacompy `#13 `_ -- Drop Python 3.7 suport `#173 `_ +Please feel free to check the issues section of the repository for the most up-to-date list. diff --git a/docs/source/pandas_usage.rst b/docs/source/pandas_usage.rst index 8c1e62f9..a64396c7 100644 --- a/docs/source/pandas_usage.rst +++ b/docs/source/pandas_usage.rst @@ -5,7 +5,7 @@ Overview -------- The main goal of ``datacompy`` is to provide a human-readable output describing -differences between two dataframes. For example, if you have two dataframes +differences between two dataframes. 
For example, if you have two dataframes containing data like: df1 @@ -289,4 +289,4 @@ There's a number of limitations with ``datacompy``: #Numpy testing npt.assert_array_equal(arr1, arr2) - npt.assert_almost_equal(obj1, obj2) \ No newline at end of file + npt.assert_almost_equal(obj1, obj2) diff --git a/pyproject.toml b/pyproject.toml index 51895cdc..1333c4c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,12 +3,14 @@ name = "datacompy" description = "Dataframe comparison in Python" readme = "README.md" authors = [ + { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, { name="Ian Robertson" }, { name="Dan Coates" }, - { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, ] maintainers = [ - { name="Faisal Dosani", email="faisal.dosani@capitalone.com" } + { name="Faisal Dosani", email="faisal.dosani@capitalone.com" }, + { name="Jacob Dawang", email="jacob.dawang@capitalone.com" }, + { name="Raymond Haffar", email="raymond.haffar@capitalone.com" }, ] license = {text = "Apache Software License"} dependencies = ["pandas<=2.2.3,>=0.25.0", "numpy<=2.2.0,>=1.22.0", "ordered-set<=4.1.0,>=4.0.2", "polars[pandas]<=1.17.1,>=0.20.4"]