From 19777e72e5063950ce376551c02010b46d8637c1 Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta
Date: Tue, 21 May 2024 10:43:55 -0700
Subject: [PATCH] Export libcudf env for long string support

Signed-off-by: Ayush Dattagupta
---
 nemo_curator/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/nemo_curator/__init__.py b/nemo_curator/__init__.py
index 80af4d698..650a8c511 100644
--- a/nemo_curator/__init__.py
+++ b/nemo_curator/__init__.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 import sys
 
 import dask
@@ -48,3 +49,6 @@
 # See https://github.com/NVIDIA/NeMo-Curator/issues/33
 # This also happens when reading and writing to files
 dask.config.set({"dataframe.convert-string": False})
+
+# Enable libcudf large string support
+os.environ["LIBCUDF_LARGE_STRINGS_ENABLED"] = "1"