From cab7aaf75afe0615e95df4c9b5cb7186d9704b06 Mon Sep 17 00:00:00 2001 From: Aubin Date: Tue, 7 Jan 2025 16:38:59 +0100 Subject: [PATCH] add webcrawler mime types --- .../src/connectors/webcrawler/temporal/activities.ts | 3 ++- types/src/shared/internal_mime_types.ts | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/connectors/src/connectors/webcrawler/temporal/activities.ts b/connectors/src/connectors/webcrawler/temporal/activities.ts index 49e208c376c1f..70a441d0aec79 100644 --- a/connectors/src/connectors/webcrawler/temporal/activities.ts +++ b/connectors/src/connectors/webcrawler/temporal/activities.ts @@ -3,6 +3,7 @@ import { stripNullBytes, WEBCRAWLER_MAX_DEPTH, WEBCRAWLER_MAX_PAGES, + WEBCRAWLER_MIME_TYPES, } from "@dust-tt/types"; import { validateUrl } from "@dust-tt/types/src/shared/utils/url_utils"; import { Context } from "@temporalio/activity"; @@ -291,7 +292,7 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) { parents, parentId: parents[1] || null, title: folder, - mimeType: "application/vnd.dust.webcrawler.folder", + mimeType: WEBCRAWLER_MIME_TYPES.FOLDER, }); createdFolders.add(folder); diff --git a/types/src/shared/internal_mime_types.ts b/types/src/shared/internal_mime_types.ts index 4e63326135760..4677383678ff6 100644 --- a/types/src/shared/internal_mime_types.ts +++ b/types/src/shared/internal_mime_types.ts @@ -70,6 +70,14 @@ export const SNOWFLAKE_MIME_TYPES = { export type SnowflakeMimeType = (typeof SNOWFLAKE_MIME_TYPES)[keyof typeof SNOWFLAKE_MIME_TYPES]; +export const WEBCRAWLER_MIME_TYPES = { + FOLDER: "application/vnd.dust.webcrawler.folder", + // pages are upserted as text/html, not an internal mime type +}; + +export type WebcrawlerMimeType = + (typeof WEBCRAWLER_MIME_TYPES)[keyof typeof WEBCRAWLER_MIME_TYPES]; + export type DustMimeType = | ConfluenceMimeType | GithubMimeType @@ -78,4 +86,5 @@ export type DustMimeType = | MicrosoftMimeType | NotionMimeType | SlackMimeType - | SnowflakeMimeType; + | SnowflakeMimeType + | WebcrawlerMimeType;