Skip to content

Commit

Permalink
add webcrawler mime types
Browse files Browse the repository at this point in the history
  • Loading branch information
aubin-tchoi committed Jan 7, 2025
1 parent 6309cab commit cab7aaf
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
3 changes: 2 additions & 1 deletion connectors/src/connectors/webcrawler/temporal/activities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
stripNullBytes,
WEBCRAWLER_MAX_DEPTH,
WEBCRAWLER_MAX_PAGES,
WEBCRAWLER_MIME_TYPES,
} from "@dust-tt/types";
import { validateUrl } from "@dust-tt/types/src/shared/utils/url_utils";
import { Context } from "@temporalio/activity";
Expand Down Expand Up @@ -291,7 +292,7 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) {
parents,
parentId: parents[1] || null,
title: folder,
mimeType: "application/vnd.dust.webcrawler.folder",
mimeType: WEBCRAWLER_MIME_TYPES.FOLDER,
});

createdFolders.add(folder);
Expand Down
11 changes: 10 additions & 1 deletion types/src/shared/internal_mime_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ export const SNOWFLAKE_MIME_TYPES = {
/**
 * Union of the property value types of `SNOWFLAKE_MIME_TYPES`, derived with an
 * indexed-access type so it follows the constant without being redeclared.
 *
 * NOTE(review): this is a union of string literals only if
 * `SNOWFLAKE_MIME_TYPES` is declared `as const` (its declaration is not
 * visible here); otherwise it resolves to plain `string` — confirm.
 */
export type SnowflakeMimeType =
(typeof SNOWFLAKE_MIME_TYPES)[keyof typeof SNOWFLAKE_MIME_TYPES];

/**
 * Internal MIME types used for webcrawler content.
 *
 * Only folders have an internal MIME type; pages are upserted as `text/html`
 * and therefore have no entry here.
 *
 * The object is declared `as const` so that each value keeps its string-literal
 * type. Without it the values are inferred as `string`, which makes
 * `WebcrawlerMimeType` (and any union that includes it) collapse to `string`.
 */
export const WEBCRAWLER_MIME_TYPES = {
  FOLDER: "application/vnd.dust.webcrawler.folder",
  // pages are upserted as text/html, not an internal mime type
} as const;

/** Union of the literal MIME type strings declared in `WEBCRAWLER_MIME_TYPES`. */
export type WebcrawlerMimeType =
  (typeof WEBCRAWLER_MIME_TYPES)[keyof typeof WEBCRAWLER_MIME_TYPES];

export type DustMimeType =
| ConfluenceMimeType
| GithubMimeType
Expand All @@ -78,4 +86,5 @@ export type DustMimeType =
| MicrosoftMimeType
| NotionMimeType
| SlackMimeType
| SnowflakeMimeType;
| SnowflakeMimeType
| WebcrawlerMimeType;

0 comments on commit cab7aaf

Please sign in to comment.