Skip to content

Commit

Permalink
feat: fetch datasets from resp. dataset repo branch; enable it on vercel
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov committed Sep 4, 2023
1 parent 92c41d1 commit 93f31a9
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 26 deletions.
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ SYNC_DESTINATION=123.456.789.123:~/nextclade
DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3
# DATA_FULL_DOMAIN=http://localhost:27722

# If enabled, Nextclade Web will first attempt to fetch datasets from the corresponding GitHub branch. If this attempt
# fails, it will use `DATA_FULL_DOMAIN` as usual.
DATA_TRY_GITHUB_BRANCH=0

# Directory path (relative to the root of the project) from which local data server takes the data.
# Useful for local testing on new datasets. See: https://github.com/neherlab/nextclade_data
# It is recommended to keep the `nextclade_data` git repo in a sibling directory of `nextclade` git repo.
Expand Down
2 changes: 2 additions & 0 deletions packages_rs/nextclade-web/config/next/lib/getEnvVars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export function getEnvVars() {
const DOMAIN = getDomain()
const DOMAIN_STRIPPED = DOMAIN.replace('https://', '').replace('http://', '')
const DATA_FULL_DOMAIN = getenv('DATA_FULL_DOMAIN')
const DATA_TRY_GITHUB_BRANCH = getenv('DATA_TRY_GITHUB_BRANCH')

const common = {
BABEL_ENV,
Expand All @@ -20,6 +21,7 @@ export function getEnvVars() {
DOMAIN,
DOMAIN_STRIPPED,
DATA_FULL_DOMAIN,
DATA_TRY_GITHUB_BRANCH,
}

if (PRODUCTION) {
Expand Down
2 changes: 2 additions & 0 deletions packages_rs/nextclade-web/config/next/next.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const {
DOMAIN,
DOMAIN_STRIPPED,
DATA_FULL_DOMAIN,
DATA_TRY_GITHUB_BRANCH,
} = getEnvVars()

const BRANCH_NAME = getGitBranch()
Expand All @@ -61,6 +62,7 @@ const clientEnv = {
DOMAIN,
DOMAIN_STRIPPED,
DATA_FULL_DOMAIN,
DATA_TRY_GITHUB_BRANCH,
BLOCK_SEARCH_INDEXING: DOMAIN === RELEASE_URL ? '0' : '1',
}

Expand Down
2 changes: 2 additions & 0 deletions packages_rs/nextclade-web/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export const URL_GITHUB_COMMITS = 'https://github.com/nextstrain/nextclade/commi
export const URL_CLADE_SCHEMA_REPO = 'https://github.com/nextstrain/ncov-clades-schema/'
export const URL_CLADE_SCHEMA_SVG = 'https://raw.githubusercontent.com/nextstrain/ncov-clades-schema/master/clades.svg'

/** Base URL for raw file access to the `nextstrain/nextclade_data` GitHub repository; callers append a branch name and path. */
export const URL_GITHUB_DATA_RAW = 'https://raw.githubusercontent.com/nextstrain/nextclade_data' as const

export const SUPPORT_EMAIL = '[email protected]'

export const TWITTER_USERNAME_RAW = 'nextstrain' as const
Expand Down
11 changes: 11 additions & 0 deletions packages_rs/nextclade-web/src/io/axiosFetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,14 @@ export async function axiosHead<TData = unknown>(

return res.data as TData
}

/**
 * Best-effort variant of `axiosHead`: issues the same request, but instead of
 * propagating a failure it resolves to `undefined`.
 *
 * @param url - URL to probe; forwarded to `axiosHead` unchanged
 * @param options - optional axios request config; forwarded to `axiosHead` unchanged
 * @returns whatever `axiosHead` resolves to, or `undefined` if it throws or rejects
 */
export async function axiosHeadOrUndefined<TData = unknown>(
  url: string | undefined,
  options?: AxiosRequestConfig,
): Promise<TData | undefined> {
  let result: TData | undefined
  try {
    result = await axiosHead<TData>(url, options)
  } catch {
    // Any error (network, HTTP status, bad URL) is deliberately mapped to "no result"
    result = undefined
  }
  return result
}
71 changes: 63 additions & 8 deletions packages_rs/nextclade-web/src/io/fetchDatasets.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
/* eslint-disable prefer-destructuring */
import type { ParsedUrlQuery } from 'querystring'
import { findSimilarStrings } from 'src/helpers/string'
import { axiosHeadOrUndefined } from 'src/io/axiosFetch'
import { isGithubUrlOrShortcut, parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub'

import { Dataset } from 'src/types'
import {
Expand All @@ -10,9 +13,11 @@ import {
} from 'src/io/fetchDatasetsIndex'
import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe'
import { useRecoilValue, useSetRecoilState } from 'recoil'
import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom, datasetUpdatedAtom } from 'src/state/dataset.state'
import { datasetCurrentAtom, datasetsAtom, datasetUpdatedAtom } from 'src/state/dataset.state'
import { useQuery } from 'react-query'
import { isNil } from 'lodash'
import urljoin from 'url-join'
import { URL_GITHUB_DATA_RAW } from 'src/constants'

export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets: Dataset[]) {
// Retrieve dataset-related URL params and try to find a dataset based on these params
Expand Down Expand Up @@ -41,8 +46,60 @@ export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets
return dataset
}

export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServerUrlDefault: string) {
const datasetServerUrl = getQueryParamMaybe(urlQuery, 'dataset-server') ?? datasetServerUrlDefault
/**
 * Looks for a branch in the default dataset repository on GitHub whose name equals
 * `process.env.BRANCH_NAME`, and reports it as a dataset server URL if it appears to
 * contain a dataset index.
 *
 * The probe is a HEAD request to `<raw repo url>/<branch>/data_output/index.json`.
 *
 * NOTE(review): the check relies on the HEAD helper resolving to a truthy value on
 * success - confirm against `axiosHead`.
 * NOTE(review): the returned URL is the branch root (without `data_output`), while the
 * probed index lives under `data_output` - confirm that consumers expect this.
 *
 * @returns the branch root URL when the probe succeeds; `undefined` when `BRANCH_NAME`
 *   is unset/empty or the probe yields nothing
 */
export async function getGithubDatasetServerUrl(): Promise<string | undefined> {
  // Kept as a direct `process.env.X` access (this file disables `prefer-destructuring`),
  // presumably so that build-time env substitution can see it - verify before changing.
  const branchName = process.env.BRANCH_NAME
  if (!branchName) {
    return undefined
  }

  const branchRootUrl = urljoin(URL_GITHUB_DATA_RAW, branchName)
  const indexJsonUrl = urljoin(branchRootUrl, 'data_output', 'index.json')

  const headResult = await axiosHeadOrUndefined(indexJsonUrl)
  return headResult ? branchRootUrl : undefined
}

/**
 * Converts a root-relative path (one starting with `/`) into an absolute URL on the
 * current page's origin. Anything else is returned unchanged.
 *
 * Outside of a browser (`window` is not defined) the input is always returned unchanged.
 *
 * @param url - absolute URL or root-relative path
 * @returns absolute URL when running in a browser and `url` starts with `/`; otherwise `url` as is
 */
export function toAbsoluteUrl(url: string): string {
  // `startsWith` rather than `slice(0) === '/'`: `slice(0)` yields the whole string,
  // so the old check only matched the exact value '/' and missed paths like '/data/v3'.
  if (typeof window !== 'undefined' && url.startsWith('/')) {
    return urljoin(window.location.origin, url)
  }
  return url
}

/**
 * Decides which dataset server URL to use, based on the page URL query and env vars.
 *
 * Order of precedence:
 *  1. `dataset-server` URL param formatted as a GitHub URL or GitHub URL shortcut:
 *     converted to a `raw.githubusercontent.com` URL and used without any checking.
 *  2. Any other explicit `dataset-server` URL param (except the `gh`/`github` keywords):
 *     used as is. An explicit URL is never overridden by `DATA_TRY_GITHUB_BRANCH`.
 *  3. GitHub branch lookup (see `getGithubDatasetServerUrl`), attempted when requested
 *     with `&dataset-server=gh` or `&dataset-server=github`, or when no `dataset-server`
 *     param is given and the `DATA_TRY_GITHUB_BRANCH` env var is `'1'`.
 *  4. The `DATA_FULL_DOMAIN` env var, or `/` if it is not set. This is also the fallback
 *     when the GitHub branch lookup in step 3 yields nothing, so that the `gh`/`github`
 *     keywords themselves are never used as a URL.
 *
 * @param urlQuery - parsed URL query of the current page
 * @returns dataset server URL; relative paths are converted to absolute via `toAbsoluteUrl`
 */
export async function getDatasetServerUrl(urlQuery: ParsedUrlQuery): Promise<string> {
  // Get dataset URL from query URL params.
  const requestedUrl = getQueryParamMaybe(urlQuery, 'dataset-server')

  // If the URL is formatted as a GitHub URL or as a GitHub URL shortcut, use it without any checking
  if (requestedUrl && isGithubUrlOrShortcut(requestedUrl)) {
    const { owner, repo, branch, path } = await parseGitHubRepoUrlOrShortcut(requestedUrl)
    return urljoin('https://raw.githubusercontent.com', owner, repo, branch, path)
  }

  // `gh` and `github` are keywords requesting the GitHub branch lookup, not URLs.
  // Drop them here so that they cannot leak into the result if the lookup fails.
  const isGithubKeyword = requestedUrl === 'gh' || requestedUrl === 'github'
  let datasetServerUrl = isGithubKeyword ? undefined : requestedUrl

  // If requested to try GitHub-hosted datasets either using `DATA_TRY_GITHUB_BRANCH` env var (e.g. from
  // `.env` file), or using `&dataset-server=gh` or `&dataset-server=github` URL parameters, then check if the
  // corresponding branch in the default data repo on GitHub contains an `index.json` file. And if yes, use it.
  // The env var only applies when the user has not explicitly asked for a particular server.
  const shouldTryGithubBranch =
    isGithubKeyword || (datasetServerUrl == null && process.env.DATA_TRY_GITHUB_BRANCH === '1')
  if (shouldTryGithubBranch) {
    datasetServerUrl = await getGithubDatasetServerUrl()
  }

  // If none of the above, use hardcoded default URL (from `.env` file).
  // If the URL happens to be a relative path, then convert to absolute URL (on the app's current host)
  return toAbsoluteUrl(datasetServerUrl ?? process.env.DATA_FULL_DOMAIN ?? '/')
}

export async function initializeDatasets(urlQuery: ParsedUrlQuery) {
const datasetServerUrl = await getDatasetServerUrl(urlQuery)

const datasetsIndexJson = await fetchDatasetsIndex(datasetServerUrl)

Expand All @@ -57,11 +114,10 @@ export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServer
/** Refetch dataset index periodically and update the local copy of it */
export function useUpdatedDatasetIndex() {
const setDatasetsState = useSetRecoilState(datasetsAtom)
const datasetServerUrl = useRecoilValue(datasetServerUrlAtom)
useQuery(
'refetchDatasetIndex',
async () => {
const { currentDataset: _, ...datasetsState } = await initializeDatasets({}, datasetServerUrl)
const { currentDataset: _, ...datasetsState } = await initializeDatasets({})
setDatasetsState(datasetsState)
},
{
Expand Down Expand Up @@ -89,10 +145,9 @@ export function useUpdatedDataset() {
'currentDatasetState',
async () => {
const path = datasetCurrent?.path
const refAccession = datasetCurrent?.attributes.reference.value
const updatedAt = datasetCurrent?.version?.updatedAt
if (!isNil(refAccession) && !isNil(updatedAt)) {
const candidateDatasets = filterDatasets(datasets, path, refAccession)
if (!isNil(updatedAt)) {
const candidateDatasets = filterDatasets(datasets, path)
const updatedDataset = candidateDatasets.find((candidate) => {
const candidateTag = candidate.version?.updatedAt
return candidateTag && candidateTag > updatedAt
Expand Down
5 changes: 2 additions & 3 deletions packages_rs/nextclade-web/src/pages/_app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import { SEO } from 'src/components/Common/SEO'
import { Plausible } from 'src/components/Common/Plausible'
import i18n, { changeLocale, getLocaleWithKey } from 'src/i18n/i18n'
import { theme } from 'src/theme'
import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom } from 'src/state/dataset.state'
import { datasetCurrentAtom, datasetsAtom } from 'src/state/dataset.state'
import { ErrorBoundary } from 'src/components/Error/ErrorBoundary'
import { PreviewWarning } from 'src/components/Common/PreviewWarning'

Expand Down Expand Up @@ -99,8 +99,7 @@ export function RecoilStateInitializer() {
return datasetInfo
}

const datasetServerUrlDefault = await getPromise(datasetServerUrlAtom)
return initializeDatasets(urlQuery, datasetServerUrlDefault)
return initializeDatasets(urlQuery)
})
.catch((error) => {
// Dataset error is fatal and we want error to be handled in the ErrorBoundary
Expand Down
15 changes: 0 additions & 15 deletions packages_rs/nextclade-web/src/state/dataset.state.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { isNil } from 'lodash'
import { atom, DefaultValue, selector } from 'recoil'
import urljoin from 'url-join'

import type { Dataset } from 'src/types'
// import { GENE_OPTION_NUC_SEQUENCE } from 'src/constants'
Expand All @@ -10,20 +9,6 @@ import { persistAtom } from 'src/state/persist/localStorage'
import { isDefaultValue } from 'src/state/utils/isDefaultValue'
import { areDatasetsEqual } from 'src/types'

/**
 * Returns the default dataset server URL, taken from the `DATA_FULL_DOMAIN` env var
 * (or `/` if it is not set).
 *
 * When running in a browser and the configured value is a root-relative path
 * (starts with `/`), the current page's origin is prepended to make it absolute.
 *
 * @returns the configured URL, made absolute when possible
 */
export function getDefaultDatasetServer(): string {
  const configuredUrl = process.env.DATA_FULL_DOMAIN ?? '/'
  // `startsWith` rather than `slice(0) === '/'`: `slice(0)` yields the whole string,
  // so the old check only matched the exact value '/' and missed paths like '/v3'.
  if (typeof window !== 'undefined' && configuredUrl.startsWith('/')) {
    return urljoin(window.location.origin, configuredUrl)
  }
  return configuredUrl
}

/**
 * Recoil atom holding the dataset server URL.
 * The default is computed once, by calling `getDefaultDatasetServer()` when this module is evaluated.
 */
export const datasetServerUrlAtom = atom<string>({
key: 'datasetServerUrl',
default: getDefaultDatasetServer(),
})

export interface Datasets {
datasets: Dataset[]
}
Expand Down
2 changes: 2 additions & 0 deletions scripts/build_on_vercel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ sed -i'' "s|PROD_ENABLE_TYPE_CHECKS=1|PROD_ENABLE_TYPE_CHECKS=0|g" .env
sed -i'' "s|PROD_ENABLE_ESLINT=1|PROD_ENABLE_ESLINT=0|g" .env
sed -i'' "s|PROD_ENABLE_STYLELINT=1|PROD_ENABLE_STYLELINT=0|g" .env
sed -i'' "s|DATA_TRY_GITHUB_BRANCH=0|DATA_TRY_GITHUB_BRANCH=1|g" .env
cd packages_rs/nextclade-web
yarn install --frozen-lockfile
Expand Down

1 comment on commit 93f31a9

@vercel
Copy link

@vercel vercel bot commented on 93f31a9 Sep 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextclade – ./

nextclade-nextstrain.vercel.app
nextclade.vercel.app
nextclade-git-master-nextstrain.vercel.app

Please sign in to comment.