From 7fde40de8a54e6bbcfc7f6490c2b86fe57f4dbb4 Mon Sep 17 00:00:00 2001
From: chenkenbio
Date: Fri, 2 Jun 2023 17:01:54 +0800
Subject: [PATCH] add download.sh

Former-commit-id: 342c17b0c226d40d46c4142ad915015c984678c0
---
 README.md   | 10 +++-------
 download.sh | 23 +++++++++++++++++++++++
 2 files changed, 26 insertions(+), 7 deletions(-)
 create mode 100644 download.sh

diff --git a/README.md b/README.md
index 2e44559..0a3b0df 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,7 @@ See [official guide](https://huggingface.co/docs/transformers/model_doc/bert) fo
 
 **Download SpliceBERT**
 
-- [SpliceBERT.1024nt.tar.gz](https://github.com/biomed-AI/SpliceBERT/releases/download/v0.1/SpliceBERT.1024nt.tar.gz)
-- [SpliceBERT.510nt.tar.gz](https://github.com/biomed-AI/SpliceBERT/releases/download/v0.1/SpliceBERT.510nt.tar.gz)
-- [SpliceBERT-human.510nt.tar.gz](https://github.com/biomed-AI/SpliceBERT/releases/download/v0.1/SpliceBERT-human.510nt.tar.gz)
-
-  The model weights are also available at [zenodo](https://doi.org/10.5281/zenodo.7995778).
+The weights of SpliceBERT can be downloaded from [zenodo](https://doi.org/10.5281/zenodo.7995778): https://zenodo.org/record/7995778/files/models.tar.gz?download=1
 
 **System requirements**
 
@@ -69,8 +65,8 @@ model = AutoModelForSequenceClassification.from_pretrained(SPLICEBERT_PATH, num_
 
 ## Reproduce the analysis in manuscript
 
-Before running the codes, run `bash setup.sh` in the `./examples` folder to compile the codes written in cython (`cython` is required).
-Then, run `bash download.sh` to fetch the data used in the analysis.
+Before running the codes, run `bash download.sh` to fetch the data used in the analysis.
+Then, run `bash setup.sh` in the `./examples` folder to compile the codes written in cython (`cython` is required).
#!/bin/bash
#
# download.sh - fetch the data and model weights used by the SpliceBERT examples.
#
# Usage: bash download.sh
#
# Steps:
#   1. Download data.tar.gz from Zenodo and unpack it into ./examples/.
#   2. Download models.tar.gz from Zenodo and unpack it into the current
#      directory.
#   3. Unzip any DNABERT archive (<k>-new-12w-0.zip, k = 3..6) that is already
#      present in ./models/dnabert/, or tell the user to fetch it manually.
#
# All paths are relative to the current working directory, and ./examples/
# must already exist. A failed step is reported on stderr and the remaining
# steps still run; the exit status is non-zero if any step failed.

set -u

# The download URLs contain `?`, which is a glob character, so they are always
# passed to wget inside double quotes.
readonly ZENODO_FILES="https://zenodo.org/record/7995778/files"

# Becomes 1 as soon as any step fails; used as the script's exit status.
status=0

#######################################
# Download an archive (resuming a partial download) and unpack it.
# Arguments:
#   $1 - URL of the .tar.gz archive
#   $2 - local path the archive is saved to
#   $3 - directory the archive is extracted into
# Returns:
#   0 on success, non-zero if wget or tar fails
#######################################
fetch_and_extract() {
  local url=$1
  local archive=$2
  local dest=$3

  # `tar -C` extracts into $dest without changing the script's own working
  # directory, so a failed extraction cannot strand later steps in $dest.
  wget -c -O "$archive" "$url" && tar -xzvf "$archive" -C "$dest"
}

echo "Downloading the data to ./examples/ ..."
if fetch_and_extract "${ZENODO_FILES}/data.tar.gz?download=1" \
  ./examples/data.tar.gz ./examples; then
  echo "Done"
else
  echo "ERROR: failed to download or extract ./examples/data.tar.gz" >&2
  status=1
fi

echo "Downloading the model weights ..."
if fetch_and_extract "${ZENODO_FILES}/models.tar.gz?download=1" \
  models.tar.gz .; then
  echo "Done"
else
  echo "ERROR: failed to download or extract models.tar.gz" >&2
  status=1
fi

## check dnabert
echo "Preparing the DNABERT weights ..."
# The unzip loop below uses bare relative names, so it must not run unless we
# really are inside ./models/dnabert.
if ! mkdir -p ./models/dnabert || ! cd ./models/dnabert; then
  echo "ERROR: cannot create or enter ./models/dnabert" >&2
  exit 1
fi

for k in 3 4 5 6; do
  weights="${k}-new-12w-0"

  # Already unpacked: nothing to do for this k.
  if [[ -e "$weights" ]]; then
    continue
  fi

  if [[ -e "${weights}.zip" ]]; then
    if unzip "${weights}.zip"; then
      echo "unzip: ${k}-new-12w-0.zip -> ${k}-new-12w-0"
    else
      echo "ERROR: failed to unzip ./models/dnabert/${weights}.zip" >&2
      status=1
    fi
  else
    echo "NOTE: Users should manually download the weights of DNABERT${k} from https://github.com/jerryji1993/DNABERT and decompress it to ./models/dnabert/"
  fi
done

exit "$status"