-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbowtie2.qsub
executable file
·89 lines (78 loc) · 2.71 KB
/
bowtie2.qsub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#############################################
## FIND UNMAPPED READS
## FOR PATHOGEN DISCOVERY
## USING BOWTIE
#############################################
## MULTITHREAD ARRAY JOB
## AND MERGE
#############################################
## This script produces
## ${prefix}_1.unmapped.fastq and
## ${prefix}_2.unmapped.fastq
#############################################
export prefix=$1
export input_1=$2
export input_2=$3
export ref_bowtie_transcriptome=$4
# export prefix=ARC_3
# export input_1=${prefix}_1.fastq
# export input_2=${prefix}_2.fastq
# export ref_bowtie_transcriptome=/ifs/scratch/c2b2/rr_lab/shares/ref/hg19/bowtie2/transcriptome/transcriptome_fasta
#export njobs=2
export njobs=124
export npairs_per_job=524288
#export npairs_per_job=262144 # 2^18 (number used to estimate the insert size distribution in BWA)
export PATH=$PATH:/ifs/home/c2b2/rr_lab/shares/bin/bowtie2-2.0.2
#############################################
## ARRAY JOB
## The qsub option -t controls the number of subjobs.
## The bowtie option -s 'skip' tells each subjob where to start,
## with -u this gives a subset of the input reads.
## Without -S, bowtie sends a sam file to stdout; this is thrown away.
## The option --un-conc writes paired-end reads that fail to align concordantly
## (both pairs are written, even if only one fails to align);
## unpaired fastq cannot be written from paired input (--un).
## System env vars, including those set above, are exported to jobs (-V)
qsub_output=$( cat << 'EOF' | qsub -t 1-$njobs
#$ -l mem=2G,time=8::
#$ -pe smp 4
#$ -N bowtie
#$ -R y
#$ -j n
#$ -o /dev/null
#$ -e ./logs
#$ -V
#$ -cwd
npairs_skip=$(( ($SGE_TASK_ID - 1) * $npairs_per_job ))
bowtie2 \
-x $ref_bowtie_transcriptome \
-1 $input_1 \
-2 $input_2 \
-u $npairs_per_job \
-s $npairs_skip \
-p 4 \
--very-fast-local \
--time \
--un-conc ${prefix}_%.$SGE_TASK_ID.unmapped.fastq \
> /dev/null
EOF
)
## Alternate redirection for unmapped BAM output
## | samtools view -Sbf 4 > ${prefix}.$SGE_TASK_ID.unmapped.bam \
echo $qsub_output
jid=$(echo $qsub_output | cut -d" " -f3 | cut -d. -f1)
#exit
#############################################
## MERGE JOBS
cat << 'EOF' | qsub -hold_jid $jid
#$ -N merge_bowtie
#$ -l mem=500M,time=1::
#$ -V
cat ${prefix}_1.*.unmapped.fastq > ${prefix}_1.unmapped.fastq
rm ${prefix}_1.*.unmapped.fastq
echo -e ${prefix}_1.unmapped.fastq \\t $(( $(wc -l < ${prefix}_1.unmapped.fastq) / 4 )) > Nreads.txt
cat ${prefix}_2.*.unmapped.fastq > ${prefix}_2.unmapped.fastq
rm ${prefix}_2.*.unmapped.fastq
echo -e ${prefix}_2.unmapped.fastq \\t $(( $(wc -l < ${prefix}_2.unmapped.fastq) / 4 )) >> Nreads.txt
## samtools merge ${prefix}.unmapped.bam ${prefix}.*.unmapped.bam
EOF