-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollect_stats.sh
executable file
·76 lines (62 loc) · 1.65 KB
/
collect_stats.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/bash
# Copyright 2021 Anurag Priyam, Queen Mary University of London.
# Abort on the first failing command, and treat a pipeline as failed when any
# of its stages fails (not only the last one).
set -o errexit -o pipefail
# Print the help text to stdout and terminate the script.
# The bare `exit` reuses the status of the preceding print (0 on success).
usage() {
  printf '%s\n' \
    'CompareGenomeQualities script for collecting assembly metrics.' \
    '--help View this message'
  exit
}
# Write a message to stderr, interpreting backslash escapes (echo -e).
# Arguments: $1 - text to print; an empty line is printed when it is omitted.
msg() {
  local text=${1-}
  echo -e "${text}" 1>&2
}
# Report an error on stderr (through msg) and terminate the script.
# Arguments: $1 - message to print
#            $2 - exit status (optional; 1 when not given)
die() {
  local text=$1 status=${2-1}
  msg "${text}"
  exit "${status}"
}
# Parse arguments.
# Only the first word is inspected: -h/--help prints the usage text, any other
# word made of a dash plus at least one more character is rejected, and
# anything else is left in place as the positional argument.
if [[ ${1-} == "-h" || ${1-} == "--help" ]]; then
  usage
elif [[ ${1-} == -?* ]]; then
  die "Unknown option: $1"
fi
# The first positional argument is the directory to collect statistics from.
directory=${1-}
# If we did not get all the required parameters, print usage and exit.
if [[ -z ${directory} ]]; then
  usage
fi
# We need to cd to the given directory: the result globs and output file names
# used below are all relative paths.
# The expansion is quoted so a path containing spaces or glob characters
# reaches cd as a single word, and `--` stops cd from reading it as an option.
# A failed cd aborts the script because errexit is enabled.
cd -- "${directory}"
# Output files.
# These are relative names, so each table is created in whatever directory is
# current at the time it is written.
ng50_tsv='NG50.tsv'                # columns: ID, NG50
busco_tsv='busco.tsv'              # columns: ID, BUSCO
solid_tsv='solid_pairs.tsv'        # columns: ID, Solid pairs
resolved_tsv='resolved_length.tsv' # columns: ID, Resolved length
# Succeed when a glob pattern matches at least one existing path.
# Arguments: $1 - glob pattern (pass it quoted so it reaches compgen unexpanded)
# Returns:   the exit status of `compgen -G`; nothing is printed to stdout.
exists() {
  local pattern=$1
  compgen -G "${pattern}" > /dev/null
}
# NG50
#
# Build a two-column TSV (ID, NG50) from every */quast/report.txt below the
# current directory. The ID is the first path component of the report file.
#
# Arguments: $1 - path of the TSV file to (over)write
# Returns:   non-zero if grep hits a real error (status >= 2) or awk fails.
write_ng50_tsv() {
  local out=$1
  printf 'ID\tNG50\n' > "${out}"
  # -H forces the "path:" prefix even when the glob matches a single report;
  # without it grep prints bare lines and the awk field numbers below no
  # longer line up.
  # grep exits 1 when no line matches. That is not an error here (the table
  # simply stays header-only), so only statuses other than 1 are propagated.
  { grep -H NG50 */quast/report.txt || [[ $? -eq 1 ]]; } \
    | awk -F'/|[ \t]+' -v OFS='\t' '{print $1,$4}' >> "${out}"
}
if exists '*/quast/report.txt'; then
  write_ng50_tsv "${ng50_tsv}"
fi
# BUSCO score
#
# Build a two-column TSV (ID, BUSCO) from the lines containing "C:" in every
# */busco/busco_output/short_summary*.txt below the current directory. The ID
# is the first path component of the summary file; the value is the sixth
# field once the line is split on runs of ']', '/', '%', ':', '[', space, tab.
#
# Arguments: $1 - path of the TSV file to (over)write
# Returns:   non-zero if grep hits a real error (status >= 2) or awk fails.
write_busco_tsv() {
  local out=$1
  printf 'ID\tBUSCO\n' > "${out}"
  # -H forces the "path:" prefix even when the glob matches a single summary;
  # without it grep prints bare lines and the awk field numbers below no
  # longer line up.
  # grep exits 1 when no line matches. That is not an error here (the table
  # simply stays header-only), so only statuses other than 1 are propagated.
  { grep -H 'C:' */busco/busco_output/short_summary*.txt || [[ $? -eq 1 ]]; } \
    | awk -F'[]/%:[ \t]+' -v OFS='\t' '{print $1,$6}' >> "${out}"
}
if exists '*/busco/busco_output/short_summary*.txt'; then
  write_busco_tsv "${busco_tsv}"
fi
# Build a two-column TSV (ID, <header>) from the lines matching a pattern in
# every */illumina/solid_pairs.txt below the current directory. The ID is the
# first path component of the matched file.
#
# Arguments: $1 - path of the TSV file to (over)write
#            $2 - header of the value column
#            $3 - grep pattern selecting the lines of interest
#            $4 - awk field-separator regex used to split "path:line"
#            $5 - number of the awk field that holds the value
# Returns:   non-zero if grep hits a real error (status >= 2) or awk fails.
write_illumina_tsv() {
  local out=$1 header=$2 pattern=$3 fs=$4 field=$5
  printf 'ID\t%s\n' "${header}" > "${out}"
  # -H forces the "path:" prefix even when the glob matches a single file;
  # without it grep prints bare lines and the field numbers no longer line up.
  # grep exits 1 when no line matches. That is not an error here (the table
  # simply stays header-only), so only statuses other than 1 are propagated.
  { grep -H -- "${pattern}" */illumina/solid_pairs.txt || [[ $? -eq 1 ]]; } \
    | awk -F"${fs}" -v OFS='\t' -v col="${field}" '{print $1,$col}' >> "${out}"
}
if exists '*/illumina/solid_pairs.txt'; then
  # Solid pairs
  write_illumina_tsv "${solid_tsv}" 'Solid pairs' \
    'Solidly mapped' '[/:)( \t]+' 9
  # Resolved length
  write_illumina_tsv "${resolved_tsv}" 'Resolved length' \
    'Resolved length' '[/: \t]+' 6
fi