-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path3-bundle.sh
executable file
·90 lines (75 loc) · 2.06 KB
/
3-bundle.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/bin/sh
# Generate CBZ file and do OCR cleanup
# Resulting files:
# - text/
# - <current_directory_name> + .cbz
# - <current_directory_name>_text + .zip
# go to the directory where this script is located; every path used below
# (config.sh, img/, text-raw/, ...) is relative to it.
# - CDPATH is cleared for this one command so a relative script path cannot be
#   resolved against some unrelated directory listed in the user's CDPATH.
# - Abort when the cd fails: continuing would source a config.sh from, and
#   write output files into, whatever directory the caller happened to be in.
CDPATH='' cd -- "$(dirname -- "$0")" || exit 1
# include the config file
. ./config.sh
# Get current directory name (used as the stem of the generated archives)
basename="$(basename "$PWD")"
# Compress images
# Converts every img/orig/<page>.pnm into
# img/compressed/<page>.$compressed_image_format with ImageMagick's convert.
# Settings read here (expected to be provided by config.sh):
#   compressed_image_format   target extension; empty disables this step
#   compressed_image_quality  optional value passed to -quality
#   rotate                    optional angle passed to -rotate (0/empty = none)

# Print the page name of an original image path: directory and the ".pnm"
# extension removed. basename is used because tr -d deletes a *set of
# characters* everywhere in the string, not a substring, which mangles any
# file name containing one of the letters of "img/orig/" or ".pnm".
page_stem() {
  basename "$1" .pnm
}

if [ -d img/orig ] && [ -n "$compressed_image_format" ]; then
  echo Compressing images...
  # -p: do not fail when the directory is left over from a previous run
  mkdir -p img/compressed
  if [ -n "$compressed_image_quality" ]; then
    quality="-quality $compressed_image_quality"
  fi
  # Quoted and defaulted so an unset or empty $rotate means "no rotation"
  # instead of a test syntax error.
  if [ "${rotate:-0}" -ne 0 ]; then
    rotation="-rotate $rotate"
  fi
  for f in img/orig/*.pnm; do
    # An unmatched glob stays literal; skip it instead of calling convert.
    [ -e "$f" ] || continue
    page=$(page_stem "$f")
    # $quality and $rotation are deliberately unquoted: each holds an option
    # plus its value and must split into two words (or vanish when empty).
    # shellcheck disable=SC2086
    convert $quality $rotation "$f" "img/compressed/$page.$compressed_image_format"
  done
fi
# Make CBZ file
# Stores (-0: no compression) every file of img/compressed in
# <current_directory_name>.cbz, written two levels up from img/compressed,
# i.e. next to this script.
if [ -d img/compressed ]; then
  echo CBZ-ing...
  # Run in a subshell: the working directory of the rest of the script stays
  # untouched, and zip only runs when the cd actually succeeded (otherwise it
  # would archive the wrong directory).
  (
    cd img/compressed || exit 1
    zip -q -0 "../../$basename.cbz" ./*
  )
fi
# Clean and merge text files
# Copies text-raw/ to text/, trims every page-* file, applies the
# language-specific fixes, merges all pages into text/complete.txt and zips
# the content of text/ into <current_directory_name>_text.zip.

# Print the page number encoded in a "page-<number>.txt" file name.
# Prefix/suffix removal is used because tr -d deletes individual characters
# anywhere in the name rather than the "page-" and ".txt" substrings.
page_number() {
  _pn=${1#page-}
  printf '%s\n' "${_pn%.txt}"
}

if [ -d text-raw ]; then
  echo Handling text...
  # Copy only when text/ does not exist yet: with text/ already present,
  # cp -r would nest the copy as text/text-raw instead of refreshing text/.
  [ -d text ] || cp -r text-raw text
  cd text || exit 1
  # Start from an empty merge file so a re-run does not append every page a
  # second time.
  : > complete.txt
  for f in page-*; do
    # An unmatched glob stays literal; skip it.
    [ -e "$f" ] || continue
    # remove blank spaces from beginning and end of each page
    # we use Python here, for its handy strip() function
    # The file name is passed as an argument, not pasted into the Python
    # source, so quotes or backslashes in a name cannot break or inject code.
    # TODO: maybe loop through files inside python instead of invoking it multiple times
    # - https://stackoverflow.com/questions/11968976/list-files-only-in-the-current-directory
    python3 -c '
import sys
path = sys.argv[1]
with open(path, "r") as fi:
    text = fi.read().strip()
with open(path, "w") as fo:
    fo.write(text)
' "$f"
    # Romanian-specific fixes
    if [ "$language" = 'ron' ]; then
      # fix diacritics and quotation marks
      sed -i -E '
s/ã/ă/g; s/Ã/Ă/g;
s/ş/ș/g; s/Ş/Ș/g;
s/ţ/ț/g; s/Ţ/Ț/g;
s/“/”/g' "$f"
    fi
    # Get page number from file name
    pag=$(page_number "$f")
    # Merge text files
    {
      printf '===== %s =====\n\n' "$pag"
      cat "$f"
      printf '\n\n\n'
    } >> complete.txt
  done
  # Zip
  zip -q "../${basename}_text.zip" ./*
  cd ..
fi
echo Done.