Skip to content

Commit

Permalink
OLA Integration
Browse files Browse the repository at this point in the history
  • Loading branch information
iMac_Abdallah authored and iMac_Abdallah committed Sep 16, 2024
1 parent c3650b1 commit 15ac838
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 14 deletions.
1 change: 1 addition & 0 deletions README.md
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This script is made to integrate Operandi with other tools such as Goobi and Kit
`script_docker.sh` is for terminal use with OCR-D docker installation.
`goobi_operandi.sh` is used for Operandi-Goobi integration.
`kitodo_operandi.sh` is used for Operandi-Kitodo integration.
`upload_to_ola_hd.sh` is used for OLA_HD-Kitodo-Goobi integration.



Expand Down
Empty file modified default_workflow.nf
100644 → 100755
Empty file.
12 changes: 6 additions & 6 deletions goobi_operandi.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ WORKFLOW="default_workflow.nf"
METS_URL=""
IMAGE_DIR=$(pwd)/images
EXT="jpg"
CPUs=4
RAM=8
CPUs=8
RAM=64
ZIP=""
workflow_id="default_workflow"
LOCAL_OCRD=false
Expand Down Expand Up @@ -423,13 +423,13 @@ upload_to_ola_hd() {
# Unpack the OCR-D results archive and move its contents into the process's
# ocr/ directory under $PARENT_WORKSPACE.
# NOTE(review): this listing is a rendered diff without +/- markers, so
# pre-change and post-change lines appear side by side. The "_alto" mkdir/mv
# pair looks like the removed version -- confirm against the committed file.
handle_results() {
echo "Process title is $PROCESS_TITLE"
# Extract into "<workspace>_results"; -o overwrites existing files without prompting.
unzip -o "$OCRD_RESULTS" -d "$WORKSPACE_DIR"_results
mkdir -p $PARENT_WORKSPACE/ocr/"$PROCESS_TITLE"_alto
mv -f "$WORKSPACE_DIR"_results/data/*ALTO*/* $PARENT_WORKSPACE/ocr/"$PROCESS_TITLE"_alto/
# Record the path of the results archive in a hidden file in the parent workspace.
echo "$OCRD_RESULTS" > "$PARENT_WORKSPACE/.ocrd_results_path"
mkdir -p $PARENT_WORKSPACE/ocr/
# Move everything under data/ (not only the ALTO file group) into ocr/.
mv -f "$WORKSPACE_DIR"_results/data/* $PARENT_WORKSPACE/ocr/

}

# Remove the working files of a run: .nextflow*, tmp/, work/, report*, the
# ocrd.log in the parent workspace, and the workspace directories.
# NOTE(review): rendered diff -- the two rm lines look like the old and new
# versions of the same statement; the second one additionally removes
# $OCRD_RESULTS. Confirm against the committed file.
cleanup(){
rm -r .nextflow* tmp/ work/ report* $PARENT_WORKSPACE/ocrd.log "$WORKSPACE_DIR"_local "$WORKSPACE_DIR"_results $WORKSPACE_DIR
rm -r .nextflow* tmp/ work/ report* $PARENT_WORKSPACE/ocrd.log "$WORKSPACE_DIR"_local "$WORKSPACE_DIR"_results $WORKSPACE_DIR $OCRD_RESULTS
}


Expand Down
6 changes: 3 additions & 3 deletions kitodo_operandi.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,11 @@ upload_to_ola_hd() {
# Function to handle results for kitodo: unpack the OCR-D results archive and
# move its contents into $PARENT_WORKSPACE/ocr/.
# NOTE(review): this listing is a rendered diff without +/- markers; the mv
# into ocr/alto/ looks like the removed version -- confirm against the
# committed file.
handle_results() {
# Extract into "<workspace>_results"; -o overwrites existing files without prompting.
unzip -o "$OCRD_RESULTS" -d "$WORKSPACE_DIR"_results
mv -f "$WORKSPACE_DIR"_results/data/*ALTO*/* $PARENT_WORKSPACE/ocr/alto/
# Record the path of the results archive in a hidden file in the parent workspace.
echo "$OCRD_RESULTS" > "$PARENT_WORKSPACE/.ocrd_results_path"
# Move everything under data/ (not only the ALTO file group) into ocr/.
mv -f "$WORKSPACE_DIR"_results/data/* $PARENT_WORKSPACE/ocr/
}

# Remove the working files of a run: .nextflow*, tmp/, work/, report*, the
# ocrd.log in the parent workspace, and the workspace directories.
# NOTE(review): rendered diff -- the two rm lines look like the old and new
# versions of the same statement; the second one additionally removes
# $OCRD_RESULTS. Confirm against the committed file.
cleanup(){
rm -r .nextflow* tmp/ work/ report* $PARENT_WORKSPACE/ocrd.log "$WORKSPACE_DIR"_local "$WORKSPACE_DIR"_results $WORKSPACE_DIR
rm -r .nextflow* tmp/ work/ report* $PARENT_WORKSPACE/ocrd.log "$WORKSPACE_DIR"_local "$WORKSPACE_DIR"_results $WORKSPACE_DIR $OCRD_RESULTS
}


Expand Down
Empty file modified ocrd-models/qurator-gt4histocr-1/0.ckpt.h5
100644 → 100755
Empty file.
Empty file modified ocrd-models/qurator-gt4histocr-1/0.ckpt.json
100644 → 100755
Empty file.
Empty file modified script_docker.sh
100644 → 100755
Empty file.
Empty file modified script_native.sh
100644 → 100755
Empty file.
116 changes: 111 additions & 5 deletions upload_to_ola_hd.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,17 +1,123 @@
#!/bin/bash

# This script uploads an OCR BagIt archive to OLA-HD.
# OLA_USR must hold the OLA-HD credentials as username:password; set it as an environment variable or pass it with -o.

#operandi script should store the results path inside process/directory($s1)/.ocrd_results_path

#ocr bagit path
OCRD_RESULTS=$(<"$1/.ocrd_results_path")
# Run from the directory that contains this script so that relative paths
# (log files, ocrd.log, tmp/) resolve next to it. Abort if that directory
# cannot be entered: continuing would write logs and delete relative paths
# in whatever directory the caller happened to be in.
SCRIPT_PATH="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_PATH" || {
  echo "Cannot change directory to $SCRIPT_PATH" >&2
  exit 1
}

# Defaults; several of these can be overridden by command-line options.
SERVER_ADDR=141.5.99.53
CURRENT_TIME=$(date +"%m%d%Y_%H%M%S")
# Default workspace: a timestamped directory next to the script.
WORKSPACE_DIR="$PWD/ws_$CURRENT_TIME"
# Becomes true when an existing results archive is supplied.
RESULTS_AVAILABLE=false
ERROR_LOG="error_log.txt"
LOG_FILE="log_file.txt"
METS_PATH_URL=""
OCRD_RESULTS=""
# Parse the command-line options.
# The leading ':' in the option string selects getopts' silent error mode, so
# unknown options and missing arguments are reported by the '\?' and ':' arms
# below. Letters in the option string that have no arm here (f, u, i, c, r,
# n, e, l) are accepted and ignored.
while getopts ":s:f:m:u:w:i:c:r:n:elz:o:" opt; do
case $opt in
# -s <addr>: address of the OLA-HD server
s) SERVER_ADDR="$OPTARG" ;;
# -m <mets>: METS path or URL to clone the workspace from
m) METS_PATH_URL="$OPTARG" ;;
# -w <dir>: workspace directory
w) WORKSPACE_DIR="$OPTARG" ;;
# -z <zip>: existing results archive; marks the results as already available
z) OCRD_RESULTS="$OPTARG"
RESULTS_AVAILABLE=true;;
# -o <user:password>: OLA-HD credentials
o) OLA_USR="$OPTARG";;
\?) echo "Invalid option: -$OPTARG" >&2; exit 1 ;;
:) echo "Option -$OPTARG requires an argument." >&2; exit 1 ;;
esac
done




# Write an informational message to stdout and append the same line to
# $LOG_FILE. Each line carries a timestamp and the workspace path.
# Arguments: $1 - message text
log_info() {
  local text="$1"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '%s\n' "[INFO] $stamp - (Upload to OLA) Workspace: $WORKSPACE_DIR - $text" \
    | tee -a "$LOG_FILE"
}

# Append an error line (timestamp, workspace path, message) to both
# $ERROR_LOG and $LOG_FILE. Nothing is printed to the terminal.
# Arguments: $1 - message text
log_error() {
  local text="$1"
  local sink
  for sink in "$ERROR_LOG" "$LOG_FILE"; do
    echo "[ERROR] $(date '+%Y-%m-%d %H:%M:%S') - (Upload to OLA) Workspace: $WORKSPACE_DIR - $text" >> "$sink"
  done
}


# Upload the results archive to the OLA-HD server as a multipart form post.
# Globals:
#   SERVER_ADDR  (read) address of the OLA-HD server
#   OLA_USR      (read) credentials handed to curl -u (username:password)
#   OCRD_RESULTS (read) path of the archive to upload
# Exits the script with status 1 when the upload fails.
upload_to_ola_hd() {
  log_info "Uploading the results to OLA-HD..."
  # --fail makes curl return a non-zero status on HTTP errors (>= 400).
  # Without it a rejected upload (e.g. bad credentials) would count as a
  # success, and the archive is deleted by cleanup right after the upload.
  if ! curl --fail -X POST "$SERVER_ADDR/api/bag" \
    -u "$OLA_USR" \
    -H 'content-type: multipart/form-data' \
    -F file=@"$OCRD_RESULTS"; then
    log_error "Failed to upload the results."
    exit 1
  fi
}

upload_to_ola_hd
# Clone the METS file given by $METS_PATH_URL into a new OCR-D workspace at
# /data/$PROCESS_TITLE, using the command line stored in $DOCKER_RAPPER.
# Exits the script with status 1 when the clone fails.
create_workspace() {
  log_info "Creating workspace..."
  # $DOCKER_RAPPER is left unquoted on purpose: it holds a whole command
  # line that has to be split into words. The METS URL is quoted so that it
  # always reaches ocrd as a single argument.
  if ! $DOCKER_RAPPER ocrd workspace -d "/data/$PROCESS_TITLE" clone "$METS_PATH_URL"; then
    log_error "Failed to create the workspace."
    exit 1
  fi
}

# Pack the workspace at /data/$PROCESS_TITLE into an OCR-D zip via the
# command line stored in $DOCKER_RAPPER; exit with status 1 on failure.
generate_ocrd_zip() {
  log_info "Generating an OCR-D zip..."
  # $DOCKER_RAPPER must stay unquoted so it expands into separate words.
  $DOCKER_RAPPER ocrd zip bag -i "$PROCESS_TITLE" -d "/data/$PROCESS_TITLE" || {
    log_error "Failed to generate the OCR-D zip."
    exit 1
  }
}

# Run "ocrd zip validate" on /data/$PROCESS_TITLE.ocrd.zip via the command
# line stored in $DOCKER_RAPPER; exit with status 1 when validation fails.
validate_ocrd_zip() {
  log_info "Validating the OCR-D zip..."
  # $DOCKER_RAPPER must stay unquoted so it expands into separate words.
  if ! $DOCKER_RAPPER ocrd zip validate "/data/$PROCESS_TITLE.ocrd.zip"; then
    log_error "Validation failed. The OCR-D zip is not valid."
    exit 1
  fi
}

# Remove the files a run leaves behind: the workspace directory, ocrd.log in
# the current directory, the results archive and the tmp/ directory next to
# the script.
# Globals: WORKSPACE_DIR, OCRD_RESULTS, SCRIPT_PATH (all read)
cleanup() {
  local -a targets=()
  local candidate
  # Empty values are skipped. In particular an empty SCRIPT_PATH must never
  # turn "$SCRIPT_PATH/tmp" into the system-wide /tmp.
  for candidate in \
    "$WORKSPACE_DIR" \
    "ocrd.log" \
    "$OCRD_RESULTS" \
    "${SCRIPT_PATH:+$SCRIPT_PATH/tmp}"; do
    if [[ -n "$candidate" ]]; then
      targets+=("$candidate")
    fi
  done
  # Quoted expansion keeps paths containing spaces intact; -f tolerates
  # targets that were never created (e.g. no workspace when an existing
  # archive was supplied); -- guards against names starting with '-'.
  rm -rf -- "${targets[@]}"
}

# Entry point: derive the process title and parent directory from
# $WORKSPACE_DIR, build the docker command line, create and validate the
# archive when no results archive was supplied, then upload and clean up.
# Sets the globals PROCESS_TITLE, PARENT_WORKSPACE, DOCKER_RAPPER and, when
# it builds the archive itself, OCRD_RESULTS.
main() {
  PROCESS_TITLE=$(basename "$WORKSPACE_DIR")
  PARENT_WORKSPACE=$(dirname "$WORKSPACE_DIR")
  # Kept as a single string: the functions that use it expand it unquoted.
  DOCKER_RAPPER="docker run --rm -u $(id -u) -v $SCRIPT_PATH/tmp:/tmp -v $SCRIPT_PATH/ocrd-models:/ocrd-models -v $PARENT_WORKSPACE:/data -- ocrd/all:maximum"

  if [[ "$RESULTS_AVAILABLE" == false ]]; then
    # Without a ready-made archive the workspace is cloned from the METS.
    [[ -n "$METS_PATH_URL" ]] || {
      log_error "METS URL is not given..."
      exit 1
    }
    create_workspace
    generate_ocrd_zip
    validate_ocrd_zip
    OCRD_RESULTS="${WORKSPACE_DIR}.ocrd.zip"
  fi

  upload_to_ola_hd
  cleanup
}

main

0 comments on commit 15ac838

Please sign in to comment.