#!/bin/bash
#
# scantopdf - wraps convert, tesseract and pdfunite to turn a scanned PDF into
# a PDF with an OCR text layer. The result is written to the current directory
# as "<name> - EDITABLE.pdf".
#
# Usage:
#   scantopdf [-v|--verbose] file.pdf
#   scantopdf [-h|-?|--help]
#
# External tools called: convert, tesseract (language "deu"), pdfunite.

# Scratch directory holding the per-page intermediate files; set by main().
TMP_DIR=""

# 1 enables progress messages; may also be preset through the environment.
VERBOSE="${VERBOSE:-0}"

#######################################
# Print a progress message when verbose mode is on.
# Globals:   VERBOSE (read)
# Arguments: $1 - message to print
# Outputs:   the message on stdout when VERBOSE is 1, nothing otherwise
# Returns:   always 0, so a silent call never trips errexit
#######################################
verbose() {
  if [[ "${VERBOSE:-0}" == 1 ]]; then
    printf '%s\n' "${1-}"
  fi
}

#######################################
# Render every page of a PDF to a numbered JPEG in the scratch directory.
# Globals:   TMP_DIR (read)
# Arguments: $1 - path of the scanned PDF
# Outputs:   writes $TMP_DIR/out-NNNNN.jpg files
#######################################
pdf_to_jpg() {
  verbose "Converting the scan '$1' to jpg"
  convert -density 300 -quality 100 "$1" "$TMP_DIR/out-%05d.jpg"
}

#######################################
# Run tesseract over each page image, producing one PDF per page.
# Globals:   TMP_DIR (read)
# Arguments: none
# Outputs:   writes $TMP_DIR/out-NNNNN.pdf next to each jpg
#######################################
tesseract_jpgs() {
  local page

  verbose "Applying tesseract to the jpgs"
  for page in "$TMP_DIR"/out-*.jpg; do
    # An unmatched glob stays literal; skip it instead of feeding it to
    # tesseract.
    [[ -e "$page" ]] || continue
    # "pdf" is the name of tesseract's PDF output config; the lowercase
    # spelling also resolves on case-sensitive filesystems.
    tesseract "$page" "${page%.jpg}" -l deu pdf
  done
}

#######################################
# Merge the per-page PDFs into "<name> - EDITABLE.pdf" in the current
# directory.
# Globals:   TMP_DIR (read)
# Arguments: $1 - path of the original PDF (only its base name is used)
#######################################
concat_pdfs() {
  local new_pdf_name

  new_pdf_name="$(basename "$1" .pdf) - EDITABLE.pdf"
  verbose "Concatenating all the pdfs into '$new_pdf_name'"
  pdfunite "$TMP_DIR"/out-*.pdf ./"$new_pdf_name"
}

#######################################
# Print the help text on stdout.
#######################################
usage() {
  cat <<'EOF'
scantopdf - A quick bash script that wraps around tesseract to make book scans "editable"

Usage:
    scantopdf [-v|--verbose] file.pdf
    scantopdf [-v|--verbose] "file with spaces.pdf"
    scantopdf [-h|-?|--help]

Options:
    file.pdf        the file you want to make editable (make sure to put
                    quotes " around the file name if the filename has spaces)

Optional arguments:
    -v, --verbose   print each step the script passes through
    -h, -?, --help  show this message
EOF
}

#######################################
# Parse the command line and run the conversion pipeline.
# The body is a subshell so that errexit and the EXIT trap that removes the
# scratch directory are scoped to a single run.
# Globals:   VERBOSE, TMP_DIR, TESSDATA_PREFIX (written inside the subshell)
# Arguments: the script's command-line arguments
# Returns:   0 on success or after --help, 1 for a missing/invalid file,
#            2 for an unsupported option, otherwise the status of the
#            failing tool
#######################################
main() (
  set -e

  while (($# > 0)); do
    verbose "Checking for optional arguments"
    case "$1" in
      -v | --verbose)
        VERBOSE=1
        verbose "Setting \$VERBOSE to true (\$VERBOSE=$VERBOSE)"
        shift
        ;;
      -h | -\? | --help)
        verbose "Detected -h, -? or --help"
        usage
        exit 0
        ;;
      -c | --config)
        # Reserved flag with no implementation; reject it rather than loop
        # forever on an argument that is never shifted.
        printf '%s\n' "The option '$1' is not supported." >&2
        exit 2
        ;;
      *)
        # First non-option argument: treat it as the file name.
        break
        ;;
    esac
  done

  if [[ -z "${1-}" ]]; then
    printf '%s\n' \
      "You have to give me the name of the pdf you want to make editable." >&2
    exit 1
  fi

  if [[ "$1" != *.pdf || ! -f "$1" ]]; then
    printf '%s\n' \
      "The file you gave me isn't a pdf or doesn't exist, so I can't perform the convertion." >&2
    exit 1
  fi

  export TESSDATA_PREFIX="$HOME/tesseract/tessdata"

  # Create the scratch directory only once there is real work to do, and make
  # sure it is removed on every exit path.
  TMP_DIR="$(mktemp -d)"
  trap 'rm -rf -- "$TMP_DIR"' EXIT

  pdf_to_jpg "$1"
  tesseract_jpgs
  concat_pdfs "$1"
)

main "$@"