#!/usr/bin/env bash
#
# scantopdf - A quick bash script that wraps around tesseract to make book
# scans "editable" (file: scantopdf, mode 100755).
#
# Pipeline:
#   1. ImageMagick `convert` rasterises every page of the input PDF to a JPEG.
#   2. `tesseract` OCRs each JPEG into a one-page PDF.
#   3. `pdfunite` merges the one-page PDFs into "<name> - EDITABLE.pdf" in the
#      current working directory.
#
# Environment:
#   VERBOSE          "1" enables progress messages (same effect as -v).
#   TESSDATA_PREFIX  tesseract data directory; defaults to
#                    $HOME/tesseract/tessdata when not already set.
#
# Exit status:
#   0  success, help shown, or no input given (only a hint is printed)
#   1  invalid input file, missing tool, or a failed pipeline step
#   2  unknown command-line option

set -euo pipefail

VERBOSE="${VERBOSE:-0}"

# Scratch directory for the intermediate page files. Created lazily by main so
# that --help and argument errors never leave a directory behind.
TMP_DIR=""

# Language passed to tesseract via -l.
readonly OCR_LANGUAGE="deu"

#######################################
# Print a progress message when verbose mode is on.
# Always returns 0: a trailing `[[ ... ]] && echo` would return 1 when verbose
# mode is off and abort the whole script under `set -e`.
# Globals:   VERBOSE (read)
# Arguments: $1 - message to print
# Outputs:   the message on stdout when VERBOSE is 1
#######################################
verbose() {
  if [[ "${VERBOSE:-0}" == 1 ]]; then
    printf '%s\n' "${1-}"
  fi
}

#######################################
# Print an error message to stderr and exit.
# Arguments: $1 - exit status; remaining arguments - message
#######################################
die() {
  local status="$1"
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

#######################################
# Remove the scratch directory, if one was created. Installed as EXIT trap.
# Globals: TMP_DIR (read)
#######################################
cleanup() {
  if [[ -n "${TMP_DIR:-}" && -d "$TMP_DIR" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}

#######################################
# Rasterise every page of a PDF into $TMP_DIR/out-NNNN.jpg.
# The explicit zero-padded %04d pattern gives single-page PDFs the same
# out-NNNN name as multi-page ones and keeps glob order equal to page order
# (unpadded numbers sort out-10 before out-2).
# Globals:   TMP_DIR (read)
# Arguments: $1 - path to the input PDF
#######################################
pdf_to_jpg() {
  verbose "Converting the scan '$1' to jpg"

  convert -density 300 -quality 100 "$1" "$TMP_DIR/out-%04d.jpg" \
    || die 1 "convert failed for '$1'"
}

#######################################
# Run tesseract on every page image, writing out-NNNN.pdf next to each
# out-NNNN.jpg.
# Globals: TMP_DIR, OCR_LANGUAGE (read)
#######################################
tesseract_jpgs() {
  verbose "Applying tesseract to the jpgs"

  local -a pages=("$TMP_DIR"/out-*.jpg)
  # An unmatched glob stays literal, so check that the first entry exists.
  [[ -e "${pages[0]}" ]] || die 1 "No page images were produced from the pdf."

  local page
  for page in "${pages[@]}"; do
    # "pdf" is the name of tesseract's stock config file; lowercase so it
    # also resolves on case-sensitive filesystems.
    tesseract "$page" "${page%.jpg}" -l "$OCR_LANGUAGE" pdf \
      || die 1 "tesseract failed for '$page'"
  done
}

#######################################
# Merge the per-page PDFs into "<input basename> - EDITABLE.pdf" in the
# current working directory.
# Globals:   TMP_DIR (read)
# Arguments: $1 - path to the original input PDF (used only for naming)
#######################################
concat_pdfs() {
  local new_pdf_name
  new_pdf_name="$(basename -- "$1" .pdf) - EDITABLE.pdf"
  verbose "Concatenating all the pdfs into '$new_pdf_name'"

  local -a parts=("$TMP_DIR"/out-*.pdf)
  [[ -e "${parts[0]}" ]] || die 1 "tesseract produced no pdf pages."

  pdfunite "${parts[@]}" ./"$new_pdf_name" \
    || die 1 "pdfunite failed to write '$new_pdf_name'"
}

#######################################
# Print the help text to stdout.
#######################################
usage() {
  cat << EOF
scantopdf - A quick bash script that wraps around tesseract to make book scans "editable"

Usage: scantopdf [-v|--verbose] input.pdf
       scantopdf [-h|-?|--help]

Options:
  input.pdf      the file you want to make editable

Optional arguments:
  -v, --verbose  print each step the script passes through
  -h, -?, --help show this message
EOF
}

#######################################
# Parse the command line and run the conversion pipeline.
# Globals:   VERBOSE, TMP_DIR, TESSDATA_PREFIX (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success or when no input was given; exits 1 or 2 on error
#######################################
main() {
  while (( $# > 0 )); do
    verbose "Checking for optional arguments"

    case "$1" in
      -v | --verbose)
        VERBOSE=1
        verbose "Setting \$VERBOSE to true (\$VERBOSE=$VERBOSE)"
        shift
        ;;
      -h | -\? | --help)
        verbose "Detected -h, -? or --help"
        usage
        exit 0
        ;;
      --)
        shift
        break
        ;;
      -?*)
        # Every other flag is rejected outright. Recognising a flag without
        # consuming it would leave the loop spinning forever.
        die 2 "scantopdf: unknown option '$1' (try --help)"
        ;;
      *)
        break
        ;;
    esac
  done

  if [[ -z "${1:-}" ]]; then
    # Unchanged behaviour: a bare invocation only prints a hint, status 0.
    echo "You have to give me the name of the pdf you want to make editable."
    return 0
  fi

  local input="$1"
  if [[ ! "$input" =~ \.pdf$ || ! -f "$input" ]]; then
    die 1 "The file you gave me isn't a pdf or doesn't exist, so I can't perform the conversion."
  fi

  # Keep a name that starts with '-' from being parsed as an option by the
  # external tools.
  if [[ "$input" == -* ]]; then
    input="./$input"
  fi

  local tool
  for tool in convert tesseract pdfunite; do
    command -v "$tool" > /dev/null \
      || die 1 "scantopdf: required tool '$tool' was not found in PATH"
  done

  # Respect a caller-provided tessdata location; fall back to the historical
  # default otherwise.
  export TESSDATA_PREFIX="${TESSDATA_PREFIX:-$HOME/tesseract/tessdata}"

  TMP_DIR="$(mktemp -d)" || die 1 "scantopdf: could not create a temporary directory"
  trap cleanup EXIT

  pdf_to_jpg "$input"
  tesseract_jpgs
  concat_pdfs "$input"
}

main "$@"