First draft of the script

This commit is contained in:
Karma Riuk 2021-05-22 00:37:34 +02:00
parent 280b4eea20
commit d6d75a5870

84
scantopdf Executable file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Abort on the first command that fails.
set -e

# Scratch directory that holds the intermediate page images and per-page PDFs.
TMP_DIR="$(mktemp -d)"
# Remove the scratch directory on every exit path (success, error, or `exit`),
# so repeated runs do not pile up page images in the temp location.
trap 'rm -rf -- "$TMP_DIR"' EXIT
## Args
# Every flag that the option loop accepts in front of the input file.
# NOTE(review): -c/--config have no dedicated handler in the option loop;
# presumably reserved for a future config option - confirm.
OPTIONAL_FLAGS=(
  '-v' '--verbose'
  '-c' '--config'
  '-h' '-?' '--help'
)
# Print a progress message when verbose mode is on.
# Globals:   VERBOSE (read) - verbose mode is on when it equals 1
# Arguments: $1 - message to print (optional)
# Outputs:   the message on stdout, only in verbose mode
# Returns:   with a message: always 0, so a bare `verbose "msg"` call can
#            never abort the script under `set -e` when verbose mode is off;
#            without arguments: 0 if verbose mode is on, 1 otherwise, so the
#            function can still be used as a predicate (`verbose && cmd`).
verbose () {
  if (( $# == 0 )); then
    # Predicate form: report the verbose state and print nothing.
    [[ ${VERBOSE:-} = 1 ]]
    return
  fi
  # An `if` without `else` yields status 0 when the condition is false,
  # unlike `[[ ... ]] && echo`, which leaks the failed test as the status.
  if [[ ${VERBOSE:-} = 1 ]]; then
    printf '%s\n' "$1"
  fi
}
# Rasterise the pages of a PDF into numbered JPEG files inside $TMP_DIR.
# Globals:   TMP_DIR (read) - directory that receives the images
# Arguments: $1 - path of the PDF to convert
# Outputs:   $TMP_DIR/out-0000.jpg, $TMP_DIR/out-0001.jpg, ...
pdf_to_jpg () {
  verbose "Converting the scan '$1' to jpg"
  # The explicit, zero-padded %04d template does two things:
  #  - a one-page PDF still gets a numbered name (plain "out.jpg" would be
  #    written unsuffixed and missed by the later "out-*.jpg" glob);
  #  - the glob's lexical order matches the page order (unpadded names
  #    would sort out-10 before out-2).
  convert -density 300 -quality 100 "$1" "$TMP_DIR/out-%04d.jpg"
}
# Run tesseract on every page image in $TMP_DIR, writing one PDF per page
# next to it (out-N.jpg -> out-N.pdf).
# Globals:   TMP_DIR (read) - directory holding the out-*.jpg page images
# Arguments: $1 - tesseract language code (optional, defaults to "deu")
# Returns:   1 if no page image exists, otherwise 0 (a failing tesseract
#            run aborts the script through `set -e`)
tesseract_jpgs () {
  local lang=${1:-deu}
  local file
  local found=0
  verbose "Applying tesseract to the jpgs"
  for file in "$TMP_DIR"/out-*.jpg; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "$file" ]] || continue
    found=1
    # "pdf" is the lowercase name of tesseract's stock config file; the
    # upper-case spelling only resolves on case-insensitive filesystems.
    tesseract "$file" "${file%.jpg}" -l "$lang" pdf
  done
  if (( found == 0 )); then
    echo "No page images found in '$TMP_DIR'." >&2
    return 1
  fi
}
# Merge the per-page PDFs from $TMP_DIR into "<input name> - EDITABLE.pdf"
# in the current directory.
# Globals:   TMP_DIR (read) - directory holding the out-N.pdf page files
# Arguments: $1 - path of the original PDF; only its base name is used
# Returns:   1 if no page PDF exists, otherwise the status of pdfunite
concat_pdfs () {
  local new_pdf_name page page_no
  local -a pages=()
  new_pdf_name="$(basename -- "$1" .pdf) - EDITABLE.pdf"
  verbose "Concatenating all the pdfs into '$new_pdf_name'"
  for page in "$TMP_DIR"/out-*.pdf; do
    page_no=${page##*/out-}
    page_no=${page_no%.pdf}
    # Skips the literal pattern left behind by an unmatched glob.
    [[ $page_no =~ ^[0-9]+$ ]] || continue
    # Indexed arrays expand in ascending index order, so storing each file
    # under its page number yields numeric page order (2 before 10), which
    # the glob's lexical order does not. 10# keeps padded numbers decimal.
    pages[10#$page_no]=$page
  done
  if (( ${#pages[@]} == 0 )); then
    echo "No page pdfs found in '$TMP_DIR'." >&2
    return 1
  fi
  pdfunite "${pages[@]}" ./"$new_pdf_name"
}
# Write the command-line help text to stdout, one line per printf argument.
usage () {
  printf '%s\n' \
    'scantopdf - A quick bash script that wraps around tesseract to make book scans "editable"' \
    'Usage: scantopdf [-v|--verbose] input.pdf' \
    'scantopdf [-h|-?|--help]' \
    'Options:' \
    'input.pdf the file you want to make editable' \
    'Optional arguments:' \
    '-v, --verbose print each step the script passes through' \
    '-h, -?, --help show this message'
}
# Consume the leading option flags; the first argument that is not listed in
# OPTIONAL_FLAGS ends the loop and stays in $1.
while [[ " ${OPTIONAL_FLAGS[*]} " =~ " $1 " ]]; do
  # `|| true`: a progress message is purely informational, so its status
  # must never abort the script under `set -e`.
  verbose "Checking for optional arguments" || true
  case "$1" in
    -v|--verbose)
      VERBOSE=1
      echo "Setting \$VERBOSE to true (\$VERBOSE=$VERBOSE)"
      shift
      ;;
    -h|-\?|--help)
      verbose "Detected -h, -? or --help" || true
      usage
      exit
      ;;
    *)
      # A flag that is listed in OPTIONAL_FLAGS but has no handler above
      # (e.g. -c/--config). Without this arm nothing is shifted and the
      # loop would spin forever.
      echo "Option '$1' is not supported yet." >&2
      usage >&2
      exit 1
      ;;
  esac
done
# Directory tesseract loads its language data from. A value already present
# in the caller's environment wins; the previous hard-coded path is only the
# fallback.
export TESSDATA_PREFIX="${TESSDATA_PREFIX:-$HOME/tesseract/tessdata}"

# Validate the input first; failures are reported on stderr with a non-zero
# exit status so that calling scripts can detect them.
if [[ -z "$1" ]]; then
  echo "You have to give me the name of the pdf you want to make editable." >&2
  exit 1
fi
if [[ ! "$1" =~ \.pdf$ || ! -f "$1" ]]; then
  echo "The file you gave me isn't a pdf or doesn't exist, so I can't perform the convertion." >&2
  exit 1
fi

# Pipeline: pdf -> one jpg per page -> one OCR'd pdf per page -> merged pdf.
pdf_to_jpg "$1"
tesseract_jpgs
concat_pdfs "$1"