#!/bin/bash
#
# scantopdf - turns a scanned PDF into a PDF produced by tesseract.
# Pipeline (see the functions below): convert -> tesseract -> pdfunite.

# Abort on the first command that fails.
set -e

# Scratch directory that holds the intermediate per-page files.
TMP_DIR="$(mktemp -d)"

# Remove the scratch directory on every exit path (success, error, --help).
# ${TMP_DIR:?} aborts the rm instead of expanding to an empty path.
cleanup_tmp_dir () {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup_tmp_dir EXIT

## Args
# Every flag the option-parsing loop treats as an option rather than as the
# input file name. Newlines are allowed inside an array literal, so no
# backslash continuations are needed.
# NOTE(review): -c/--config are listed here but are not documented in the
# usage text.
OPTIONAL_FLAGS=(
  "-v" "--verbose"
  "-c" "--config"
  "-h" "-?" "--help"
)

#######################################
# Print a progress message when verbose mode is on.
# Globals:   VERBOSE (read) - verbose mode is on when it equals 1
# Arguments: $1 - message to print (optional)
# Outputs:   the message on stdout, only in verbose mode
# Returns:   with a message: always 0, so a plain `verbose "msg"` call can
#            never abort the script under `set -e`;
#            without arguments: 0 if verbose mode is on, 1 otherwise, and
#            nothing is printed (usable as `verbose && ...`).
#######################################
verbose () {
  # No message given: behave as a pure predicate.
  if [[ $# -eq 0 ]]; then
    [[ "${VERBOSE:-}" = 1 ]]
    return
  fi

  if [[ "${VERBOSE:-}" = 1 ]]; then
    echo "$1"
  fi
  return 0
}

#######################################
# Convert the scanned PDF into JPEG files inside the scratch directory.
# Globals:   TMP_DIR (read) - directory that receives the out-*.jpg files
# Arguments: $1 - path of the PDF to convert
# Outputs:   a progress message via verbose
# Returns:   the exit status of convert
#######################################
pdf_to_jpg () {
  local scan="$1"

  verbose "Converting the scan '$scan' to jpg"

  # The %05d in the output name is filled in by convert, presumably one
  # zero-padded number per page - confirm against the ImageMagick docs.
  convert -density 300 -quality 100 "$scan" "$TMP_DIR/out-%05d.jpg"
}

#######################################
# Run tesseract on every out-*.jpg in the scratch directory, writing each
# result next to its source image under the same base name.
# Globals:   TMP_DIR (read)
# Arguments: none
# Outputs:   a progress message via verbose
# Returns:   0 when there is nothing to process; otherwise the status of
#            the last tesseract call
#######################################
tesseract_jpgs () {
  local page

  verbose "Applying tesseract to the jpgs"

  # TMP_DIR is quoted so a path containing spaces is not word-split; only
  # the out-*.jpg part is left unquoted to glob.
  for page in "$TMP_DIR"/out-*.jpg; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "$page" ]] || continue

    # ${page%.jpg} is the output base: same directory, extension stripped.
    # NOTE(review): tesseract's stock config is named "pdf" (lowercase);
    # "PDF" may only resolve on a case-insensitive filesystem - confirm.
    tesseract "$page" "${page%.jpg}" -l deu PDF
  done
}

#######################################
# Merge the out-*.pdf files from the scratch directory into a single PDF
# in the current directory, named "<input base name> - EDITABLE.pdf".
# Globals:   TMP_DIR (read)
# Arguments: $1 - path of the original PDF (only its base name is used)
# Outputs:   a progress message via verbose
# Returns:   the exit status of pdfunite
#######################################
concat_pdfs () {
  # Declared separately so a failing basename is not masked by `local`.
  local new_pdf_name
  new_pdf_name="$(basename "$1" .pdf) - EDITABLE.pdf"

  verbose "Concatenating all the pdfs into '$new_pdf_name'"

  # The glob expands in sorted order, so the inputs are passed to pdfunite
  # in ascending out-NNNNN order.
  pdfunite "$TMP_DIR"/out-*.pdf ./"$new_pdf_name"
}

#######################################
# Print the help text.
# Arguments: none
# Outputs:   the help text on stdout
#######################################
usage () {
  # Quoted delimiter: the text is printed literally, with no expansion.
  cat << 'EOF'
scantopdf - A quick bash script that wraps around tesseract to make book scans "editable"

Usage:  scantopdf [-v|--verbose] file.pdf
        scantopdf [-v|--verbose] "file with spaces.pdf"
        scantopdf [-h|-?|--help]

Options:
  file.pdf              the file you want to make editable (make
                        sure to put quotes " around the file
                        name if the filename has spaces)

Optional arguments:
  -v, --verbose         print each step the script passes through
  -h, -?, --help        show this message
EOF
}

# Consume leading option flags; the first argument that is not listed in
# OPTIONAL_FLAGS ends the loop and is left in $1 as the input file.
# The `$# -gt 0` guard keeps an empty argument list out of the loop.
while [[ $# -gt 0 && " ${OPTIONAL_FLAGS[*]} " == *" $1 "* ]]; do
  # `|| true`: a non-zero status from verbose must never abort the script
  # under `set -e`.
  verbose "Checking for optional arguments" || true

  case "$1" in
    -v|--verbose)
      VERBOSE=1
      echo "Setting \$VERBOSE to true (\$VERBOSE=$VERBOSE)"
      shift
      ;;
    -h|-\?|--help)
      verbose "Detected -h, -? or --help" || true
      usage
      exit 0
      ;;
    *)
      # A flag that is listed in OPTIONAL_FLAGS but has no handler above
      # (-c/--config). Without this arm nothing would shift and the loop
      # would spin forever.
      echo "The option '$1' is not supported." >&2
      exit 1
      ;;
  esac
done

# Where tesseract looks for its language data. A value already present in
# the environment is kept; otherwise fall back to the per-user default.
export TESSDATA_PREFIX="${TESSDATA_PREFIX:-$HOME/tesseract/tessdata}"

#######################################
# Validate the input file and run the conversion pipeline on it.
# Arguments: $1 - path of the PDF to make editable
# Outputs:   error messages on stderr
# Returns:   0 on success, 1 when the argument is missing, does not end in
#            .pdf, or is not an existing regular file
#######################################
main () {
  local scan="${1:-}"

  if [[ -z "$scan" ]]; then
    echo "You have to give me the name of the pdf you want to make editable." >&2
    return 1
  fi

  if [[ "$scan" != *.pdf || ! -f "$scan" ]]; then
    echo "The file you gave me isn't a pdf or doesn't exist, so I can't perform the convertion." >&2
    return 1
  fi

  pdf_to_jpg "$scan"
  tesseract_jpgs
  concat_pdfs "$scan"
}

# Last command of the script, so its status becomes the exit status.
main "$@"