First draft of the script
This commit is contained in:
parent
280b4eea20
commit
d6d75a5870
84
scantopdf
Executable file
84
scantopdf
Executable file
@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env bash
#
# scantopdf - wraps around tesseract to make book scans "editable":
# the input PDF is rasterised page by page, each page is run through
# tesseract, and the per-page results are merged into one PDF.

# Abort on the first command that fails unhandled.
set -e

# Scratch directory holding the intermediate per-page images and PDFs.
TMP_DIR="$(mktemp -d)"

# Remove the scratch directory again.
# Globals: TMP_DIR (read)
cleanup_tmp_dir () {
  # ":?" refuses to run rm -rf with an empty path.
  rm -rf -- "${TMP_DIR:?}"
}

# Fire on every exit path (success, error, --help) so nothing is left behind.
trap cleanup_tmp_dir EXIT

## Args
# Flags that may appear before the input file name.
OPTIONAL_FLAGS=(
  "-v" "--verbose"
  "-c" "--config"
  "-h" "-?" "--help"
)
|
||||
|
||||
# Print a progress message when verbose mode is on.
# Globals:   VERBOSE (read) - verbose mode is on only when it equals 1
# Arguments: $1 - message to print (optional)
# Outputs:   the message on stdout, in verbose mode only
# Returns:   with a message: always 0, so that a bare `verbose "..."` call
#            can never abort the script under `set -e`.
#            without a message: 0 when verbose mode is on, 1 otherwise, which
#            keeps `verbose && ...` usable as a predicate.
verbose () {
  local enabled=0
  if [[ "${VERBOSE:-0}" = 1 ]]; then
    enabled=1
  fi

  # Predicate form: report the mode, print nothing.
  if (( $# == 0 )); then
    if (( enabled )); then
      return 0
    fi
    return 1
  fi

  if (( enabled )); then
    printf '%s\n' "$1"
  fi
  return 0
}
|
||||
|
||||
|
||||
# Rasterise every page of a PDF into numbered JPEG files.
# Globals:   TMP_DIR (read) - directory the images are written to
# Arguments: $1 - path of the PDF to convert
# Outputs:   $TMP_DIR/out-0000.jpg, $TMP_DIR/out-0001.jpg, ...
# Returns:   the exit status of `convert`
pdf_to_jpg () {
  local scan="$1"

  # "|| true": a progress message must never abort the run under `set -e`.
  verbose "Converting the scan '$scan' to jpg" || true

  # An explicit %04d makes ImageMagick number every output file, single-page
  # input included, so the files always match the out-*.jpg pattern used
  # later on. Zero padding keeps glob (lexical) order equal to page order
  # beyond page 9.
  convert -density 300 -quality 100 "$scan" "$TMP_DIR/out-%04d.jpg"
}
|
||||
|
||||
# Run tesseract over every page image and write one PDF per page.
# Globals:   TMP_DIR (read) - holds out-*.jpg, receives the matching out-*.pdf
# Arguments: none
# Outputs:   an error message on stderr when there is no page image
# Returns:   0 on success, 1 when no page image exists, otherwise the status
#            of the failing tesseract run
tesseract_jpgs () {
  local page stem
  local processed=0

  # "|| true": a progress message must never abort the run under `set -e`.
  verbose "Applying tesseract to the jpgs" || true

  # TMP_DIR is quoted so a scratch path containing blanks is not word-split.
  for page in "$TMP_DIR"/out-*.jpg; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -f "$page" ]] || continue

    stem="${page##*/}"
    stem="${stem%.jpg}"

    # "pdf" is the lowercase name of tesseract's config file; the lookup is a
    # plain file lookup, so the exact case matters on case-sensitive systems.
    tesseract "$page" "$TMP_DIR/$stem" -l deu pdf
    processed=$(( processed + 1 ))
  done

  if (( processed == 0 )); then
    echo "No page images found in '$TMP_DIR', nothing to run tesseract on." >&2
    return 1
  fi
}
|
||||
|
||||
# Merge the per-page PDFs into "<input name> - EDITABLE.pdf" in the current
# directory.
# Globals:   TMP_DIR (read) - holds the per-page out-<n>.pdf files
# Arguments: $1 - path of the original PDF; only its base name is used
# Outputs:   an error message on stderr when there is no page to merge
# Returns:   0 on success, 1 when no page PDF exists, otherwise the status of
#            pdfunite
concat_pdfs () {
  local new_pdf_name page_pdf page_no
  local -a numbered=() unnumbered=()

  new_pdf_name="${1##*/}"
  new_pdf_name="${new_pdf_name%.pdf} - EDITABLE.pdf"

  # "|| true": a progress message must never abort the run under `set -e`.
  verbose "Concatenating all the pdfs into '$new_pdf_name'" || true

  # A plain glob sorts lexically (out-10 before out-2). Filing each page under
  # its numeric index restores page order, because an indexed array always
  # expands in ascending index order.
  for page_pdf in "$TMP_DIR"/out-*.pdf; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -f "$page_pdf" ]] || continue

    page_no="${page_pdf##*/}"
    page_no="${page_no#out-}"
    page_no="${page_no%.pdf}"

    if [[ "$page_no" =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so zero-padded numbers are not read as octal.
      numbered[$(( 10#$page_no ))]="$page_pdf"
    else
      unnumbered+=("$page_pdf")
    fi
  done

  if (( ${#numbered[@]} + ${#unnumbered[@]} == 0 )); then
    echo "No page pdfs found in '$TMP_DIR', nothing to concatenate." >&2
    return 1
  fi

  pdfunite "${numbered[@]}" "${unnumbered[@]}" ./"$new_pdf_name"
}
|
||||
|
||||
# Write the command-line help text to stdout, one printf argument per line.
# Arguments: none
usage () {
  printf '%s\n' \
    'scantopdf - A quick bash script that wraps around tesseract to make book scans "editable"' \
    '' \
    'Usage: scantopdf [-v|--verbose] input.pdf' \
    'scantopdf [-h|-?|--help]' \
    '' \
    'Options:' \
    'input.pdf the file you want to make editable' \
    '' \
    'Optional arguments:' \
    '-v, --verbose print each step the script passes through' \
    '-h, -?, --help show this message'
}
|
||||
|
||||
# Consume the optional flags at the front of the argument list.
# Globals:   OPTIONAL_FLAGS (read)  - every flag that counts as optional
#            VERBOSE        (written) - set to 1 by -v/--verbose
#            FLAGS_CONSUMED (written) - number of leading arguments handled
# Arguments: the script's positional parameters
# Outputs:   progress messages on stdout; an error on stderr for a flag that
#            is listed but has no implementation
# Exits:     0 after printing the help text for -h/-?/--help,
#            2 for a listed flag without an implementation
parse_optional_flags () {
  FLAGS_CONSUMED=0

  # The "$#" guard stops the loop once the arguments run out; the padded
  # blanks make the match whole-word, so "-v" never matches inside "-vx".
  while (( $# > 0 )) && [[ " ${OPTIONAL_FLAGS[*]} " == *" $1 "* ]]; do
    if [[ "${VERBOSE:-0}" = 1 ]]; then
      echo "Checking for optional arguments"
    fi

    case "$1" in
      -v|--verbose)
        VERBOSE=1
        echo "Setting \$VERBOSE to true (\$VERBOSE=$VERBOSE)"
        ;;
      -h|-\?|--help)
        # "|| true": a quiet verbose call must not abort under `set -e`
        # before the help text is shown.
        verbose "Detected -h, -? or --help" || true
        usage
        exit 0
        ;;
      *)
        # Listed in OPTIONAL_FLAGS (e.g. -c/--config) but not handled above:
        # stop with an error instead of looping on the same argument forever.
        echo "scantopdf: option '$1' is not implemented yet" >&2
        exit 2
        ;;
    esac

    # Every arm that falls through has handled exactly one argument.
    shift
    FLAGS_CONSUMED=$(( FLAGS_CONSUMED + 1 ))
  done
}

parse_optional_flags "$@"
# Drop the handled flags so that $1 is the input file from here on.
shift "$FLAGS_CONSUMED"
|
||||
|
||||
# Where tesseract looks for its language data. A value already present in the
# environment wins; otherwise fall back to the location this script has
# always used.
export TESSDATA_PREFIX="${TESSDATA_PREFIX:-$HOME/tesseract/tessdata}"

# Guard clauses: refuse to start without a usable input file. Diagnostics go
# to stderr and the exit status is non-zero so callers can detect the failure.
if [[ -z "${1:-}" ]]; then
  echo "You have to give me the name of the pdf you want to make editable." >&2
  exit 1
fi

if [[ "$1" != *.pdf || ! -f "$1" ]]; then
  echo "The file you gave me isn't a pdf or doesn't exist, so I can't perform the convertion." >&2
  exit 1
fi

# Pipeline: pdf -> one jpg per page -> one OCR'd pdf per page -> merged pdf.
pdf_to_jpg "$1"
tesseract_jpgs
concat_pdfs "$1"
|
Loading…
Reference in New Issue
Block a user