-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscan-it
More file actions
executable file
·140 lines (116 loc) · 3.77 KB
/
scan-it
File metadata and controls
executable file
·140 lines (116 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/bin/bash
# ---------------------------------------------------------------------------
# Scanner settings (handed to scanimage by scan_batch)
# ---------------------------------------------------------------------------
# Device string for `scanimage -d`.
SCANNER_DEVICE='airscan:e0:Canon TS7400 series'
# Value for `scanimage --resolution`.
RESOLUTION='300'
# Value for `scanimage --mode`.
MODE='Color'

# ---------------------------------------------------------------------------
# Output settings
# ---------------------------------------------------------------------------
# Image format; also used as the file extension of every scanned page.
OUTPUT_FORMAT='jpeg'
# Older name pattern without extension. Nothing in the visible script reads it.
OUTPUT_PREFIX='out%d'
# File name stem placed in front of the page number.
BATCH_PREFIX='out'
# Full `--batch` format specifier (%d is replaced by the page number).
# scan_batch builds the same string inline rather than reading this variable.
FILE_PATTERN="${BATCH_PREFIX}%d.${OUTPUT_FORMAT}"
# Print a progress message on stdout, prefixed with a bold green "[SCAN]" tag.
# Arguments: $1 - message text; backslash escapes in it are expanded because
#                 the line is emitted with `echo -e`.
log() {
  local message=$1
  local tag_on='\033[1;32m'
  local tag_off='\033[0m'
  echo -e "${tag_on}[SCAN]${tag_off} ${message}"
}
# Print an error message on stderr, prefixed with a bold red "[ERROR]" tag.
# Diagnostics go to stderr so they stay visible when stdout is redirected and
# never mix into captured output.
# Arguments: $1 - message text; backslash escapes in it are expanded because
#                 the line is emitted with `echo -e`.
function error() {
  echo -e "\033[1;31m[ERROR]\033[0m $1" >&2
}
# Run one scanimage batch pass from the "ADF" source.
#
# Globals:   SCANNER_DEVICE, OUTPUT_FORMAT, RESOLUTION, MODE, BATCH_PREFIX (read)
# Arguments: $1 - page number given to the first scanned page (--batch-start)
#            $2 - step added to the page number after each page
#                 (--batch-increment); may be negative
# Outputs:   one file per page named "<BATCH_PREFIX><n>.<OUTPUT_FORMAT>" in the
#            current directory; scanimage's own messages pass straight through
#            on stderr.
# Returns:   the exit status of scanimage.
#            NOTE(review): scanimage may exit non-zero when the feeder simply
#            runs empty at the end of a batch - confirm before treating a
#            non-zero status as a hard failure.
scan_batch() {
  local start=$1
  local inc=$2

  # Build the command line as an array so every value stays one argument,
  # whatever characters it contains (the device name has spaces).
  local -a scan_args=(
    -d "$SCANNER_DEVICE"
    --source "ADF"
    --format "$OUTPUT_FORMAT"
    --resolution "$RESOLUTION"
    --mode "$MODE"
    --batch="${BATCH_PREFIX}%d.${OUTPUT_FORMAT}"
    --batch-start="$start"
    --batch-increment="$inc"
    # Scan area; presumably millimetres (roughly A4) - TODO confirm.
    -x 215 -y 297
  )

  # No temp file / tee here: the captured stderr was never read, and the
  # trailing cleanup hid scanimage's exit status from the caller.
  scanimage "${scan_args[@]}"
}
# Scan a single-sided stack in one pass: pages are numbered 1, 2, 3, ...
# Arguments: none
# Outputs:   a progress line via log, then whatever scan_batch produces.
scan_single_sided() {
  local first_page=1
  local page_step=1

  log "Starting Single Sided Scan..."
  scan_batch "$first_page" "$page_step"
}
# Scan a double-sided stack in two feeder passes.
#
# Pass 1 numbers the front sides 1, 3, 5, ...  The user then flips the stack,
# and pass 2 numbers the back sides downwards from 2 * <front page count>
# (e.g. 3 fronts -> backs are 6, 4, 2), so the files end up in reading order.
#
# Globals:   BATCH_PREFIX, OUTPUT_FORMAT (read)
# Arguments: none
# Outputs:   progress via log; waits for Enter on stdin between the passes.
# Exits:     terminates the script with status 1 when pass 1 left no
#            front-side page files behind.
scan_double_sided() {
  local page_file page_index start_back
  local count=0

  log "Phase 1: Scanning Front Sides (Odds)..."
  scan_batch 1 2

  # Count the front sides by looking only at files named
  # "<prefix><odd number>.<format>".  A plain "<prefix>*.<format>" count would
  # also include even-numbered pages left over from an earlier run and
  # unrelated files such as "output.jpeg", which inflates the count and makes
  # the back sides start at the wrong page number.
  # Caveat: odd-numbered leftovers beyond the current stack are still counted;
  # start from a clean directory to be safe.
  for page_file in "${BATCH_PREFIX}"*".${OUTPUT_FORMAT}"; do
    # With no match the glob stays literal; -f filters that out.
    [[ -f "$page_file" ]] || continue
    page_index=${page_file#"$BATCH_PREFIX"}
    page_index=${page_index%".${OUTPUT_FORMAT}"}
    [[ "$page_index" =~ ^[0-9]+$ ]] || continue
    # 10# forces base 10 so an index with leading zeros is not read as octal.
    if (( 10#$page_index % 2 == 1 )); then
      count=$((count + 1))
    fi
  done

  if (( count == 0 )); then
    error "No pages detected. Exiting."
    exit 1
  fi

  log "Batch 1 complete. Detected $count pages."
  log "Please flip the document stack."
  log "Ensure the page that was scanned LAST (top of output) is fed FIRST (top of input)."
  # -r keeps backslashes literal; the typed text itself is ignored.
  read -r -p "Press Enter to continue..."

  # The flipped stack feeds the last sheet first, so its back side gets the
  # highest page number and the numbering counts down by two from there.
  start_back=$((count * 2))
  log "Phase 2: Scanning Back Sides (Evens)..."
  scan_batch "$start_back" "-2"
}
# Report scanned pages on which tesseract recognises no text at all.
#
# Every regular file matching "<BATCH_PREFIX>*.<OUTPUT_FORMAT>" in the current
# directory is run through tesseract; when the recognised text is empty or
# whitespace only, the file is listed as a potential blank page.  Files are
# only reported, never modified or removed.
#
# Globals:   BATCH_PREFIX, OUTPUT_FORMAT (read)
# Arguments: none
# Outputs:   a progress line via log; one line per suspect file on stdout.
# Returns:   0 right away when tesseract is not installed (check is skipped).
check_blank_pages() {
  # Keep the loop variables local so the function does not overwrite globals
  # of the same name.
  local file text

  log "Checking for potentially blank pages..."

  if ! command -v tesseract &> /dev/null; then
    echo "Tesseract not found, skipping blank page check."
    return 0
  fi

  # Prefix and extension are quoted so a value containing whitespace is not
  # word-split; only the "*" in the middle acts as a glob.
  for file in "${BATCH_PREFIX}"*".${OUTPUT_FORMAT}"; do
    # With no match the glob stays literal; -f filters that out.
    [[ -f "$file" ]] || continue
    # tesseract's stderr is discarded, so a failed run also produces empty
    # text and is reported as a potential blank page.
    text=$(tesseract "$file" stdout 2>/dev/null)
    if [[ -z "${text//[[:space:]]/}" ]]; then
      echo " ? Potential blank page: $file"
    fi
  done
}
# Command-line handling.
#   -h  print usage and exit 0
#   -s  single-sided scan
#   -d  double-sided scan (this is also the default when no flag is given)
# Any other option prints a message and exits 1.  When both -s and -d are
# given, the one that appears last wins.
MODE_FLAG="double"
while getopts ":hsd" opt; do
  case "$opt" in
    h) echo "Usage: $0 [-h] [-s (single)] [-d (double, default)]"; exit 0 ;;
    s) MODE_FLAG="single" ;;
    d) MODE_FLAG="double" ;;
    \?) echo "Invalid option: -$OPTARG"; exit 1 ;;
  esac
done

# Run the selected scan, then the blank-page check over the resulting files.
case "$MODE_FLAG" in
  single) scan_single_sided ;;
  *) scan_double_sided ;;
esac

check_blank_pages
log "Done."