From ca686a281abc6f044afb81e395cf8c754b16fe47 Mon Sep 17 00:00:00 2001 From: Damian Blatt Date: Wed, 24 Jun 2026 11:09:30 +0200 Subject: [PATCH 1/3] feat: add ignore area The ignored area is defined by passing the position of its top-left corner (measured from the page's upper-left corner) and the width and height of the rectangle. --- diff-pdf.cpp | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/diff-pdf.cpp b/diff-pdf.cpp index 0c24358..e405633 100644 --- a/diff-pdf.cpp +++ b/diff-pdf.cpp @@ -41,6 +41,7 @@ #include #include #include +#include enum DisplayMode @@ -64,6 +65,11 @@ bool g_grayscale = false; #define DEFAULT_RESOLUTION 300 long g_resolution = DEFAULT_RESOLUTION; +// Rectangle (in PDF points, origin top-left) within which differences are +// ignored entirely. +struct IgnoreArea { double x, y, w, h; bool valid; }; +IgnoreArea g_ignore_area = { 0, 0, 0, 0, false }; + inline unsigned char to_grayscale(unsigned char r, unsigned char g, unsigned char b) { return (unsigned char)(0.2126 * r + 0.7152 * g + 0.0722 * b); @@ -174,6 +180,22 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, const unsigned char *data2 = s2 ? 
cairo_image_surface_get_data(s2) : NULL; unsigned char *datadiff = cairo_image_surface_get_data(diff); + // convert the ignore area from PDF points to pixel coordinates in the + // rdiff coordinate space: + wxRect ignore_px; + if ( g_ignore_area.valid ) + { + const double scale = (int)g_resolution / 72.0; + // round the edges individually (rather than rounding width/height + // separately) so the rectangle covers every pixel that the + // rasterizer may have touched, including anti-aliased borders + const int px1 = wxRound(g_ignore_area.x * scale); + const int py1 = wxRound(g_ignore_area.y * scale); + const int px2 = wxRound((g_ignore_area.x + g_ignore_area.w) * scale); + const int py2 = wxRound((g_ignore_area.y + g_ignore_area.h) * scale); + ignore_px = wxRect(px1 + r1.x, py1 + r1.y, px2 - px1, py2 - py1); + } + // we visualize the differences by taking one channel from s1 // and the other two channels from s2: @@ -210,9 +232,13 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, unsigned char cg2 = *(data2 + x + 1); unsigned char cb2 = *(data2 + x + 2); - if ( cr1 > (cr2+g_channel_tolerance) || cr1 < (cr2-g_channel_tolerance) - || cg1 > (cg2+g_channel_tolerance) || cg1 < (cg2-g_channel_tolerance) - || cb1 > (cb2+g_channel_tolerance) || cb1 < (cb2-g_channel_tolerance) + const bool ignored = g_ignore_area.valid && ignore_px.Contains(r2.x + x/4, r2.y + y); + + if ( ( cr1 > (cr2+g_channel_tolerance) || cr1 < (cr2-g_channel_tolerance) + || cg1 > (cg2+g_channel_tolerance) || cg1 < (cg2-g_channel_tolerance) + || cb1 > (cb2+g_channel_tolerance) || cb1 < (cb2-g_channel_tolerance) + ) + && !ignored ) { pixel_diff_count++; @@ -330,6 +356,20 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, // If we specified a tolerance, then return if we have exceeded that for this page if ( g_per_page_pixel_tolerance == 0 ? 
changes : pixel_diff_count > g_per_page_pixel_tolerance) { + if ( g_ignore_area.valid ) + { + // mark the ignored area with a dashed gray outline, so it's + // clear when reviewing the diff that it was excluded on purpose + cairo_t *cr = cairo_create(diff); + static const double dashes[] = { 4.0, 4.0 }; + cairo_set_dash(cr, dashes, 2, 0); + cairo_set_line_width(cr, 2.0); + cairo_set_source_rgb(cr, 0.5, 0.5, 0.5); + cairo_rectangle(cr, ignore_px.x, ignore_px.y, ignore_px.width, ignore_px.height); + cairo_stroke(cr); + cairo_destroy(cr); + } + return diff; } else @@ -991,6 +1031,10 @@ int main(int argc, char *argv[]) NULL, "per-page-pixel-tolerance", "total number of pixels allowed to be different per page before specifying the page is different", wxCMD_LINE_VAL_NUMBER }, + { wxCMD_LINE_OPTION, + NULL, "ignore-area", "ignore differences inside a rectangle, given as X,Y,WIDTH,HEIGHT in PDF points: X,Y is the top-left corner of the rectangle (measured from the page's top-left corner)", + wxCMD_LINE_VAL_STRING }, + { wxCMD_LINE_OPTION, NULL, "dpi", "rasterization resolution (default: " wxSTRINGIZE(DEFAULT_RESOLUTION) " dpi)", wxCMD_LINE_VAL_NUMBER }, @@ -1065,6 +1109,33 @@ int main(int argc, char *argv[]) } } + wxString ignore_area_str; + if ( parser.Found("ignore-area", &ignore_area_str) ) + { + wxStringTokenizer tokenizer(ignore_area_str, ","); + double values[4]; + bool ok = true; + size_t count = 0; + while ( ok && tokenizer.HasMoreTokens() && count < 4 ) + ok = tokenizer.GetNextToken().ToCDouble(&values[count++]); + ok = ok && (count == 4) && !tokenizer.HasMoreTokens(); + + if ( !ok || values[2] <= 0 || values[3] <= 0 ) + { + fprintf(stderr, "Invalid ignore-area: %s. 
Expected X,Y,WIDTH,HEIGHT in PDF points " + "(X,Y = top-left corner of the rectangle, measured from the page's top-left " + "corner)\n", + (const char*) ignore_area_str.c_str()); + return 2; + } + + g_ignore_area.x = values[0]; + g_ignore_area.y = values[1]; + g_ignore_area.w = values[2]; + g_ignore_area.h = values[3]; + g_ignore_area.valid = true; + } + if ( parser.Found("channel-tolerance", &g_channel_tolerance) ) { if (g_channel_tolerance < 0 || g_channel_tolerance > 255) { From 9a4c1767c9b16a1613040397fab0f3252715c1c8 Mon Sep 17 00:00:00 2001 From: Damian Blatt Date: Wed, 24 Jun 2026 11:20:51 +0200 Subject: [PATCH 2/3] fix: no color diff overlay in ignored area --- diff-pdf.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/diff-pdf.cpp b/diff-pdf.cpp index e405633..d88a61c 100644 --- a/diff-pdf.cpp +++ b/diff-pdf.cpp @@ -262,6 +262,13 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, } } + // leave pixels in the ignored area showing s1's original + // image, undistorted by the diff color overlay + if ( ignored ) + { + continue; + } + if (g_grayscale) { // convert both images to grayscale, use blue for s1, red for s2 From ea7a9f5067026dbfeec4c561f54bada19d0ac577 Mon Sep 17 00:00:00 2001 From: Damian Blatt Date: Wed, 24 Jun 2026 12:08:40 +0200 Subject: [PATCH 3/3] feat: allow multiple areas separated by semicolon --- diff-pdf.cpp | 103 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 41 deletions(-) diff --git a/diff-pdf.cpp b/diff-pdf.cpp index d88a61c..0ba9bc0 100644 --- a/diff-pdf.cpp +++ b/diff-pdf.cpp @@ -65,10 +65,10 @@ bool g_grayscale = false; #define DEFAULT_RESOLUTION 300 long g_resolution = DEFAULT_RESOLUTION; -// Rectangle (in PDF points, origin top-left) within which differences are +// Rectangles (in PDF points, origin top-left) within which differences are // ignored entirely. 
-struct IgnoreArea { double x, y, w, h; bool valid; }; -IgnoreArea g_ignore_area = { 0, 0, 0, 0, false }; +struct IgnoreArea { double x, y, w, h; }; +std::vector g_ignore_areas; inline unsigned char to_grayscale(unsigned char r, unsigned char g, unsigned char b) { @@ -180,22 +180,35 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, const unsigned char *data2 = s2 ? cairo_image_surface_get_data(s2) : NULL; unsigned char *datadiff = cairo_image_surface_get_data(diff); - // convert the ignore area from PDF points to pixel coordinates in the + // convert the ignore areas from PDF points to pixel coordinates in the // rdiff coordinate space: - wxRect ignore_px; - if ( g_ignore_area.valid ) + std::vector ignore_px; { const double scale = (int)g_resolution / 72.0; - // round the edges individually (rather than rounding width/height - // separately) so the rectangle covers every pixel that the - // rasterizer may have touched, including anti-aliased borders - const int px1 = wxRound(g_ignore_area.x * scale); - const int py1 = wxRound(g_ignore_area.y * scale); - const int px2 = wxRound((g_ignore_area.x + g_ignore_area.w) * scale); - const int py2 = wxRound((g_ignore_area.y + g_ignore_area.h) * scale); - ignore_px = wxRect(px1 + r1.x, py1 + r1.y, px2 - px1, py2 - py1); + for ( size_t i = 0; i < g_ignore_areas.size(); i++ ) + { + const IgnoreArea& a = g_ignore_areas[i]; + // round the edges individually (rather than rounding width/height + // separately) so the rectangle covers every pixel that the + // rasterizer may have touched, including anti-aliased borders + const int px1 = wxRound(a.x * scale); + const int py1 = wxRound(a.y * scale); + const int px2 = wxRound((a.x + a.w) * scale); + const int py2 = wxRound((a.y + a.h) * scale); + ignore_px.push_back(wxRect(px1 + r1.x, py1 + r1.y, px2 - px1, py2 - py1)); + } } + auto is_ignored = [&ignore_px](int x, int y) -> bool + { + for ( size_t i = 0; i < ignore_px.size(); i++ ) + { + if ( 
ignore_px[i].Contains(x, y) ) + return true; + } + return false; + }; + // we visualize the differences by taking one channel from s1 // and the other two channels from s2: @@ -232,7 +245,7 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, unsigned char cg2 = *(data2 + x + 1); unsigned char cb2 = *(data2 + x + 2); - const bool ignored = g_ignore_area.valid && ignore_px.Contains(r2.x + x/4, r2.y + y); + const bool ignored = is_ignored(r2.x + x/4, r2.y + y); if ( ( cr1 > (cr2+g_channel_tolerance) || cr1 < (cr2-g_channel_tolerance) || cg1 > (cg2+g_channel_tolerance) || cg1 < (cg2-g_channel_tolerance) @@ -363,17 +376,20 @@ cairo_surface_t *diff_images(int page, cairo_surface_t *s1, cairo_surface_t *s2, // If we specified a tolerance, then return if we have exceeded that for this page if ( g_per_page_pixel_tolerance == 0 ? changes : pixel_diff_count > g_per_page_pixel_tolerance) { - if ( g_ignore_area.valid ) + if ( !ignore_px.empty() ) { - // mark the ignored area with a dashed gray outline, so it's - // clear when reviewing the diff that it was excluded on purpose + // mark the ignored areas with a dashed gray outline, so it's + // clear when reviewing the diff that they were excluded on purpose cairo_t *cr = cairo_create(diff); static const double dashes[] = { 4.0, 4.0 }; cairo_set_dash(cr, dashes, 2, 0); cairo_set_line_width(cr, 2.0); cairo_set_source_rgb(cr, 0.5, 0.5, 0.5); - cairo_rectangle(cr, ignore_px.x, ignore_px.y, ignore_px.width, ignore_px.height); - cairo_stroke(cr); + for ( size_t i = 0; i < ignore_px.size(); i++ ) + { + cairo_rectangle(cr, ignore_px[i].x, ignore_px[i].y, ignore_px[i].width, ignore_px[i].height); + cairo_stroke(cr); + } cairo_destroy(cr); } @@ -1039,7 +1055,9 @@ int main(int argc, char *argv[]) wxCMD_LINE_VAL_NUMBER }, { wxCMD_LINE_OPTION, - NULL, "ignore-area", "ignore differences inside a rectangle, given as X,Y,WIDTH,HEIGHT in PDF points: X,Y is the top-left corner of the rectangle (measured from the 
page's top-left corner)", + NULL, "ignore-area", "ignore differences inside one or more rectangles, each given as X,Y,WIDTH,HEIGHT in PDF points " + "(X,Y is the rectangle's top-left corner, measured from the page's top-left corner); separate multiple rectangles with ';', " + "e.g. --ignore-area=\"20,750,80,80;480,700,40,40\"", wxCMD_LINE_VAL_STRING }, { wxCMD_LINE_OPTION, @@ -1119,28 +1137,31 @@ int main(int argc, char *argv[]) wxString ignore_area_str; if ( parser.Found("ignore-area", &ignore_area_str) ) { - wxStringTokenizer tokenizer(ignore_area_str, ","); - double values[4]; - bool ok = true; - size_t count = 0; - while ( ok && tokenizer.HasMoreTokens() && count < 4 ) - ok = tokenizer.GetNextToken().ToCDouble(&values[count++]); - ok = ok && (count == 4) && !tokenizer.HasMoreTokens(); - - if ( !ok || values[2] <= 0 || values[3] <= 0 ) + wxStringTokenizer area_tokenizer(ignore_area_str, ";"); + while ( area_tokenizer.HasMoreTokens() ) { - fprintf(stderr, "Invalid ignore-area: %s. Expected X,Y,WIDTH,HEIGHT in PDF points " - "(X,Y = top-left corner of the rectangle, measured from the page's top-left " - "corner)\n", - (const char*) ignore_area_str.c_str()); - return 2; - } + const wxString one_area = area_tokenizer.GetNextToken(); + + wxStringTokenizer tokenizer(one_area, ","); + double values[4]; + bool ok = true; + size_t count = 0; + while ( ok && tokenizer.HasMoreTokens() && count < 4 ) + ok = tokenizer.GetNextToken().ToCDouble(&values[count++]); + ok = ok && (count == 4) && !tokenizer.HasMoreTokens(); - g_ignore_area.x = values[0]; - g_ignore_area.y = values[1]; - g_ignore_area.w = values[2]; - g_ignore_area.h = values[3]; - g_ignore_area.valid = true; + if ( !ok || values[2] <= 0 || values[3] <= 0 ) + { + fprintf(stderr, "Invalid ignore-area: %s. Expected one or more X,Y,WIDTH,HEIGHT " + "rectangles (in PDF points; X,Y = top-left corner, measured from the " + "page's top-left corner; WIDTH,HEIGHT > 0), separated by ';', e.g. 
20,750,80,80;480,700,40,40\n", + (const char*) ignore_area_str.c_str()); + return 2; + } + + IgnoreArea area = { values[0], values[1], values[2], values[3] }; + g_ignore_areas.push_back(area); + } } if ( parser.Found("channel-tolerance", &g_channel_tolerance) )