From 6bfd79d84b04b38a1a5ae44d241d216bc55a6435 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Mon, 22 Jun 2026 20:40:26 -0700 Subject: [PATCH 1/4] Improve RankSolrPropagator: progress logging, commitWithin, skip-unchanged Large ontologies previously produced no output until fully done and piled up uncommitted Solr updates, so the job looked hung and slowed down over time. This adds: - per-ontology and per-batch progress logging (flushed), with an up-front doc count, so activity is visible during big ontologies - commitWithin (60s) instead of commit:false, bounding Solr's transaction log and making partial runs durable; final hard commit is retained - batch size 1000 -> 5000 to cut round-trips - skip-unchanged: ontologies whose rank is unchanged since the last propagation (tracked per acronym in Redis) are skipped; force: true re-propagates everything (e.g. after a collection rebuild) Refs ncbo/ncbo_cron#132 --- .../services/rank_solr_propagator.rb | 94 ++++++++++++++++--- test/services/test_rank_solr_propagator.rb | 42 +++++++-- 2 files changed, 116 insertions(+), 20 deletions(-) diff --git a/lib/ontologies_linked_data/services/rank_solr_propagator.rb b/lib/ontologies_linked_data/services/rank_solr_propagator.rb index 0afc5602..bb98c73f 100644 --- a/lib/ontologies_linked_data/services/rank_solr_propagator.rb +++ b/lib/ontologies_linked_data/services/rank_solr_propagator.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'redis' + module LinkedData module Services # Propagates the current ontology rank values into the denormalized @@ -14,21 +16,32 @@ module Services # bioportalScore/umlsScore using the configured weights. # # Updates use true Solr atomic updates ({ id, ontologyRank: { set: score } }) - # so no per-class triplestore read is needed. + # so no per-class triplestore read is needed. Batches use commitWithin so + # Solr soft-commits on a bounded schedule instead of accumulating an + # unbounded transaction log; a final hard commit makes everything durable. + # + # Ontologies whose rank is unchanged since the last successful propagation + # are skipped (tracked per acronym in Redis). Pass force: true to ignore the + # skip cache and re-propagate everything (e.g. after a collection rebuild). # # See https://github.com/ncbo/ncbo_cron/issues/132 class RankSolrPropagator - DEFAULT_BATCH_SIZE = 1000 + DEFAULT_BATCH_SIZE = 5_000 + DEFAULT_COMMIT_WITHIN = 60_000 # ms; Solr soft-commit window during bulk updates + PROGRESS_EVERY = 50_000 # log per-ontology progress every N docs + # Redis key holding the last successfully propagated rank per acronym. + LAST_PROPAGATED_REDIS_FIELD = 'ontology_rank_solr_propagated' - def initialize(logger: nil, batch_size: DEFAULT_BATCH_SIZE) + def initialize(logger: nil, batch_size: DEFAULT_BATCH_SIZE, commit_within: DEFAULT_COMMIT_WITHIN) @logger = logger || Logger.new($stdout) @batch_size = batch_size + @commit_within = commit_within end - # Reads the rank map and writes each ontology's normalizedScore onto all - # of its term_search docs. Returns the count of ontologies whose docs were - # updated. - def propagate(rank_map = nil) + # Reads the rank map and writes each ontology's normalizedScore onto all of + # its term_search docs. Unchanged ontologies are skipped unless force: true. + # Returns the number of ontologies whose docs were updated. + def propagate(rank_map = nil, force: false) rank_map ||= LinkedData::Models::Ontology.rank if rank_map.nil? || rank_map.empty? @@ -36,16 +49,29 @@ def propagate(rank_map = nil) return 0 end + last_propagated = force ? {} : load_last_propagated + total_onts = rank_map.size updated = 0 - rank_map.each do |acronym, rank_info| + skipped = 0 + + rank_map.each_with_index do |(acronym, rank_info), i| score = rank_info[:normalizedScore].to_f + + if !force && last_propagated[acronym] == score + skipped += 1 + next + end + + log("[#{i + 1}/#{total_onts}] #{acronym} rank=#{score}") count = update_ontology_rank(acronym, score) - @logger.info("RankSolrPropagator: #{acronym} rank=#{score} (#{count} docs)") + + last_propagated[acronym] = score + save_last_propagated(last_propagated) updated += 1 if count.positive? end LinkedData::Models::Class.indexCommit(nil) - @logger.info("RankSolrPropagator: committed; updated #{updated} ontologies") + log("final commit done; updated #{updated}, skipped #{skipped} unchanged (of #{total_onts})") updated end @@ -54,21 +80,36 @@ def propagate(rank_map = nil) # Cursor-scans all term_search docs for the acronym and issues batched # atomic updates to ontologyRank. Returns the number of docs updated. def update_ontology_rank(acronym, score) + query = "submissionAcronym:\"#{acronym}\"" + num_found = (LinkedData::Models::Class.search(query, { rows: 0 }).dig('response', 'numFound') || 0).to_i + + if num_found.zero? + log(" #{acronym}: no term_search docs; skipping") + return 0 + end + log(" #{acronym}: #{num_found} docs to update") + cursor = '*' total = 0 - query = "submissionAcronym:\"#{acronym}\"" + last_logged = 0 loop do resp = LinkedData::Models::Class.search( - query, - { fl: 'id', rows: @batch_size, sort: 'id asc', cursorMark: cursor } + query, { fl: 'id', rows: @batch_size, sort: 'id asc', cursorMark: cursor } ) docs = resp.dig('response', 'docs') || [] unless docs.empty? batch = docs.map { |d| { id: d['id'], ontologyRank: { set: score } } } - LinkedData::Models::Class.search_client.index_document(batch, commit: false) + LinkedData::Models::Class.search_client.index_document( + batch, commit: true, commit_within: @commit_within + ) total += batch.size + + if total - last_logged >= PROGRESS_EVERY + log(" #{acronym}: #{total}/#{num_found} docs…") + last_logged = total + end end next_cursor = resp['nextCursorMark'] @@ -77,8 +118,33 @@ def update_ontology_rank(acronym, score) cursor = next_cursor end + log(" #{acronym}: done (#{total} docs)") total end + + def log(msg) + @logger.info("RankSolrPropagator: #{msg}") + @logger.flush if @logger.respond_to?(:flush) + end + + def redis + @redis ||= Redis.new(host: LinkedData.settings.ontology_analytics_redis_host, + port: LinkedData.settings.ontology_analytics_redis_port) + end + + def load_last_propagated + raw = redis.get(LAST_PROPAGATED_REDIS_FIELD) + raw ? Marshal.load(raw) : {} + rescue StandardError => e + @logger.warn("RankSolrPropagator: could not load skip cache (#{e.class}: #{e.message}); propagating all") + {} + end + + def save_last_propagated(map) + redis.set(LAST_PROPAGATED_REDIS_FIELD, Marshal.dump(map)) + rescue StandardError => e + @logger.warn("RankSolrPropagator: could not save skip cache (#{e.class}: #{e.message})") + end end end end diff --git a/test/services/test_rank_solr_propagator.rb b/test/services/test_rank_solr_propagator.rb index 3b83cb80..e14ddeaa 100644 --- a/test/services/test_rank_solr_propagator.rb +++ b/test/services/test_rank_solr_propagator.rb @@ -12,6 +12,7 @@ def self.after_suite def setup self.class.after_suite + clear_propagation_cache # Index a small ontology's terms into the real local term_search collection. submission_parse(ACRONYM, 'BRO Ontology', './test/data/ontology_files/BRO_v3.5.owl', 1, @@ -22,6 +23,19 @@ def setup def teardown self.class.after_suite + clear_propagation_cache + end + + def clear_propagation_cache + Redis.new(host: LinkedData.settings.ontology_analytics_redis_host, + port: LinkedData.settings.ontology_analytics_redis_port) + .del(LinkedData::Services::RankSolrPropagator::LAST_PROPAGATED_REDIS_FIELD) + end + + def stub_rank(score) + LinkedData::Models::Ontology.stubs(:rank).returns( + { ACRONYM => { bioportalScore: 0.5, umlsScore: 0.0, normalizedScore: score } } + ) end def term_docs @@ -35,9 +49,7 @@ def test_propagate_writes_normalized_score_to_all_term_docs docs_before = term_docs refute_empty docs_before, 'expected BRO terms to be indexed in term_search' - LinkedData::Models::Ontology.stubs(:rank).returns( - { ACRONYM => { bioportalScore: 0.5, umlsScore: 0.0, normalizedScore: 0.642 } } - ) + stub_rank(0.642) updated = LinkedData::Services::RankSolrPropagator.new.propagate assert_equal 1, updated @@ -57,9 +69,7 @@ def test_propagate_preserves_searchable_fields )['response']['numFound'] refute_equal 0, before, 'expected a prefLabel match before propagation' - LinkedData::Models::Ontology.stubs(:rank).returns( - { ACRONYM => { bioportalScore: 0.5, umlsScore: 0.0, normalizedScore: 0.642 } } - ) + stub_rank(0.642) LinkedData::Services::RankSolrPropagator.new.propagate # The same search still works after atomic updates (copyField targets and @@ -75,4 +85,24 @@ def test_propagate_returns_zero_for_empty_rank_map LinkedData::Models::Ontology.stubs(:rank).returns({}) assert_equal 0, LinkedData::Services::RankSolrPropagator.new.propagate end + + def test_propagate_skips_unchanged_ontology_on_second_run + stub_rank(0.642) + + first = LinkedData::Services::RankSolrPropagator.new.propagate + assert_equal 1, first, 'first run should propagate the ontology' + + # Rank unchanged -> the second run should skip it entirely. + second = LinkedData::Services::RankSolrPropagator.new.propagate + assert_equal 0, second, 'unchanged ontology should be skipped on the second run' + end + + def test_force_repropagates_even_when_unchanged + stub_rank(0.642) + + LinkedData::Services::RankSolrPropagator.new.propagate + # force: true ignores the skip cache and re-propagates. + forced = LinkedData::Services::RankSolrPropagator.new.propagate(nil, force: true) + assert_equal 1, forced, 'force should re-propagate even when rank is unchanged' + end end From c3a52dfdd0278859a5968b9f5668c5fcd427f854 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Mon, 22 Jun 2026 22:11:26 -0700 Subject: [PATCH 2/4] Fix RankSolrPropagator Solr stalls: commit between ontologies, add retry The weekly run stalled on staging with HTTP 500 'distributed update stalled' errors. Cause: commitWithin issued soft commits *during* the update stream, pausing replicas while the leader's forwarding queue backed up. Fixes: - drop commitWithin; send batches with commit:false and issue one commit *between* ontologies, when no updates are in flight - retry transient Solr errors (stalls, timeouts) with exponential backoff instead of aborting the run; combined with the skip cache an unrecoverable failure resumes where it left off - batch size default 2500, overridable via RANK_SOLR_BATCH_SIZE for staging tuning without a deploy Refs ncbo/ncbo_cron#132 --- .../services/rank_solr_propagator.rb | 71 +++++++++++++------ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/lib/ontologies_linked_data/services/rank_solr_propagator.rb b/lib/ontologies_linked_data/services/rank_solr_propagator.rb index bb98c73f..0b6b1684 100644 --- a/lib/ontologies_linked_data/services/rank_solr_propagator.rb +++ b/lib/ontologies_linked_data/services/rank_solr_propagator.rb @@ -16,9 +16,13 @@ module Services # bioportalScore/umlsScore using the configured weights. # # Updates use true Solr atomic updates ({ id, ontologyRank: { set: score } }) - # so no per-class triplestore read is needed. Batches use commitWithin so - # Solr soft-commits on a bounded schedule instead of accumulating an - # unbounded transaction log; a final hard commit makes everything durable. + # so no per-class triplestore read is needed. Batches are sent without a + # commit; a single commit is issued *between* ontologies (when no updates are + # in flight) to keep the transaction log bounded without pausing replicas + # mid-stream — committing during the update stream backs up SolrCloud's + # leader->replica forwarding queue and causes "distributed update stalled" + # 500s. Each Solr call is retried with backoff so a transient stall is + # survived rather than aborting the whole run. # # Ontologies whose rank is unchanged since the last successful propagation # are skipped (tracked per acronym in Redis). Pass force: true to ignore the @@ -26,16 +30,18 @@ module Services # # See https://github.com/ncbo/ncbo_cron/issues/132 class RankSolrPropagator - DEFAULT_BATCH_SIZE = 5_000 - DEFAULT_COMMIT_WITHIN = 60_000 # ms; Solr soft-commit window during bulk updates - PROGRESS_EVERY = 50_000 # log per-ontology progress every N docs + DEFAULT_BATCH_SIZE = 2_500 + PROGRESS_EVERY = 50_000 # log per-ontology progress every N docs + MAX_RETRIES = 5 + RETRY_BASE_SLEEP = 5 # seconds; exponential backoff: 5, 10, 20, 40, 80 # Redis key holding the last successfully propagated rank per acronym. LAST_PROPAGATED_REDIS_FIELD = 'ontology_rank_solr_propagated' - def initialize(logger: nil, batch_size: DEFAULT_BATCH_SIZE, commit_within: DEFAULT_COMMIT_WITHIN) + # batch_size precedence: explicit arg > RANK_SOLR_BATCH_SIZE env > default. + # The env override lets the batch size be tuned on staging without a deploy. + def initialize(logger: nil, batch_size: nil) @logger = logger || Logger.new($stdout) - @batch_size = batch_size - @commit_within = commit_within + @batch_size = (batch_size || ENV['RANK_SOLR_BATCH_SIZE'] || DEFAULT_BATCH_SIZE).to_i end # Reads the rank map and writes each ontology's normalizedScore onto all of @@ -70,18 +76,18 @@ def propagate(rank_map = nil, force: false) updated += 1 if count.positive? end - LinkedData::Models::Class.indexCommit(nil) - log("final commit done; updated #{updated}, skipped #{skipped} unchanged (of #{total_onts})") + log("done; updated #{updated}, skipped #{skipped} unchanged (of #{total_onts})") updated end private - # Cursor-scans all term_search docs for the acronym and issues batched - # atomic updates to ontologyRank. Returns the number of docs updated. + # Cursor-scans all term_search docs for the acronym, issues batched atomic + # updates to ontologyRank (no commit), then commits once. Returns the + # number of docs updated. def update_ontology_rank(acronym, score) query = "submissionAcronym:\"#{acronym}\"" - num_found = (LinkedData::Models::Class.search(query, { rows: 0 }).dig('response', 'numFound') || 0).to_i + num_found = (solr_search(query, rows: 0).dig('response', 'numFound') || 0).to_i if num_found.zero? log(" #{acronym}: no term_search docs; skipping") @@ -94,16 +100,14 @@ def update_ontology_rank(acronym, score) last_logged = 0 loop do - resp = LinkedData::Models::Class.search( - query, { fl: 'id', rows: @batch_size, sort: 'id asc', cursorMark: cursor } - ) + resp = solr_search(query, fl: 'id', rows: @batch_size, sort: 'id asc', cursorMark: cursor) docs = resp.dig('response', 'docs') || [] unless docs.empty? batch = docs.map { |d| { id: d['id'], ontologyRank: { set: score } } } - LinkedData::Models::Class.search_client.index_document( - batch, commit: true, commit_within: @commit_within - ) + with_retry("update #{acronym}") do + LinkedData::Models::Class.search_client.index_document(batch, commit: false) + end total += batch.size if total - last_logged >= PROGRESS_EVERY @@ -118,10 +122,37 @@ def update_ontology_rank(acronym, score) cursor = next_cursor end + # Commit between ontologies (no updates in flight) — keeps the tlog + # bounded without stalling replica forwarding mid-stream. + log(" #{acronym}: committing #{total} docs…") + with_retry("commit #{acronym}") { LinkedData::Models::Class.indexCommit(nil) } log(" #{acronym}: done (#{total} docs)") total end + def solr_search(query, **params) + with_retry('search') { LinkedData::Models::Class.search(query, params) } + end + + # Retries transient Solr errors (e.g. distributed-update stalls under load) + # with exponential backoff. Re-raises once retries are exhausted. + def with_retry(what) + attempts = 0 + begin + yield + rescue RSolr::Error::Http, Net::ReadTimeout, Net::OpenTimeout, + Errno::ECONNRESET, Errno::EPIPE => e + attempts += 1 + raise if attempts > MAX_RETRIES + + wait = RETRY_BASE_SLEEP * (2**(attempts - 1)) + first_line = e.message.to_s.lines.first&.strip + log("#{what} failed (attempt #{attempts}/#{MAX_RETRIES}); retrying in #{wait}s — #{first_line}") + sleep(wait) + retry + end + end + def log(msg) @logger.info("RankSolrPropagator: #{msg}") @logger.flush if @logger.respond_to?(:flush) From c36ef753ef567d2a4001f83ebc8ff20a4544423d Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Mon, 22 Jun 2026 22:23:12 -0700 Subject: [PATCH 3/4] Make RankSolrPropagator backpressure visible: WARN on retry, count in summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Retries were logged at INFO, buried among progress lines. Now each retry logs at WARN with a greppable BACKPRESSURE marker (ERROR when retries are exhausted), and the final summary reports 'Solr retries: N' — 0 means the retry/backoff path never ran (clean environment), and a non-zero count is flagged at WARN with a hint to lower RANK_SOLR_BATCH_SIZE. Adds tests that force a transient stall (asserting recovery, the warning, and the count) and that a clean run reports 0. Refs ncbo/ncbo_cron#132 --- .../services/rank_solr_propagator.rb | 26 +++++++++++--- test/services/test_rank_solr_propagator.rb | 34 +++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/lib/ontologies_linked_data/services/rank_solr_propagator.rb b/lib/ontologies_linked_data/services/rank_solr_propagator.rb index 0b6b1684..01b3509b 100644 --- a/lib/ontologies_linked_data/services/rank_solr_propagator.rb +++ b/lib/ontologies_linked_data/services/rank_solr_propagator.rb @@ -42,6 +42,7 @@ class RankSolrPropagator def initialize(logger: nil, batch_size: nil) @logger = logger || Logger.new($stdout) @batch_size = (batch_size || ENV['RANK_SOLR_BATCH_SIZE'] || DEFAULT_BATCH_SIZE).to_i + @retry_count = 0 end # Reads the rank map and writes each ontology's normalizedScore onto all of @@ -59,6 +60,7 @@ def propagate(rank_map = nil, force: false) total_onts = rank_map.size updated = 0 skipped = 0 + @retry_count = 0 rank_map.each_with_index do |(acronym, rank_info), i| score = rank_info[:normalizedScore].to_f @@ -76,7 +78,14 @@ def propagate(rank_map = nil, force: false) updated += 1 if count.positive? end - log("done; updated #{updated}, skipped #{skipped} unchanged (of #{total_onts})") + summary = "done; updated #{updated}, skipped #{skipped} unchanged (of #{total_onts}); Solr retries: #{@retry_count}" + if @retry_count.zero? + log(summary) + else + @logger.warn("RankSolrPropagator: BACKPRESSURE — #{summary}. " \ + 'Solr stalled and was retried; consider a smaller RANK_SOLR_BATCH_SIZE.') + @logger.flush if @logger.respond_to?(:flush) + end updated end @@ -143,11 +152,20 @@ def with_retry(what) rescue RSolr::Error::Http, Net::ReadTimeout, Net::OpenTimeout, Errno::ECONNRESET, Errno::EPIPE => e attempts += 1 - raise if attempts > MAX_RETRIES + @retry_count += 1 + first_line = e.message.to_s.lines.first&.strip + + if attempts > MAX_RETRIES + @logger.error("RankSolrPropagator: BACKPRESSURE — #{what} still failing after " \ + "#{MAX_RETRIES} retries; aborting — #{first_line}") + @logger.flush if @logger.respond_to?(:flush) + raise + end wait = RETRY_BASE_SLEEP * (2**(attempts - 1)) - first_line = e.message.to_s.lines.first&.strip - log("#{what} failed (attempt #{attempts}/#{MAX_RETRIES}); retrying in #{wait}s — #{first_line}") + @logger.warn("RankSolrPropagator: BACKPRESSURE — #{what} failed (attempt " \ + "#{attempts}/#{MAX_RETRIES}); retrying in #{wait}s — #{first_line}") + @logger.flush if @logger.respond_to?(:flush) sleep(wait) retry end diff --git a/test/services/test_rank_solr_propagator.rb b/test/services/test_rank_solr_propagator.rb index e14ddeaa..bdd9bc8e 100644 --- a/test/services/test_rank_solr_propagator.rb +++ b/test/services/test_rank_solr_propagator.rb @@ -1,5 +1,6 @@ require_relative '../models/test_ontology_common' require 'mocha/minitest' +require 'stringio' class TestRankSolrPropagator < LinkedData::TestOntologyCommon ACRONYM = 'BRO' @@ -105,4 +106,37 @@ def test_force_repropagates_even_when_unchanged forced = LinkedData::Services::RankSolrPropagator.new.propagate(nil, force: true) assert_equal 1, forced, 'force should re-propagate even when rank is unchanged' end + + # Manifests the Solr-stall condition: the first atomic-update POST raises a + # retryable error, and we assert the run recovers AND emits a visible signal + # (a BACKPRESSURE warning plus a non-zero retry count in the summary). + def test_backpressure_is_retried_and_logged + stub_rank(0.642) + + log_io = StringIO.new + propagator = LinkedData::Services::RankSolrPropagator.new(logger: Logger.new(log_io)) + propagator.stubs(:sleep) # skip the backoff wait in the test + + client = LinkedData::Models::Class.search_client + client.stubs(:index_document) + .raises(Errno::ECONNRESET.new('simulated Solr stall')) + .then.returns(true) + + updated = propagator.propagate + assert_equal 1, updated, 'run should recover from the transient error' + + log = log_io.string + assert_match(/BACKPRESSURE/, log, 'a retry must emit a filterable BACKPRESSURE warning') + assert_match(/Solr retries: 1/, log, 'summary must report the retry count') + end + + # A clean run reports zero retries, so "Solr retries: 0" is the all-clear. + def test_clean_run_reports_zero_retries + stub_rank(0.642) + + log_io = StringIO.new + LinkedData::Services::RankSolrPropagator.new(logger: Logger.new(log_io)).propagate + + assert_match(/Solr retries: 0/, log_io.string, 'clean run must report zero retries') + end end From e559fe9cf97fb6c7fcc061420b1ecc652b6a3de5 Mon Sep 17 00:00:00 2001 From: Michael Dorf Date: Tue, 23 Jun 2026 08:56:02 -0700 Subject: [PATCH 4/4] Retry RSolr ConnectionRefused and broader transient errors in RankSolrPropagator A momentary ConnectionRefused (Solr stayed up; transient connection hiccup) aborted a live run because with_retry only caught RSolr::Error::Http and a few Errno types. In rsolr's hierarchy ConnectionRefused < Errno::ECONNREFUSED, a separate family from Http, so it was never retried. Broaden the rescue list to cover the connection/timeout families (ConnectionRefused, ECONNREFUSED, ETIMEDOUT, Net::*Timeout, SocketError, ...) so transient blips are waited out and surface as BACKPRESSURE/retry-count instead of killing the run. Test now raises a real RSolr::Error::ConnectionRefused. Refs ncbo/ncbo_cron#132 --- .../services/rank_solr_propagator.rb | 6 ++++-- test/services/test_rank_solr_propagator.rb | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/ontologies_linked_data/services/rank_solr_propagator.rb b/lib/ontologies_linked_data/services/rank_solr_propagator.rb index 01b3509b..dfd6c09c 100644 --- a/lib/ontologies_linked_data/services/rank_solr_propagator.rb +++ b/lib/ontologies_linked_data/services/rank_solr_propagator.rb @@ -149,8 +149,10 @@ def with_retry(what) attempts = 0 begin yield - rescue RSolr::Error::Http, Net::ReadTimeout, Net::OpenTimeout, - Errno::ECONNRESET, Errno::EPIPE => e + rescue RSolr::Error::Http, RSolr::Error::ConnectionRefused, + Errno::ECONNREFUSED, Errno::ECONNRESET, Errno::EPIPE, Errno::ETIMEDOUT, + Net::ReadTimeout, Net::OpenTimeout, Net::WriteTimeout, + SocketError => e attempts += 1 @retry_count += 1 first_line = e.message.to_s.lines.first&.strip diff --git a/test/services/test_rank_solr_propagator.rb b/test/services/test_rank_solr_propagator.rb index bdd9bc8e..1c13c0dd 100644 --- a/test/services/test_rank_solr_propagator.rb +++ b/test/services/test_rank_solr_propagator.rb @@ -117,10 +117,11 @@ def test_backpressure_is_retried_and_logged propagator = LinkedData::Services::RankSolrPropagator.new(logger: Logger.new(log_io)) propagator.stubs(:sleep) # skip the backoff wait in the test + # RSolr::Error::ConnectionRefused is the error that previously slipped past + # the retry list and aborted a live run; assert it is now retried. client = LinkedData::Models::Class.search_client - client.stubs(:index_document) - .raises(Errno::ECONNRESET.new('simulated Solr stall')) - .then.returns(true) + conn_refused = RSolr::Error::ConnectionRefused.new(uri: URI('http://solr.test/term_search/select')) + client.stubs(:index_document).raises(conn_refused).then.returns(true) updated = propagator.propagate assert_equal 1, updated, 'run should recover from the transient error'