From 351ce663e83f3eeecce8e689c59b3bea83125577 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 10:56:22 -0600
Subject: [PATCH 1/9] fix(sitesearch): vendor-neutral aggregation abstraction
(#35786)
Decouple SiteSearchAPI/SiteSearchWebAPI from Elasticsearch aggregation
types so Site Search can be served by OpenSearch in Phase 3.
- Reuse the existing neutral com.dotcms.content.index.domain.Aggregation
/ AggregationBucket DTOs (from #36026) instead of a new IndexAggregation
- Add neutral DotSearchException (unchecked) to replace ElasticsearchException
on the public API surface
- SiteSearchAPI: drop org.elasticsearch.* imports; neutral Aggregation
return type; createSiteSearchIndex throws DotSearchException
- SiteSearchWebAPI: remove InternalDateHistogram/StringTerms/Bucket casts
and the Joda DateTime import; getFacets distinguishes histogram vs terms
by aggregation type and feeds the legacy wrappers neutral buckets
- ESSiteSearchAPI: adapt ES results via Aggregation.from(); ES exception
throws -> DotSearchException
- Add date/numeric histogram support to the neutral Aggregation ES factory
(also fixes a latent CCE: the old getFacets cast the histogram key to
Joda DateTime, which is a java.time.ZonedDateTime in ES 7.x)
OSSiteSearchAPI is deferred to #34609 (not yet in the codebase);
Aggregation.fromOS() is already in place for it.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/ESSiteSearchAPI.java | 19 ++++----
.../content/index/domain/Aggregation.java | 6 +++
.../index/domain/AggregationBucket.java | 28 +++++++++++
.../index/domain/DotSearchException.java | 32 +++++++++++++
.../sitesearch/business/SiteSearchAPI.java | 6 +--
.../sitesearch/viewtool/SiteSearchWebAPI.java | 47 +++++++++++--------
6 files changed, 106 insertions(+), 32 deletions(-)
create mode 100644 dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index 09f1f54c96b7..b8b0dc3cd8ce 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -14,6 +14,8 @@
import com.dotcms.content.elasticsearch.business.*;
import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.LicenseUtil;
import com.dotcms.enterprise.license.LicenseLevel;
import com.dotcms.enterprise.priv.util.SearchSourceBuilderUtil;
@@ -64,7 +66,6 @@
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
-import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
@@ -351,7 +352,7 @@ public void deactivateIndex(String indexName) throws DotDataException, IOExcepti
}
@Override
- public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException {
+ public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException {
if(indexName==null){
return false;
}
@@ -379,7 +380,7 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias
}
if(i++ > 300){
- throw new ElasticsearchException("index timed out creating");
+ throw new DotSearchException("index timed out creating");
}
}
@@ -634,7 +635,7 @@ public Map getAggregations ( String indexName, String query
}
if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName) ) {
- throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" );
+ throw new DotSearchException( indexName + " is not a sitesearch index or alias" );
}
//https://github.com/elasticsearch/elasticsearch/issues/2980
@@ -648,10 +649,10 @@ public Map getAggregations ( String indexName, String query
.timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)));
final SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- return response.getAggregations().asMap();
+ return Aggregation.from(response.getAggregations());
} catch ( ElasticsearchException | IOException e ) {
Logger.error( this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e );
- throw new ElasticsearchException( "Error getting aggregations for query.\n" + e.getMessage(), e );
+ throw new DotSearchException( "Error getting aggregations for query.\n" + e.getMessage(), e );
}
}
@@ -669,7 +670,7 @@ public Map getFacets ( String indexName, String query ) thr
}
if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName ) ) {
- throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" );
+ throw new DotSearchException( indexName + " is not a sitesearch index or alias" );
}
//https://github.com/elasticsearch/elasticsearch/issues/2980
@@ -683,10 +684,10 @@ public Map getFacets ( String indexName, String query ) thr
.timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)));
final SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- return response.getAggregations().asMap();
+ return Aggregation.from(response.getAggregations());
} catch ( ElasticsearchException | IOException e ) {
Logger.error( this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e );
- throw new ElasticsearchException( "Error getting Facets for query.\n" + e.getMessage(), e );
+ throw new DotSearchException( "Error getting Facets for query.\n" + e.getMessage(), e );
}
}
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
index 48db102f1ed9..e94984f6ed3c 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
@@ -88,6 +88,12 @@ private static Aggregation fromSingle(final org.elasticsearch.search.aggregation
builder.buckets(terms.getBuckets().stream()
.map(AggregationBucket::from)
.collect(Collectors.toList()));
+ } else if (esAgg instanceof org.elasticsearch.search.aggregations.bucket.histogram.Histogram) {
+ final org.elasticsearch.search.aggregations.bucket.histogram.Histogram histogram =
+ (org.elasticsearch.search.aggregations.bucket.histogram.Histogram) esAgg;
+ builder.buckets(histogram.getBuckets().stream()
+ .map(AggregationBucket::fromHistogram)
+ .collect(Collectors.toList()));
} else if (esAgg instanceof org.elasticsearch.search.aggregations.metrics.TopHits) {
final org.elasticsearch.search.aggregations.metrics.TopHits topHits =
(org.elasticsearch.search.aggregations.metrics.TopHits) esAgg;
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
index c8904dcc8c34..79929696d59d 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
@@ -92,6 +92,34 @@ public static AggregationBucket from(
.build();
}
+ /**
+ * Creates a bucket from an Elasticsearch histogram bucket (date or numeric), including its
+ * sub-aggregations. The key is normalized to its numeric form so {@link #getKeyAsNumber()}
+ * returns the epoch-millis (date histogram) or the numeric interval (numeric histogram):
+ * a date-histogram key is a {@code java.time.ZonedDateTime} in ES 7.x, not a number, so it is
+ * converted to epoch-millis here rather than via {@code getKeyAsString()} (which yields a
+ * formatted date).
+ */
+ public static AggregationBucket fromHistogram(
+ final org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket esBucket) {
+ return builder()
+ .key(histogramKey(esBucket.getKey()))
+ .docCount(esBucket.getDocCount())
+ .subAggregations(Aggregation.from(esBucket.getAggregations()))
+ .build();
+ }
+
+ /**
+ * Normalizes a histogram bucket key to a numeric String ({@link #getKeyAsNumber()}-friendly).
+ *
+ * <p>A {@code java.time.ZonedDateTime} key becomes its epoch-millis value, a {@code Number} key
+ * becomes the decimal form of its {@code longValue()}, and any other key (including
+ * {@code null}) falls back to {@code String.valueOf(key)}.
+ *
+ * <p>NOTE(review): {@code longValue()} discards the fractional part, so a numeric histogram
+ * with a non-integral interval (e.g. keys 0.5 and 1.5) yields truncated keys — confirm this
+ * is intended.
+ */
+ private static String histogramKey(final Object key) {
+ if (key instanceof java.time.ZonedDateTime) {
+ return String.valueOf(((java.time.ZonedDateTime) key).toInstant().toEpochMilli());
+ }
+ if (key instanceof Number) {
+ return String.valueOf(((Number) key).longValue());
+ }
+ return String.valueOf(key);
+ }
+
// -------------------------------------------------------------------------
// OS factories
// -------------------------------------------------------------------------
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
new file mode 100644
index 000000000000..6a45e5d0186b
--- /dev/null
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
@@ -0,0 +1,32 @@
+package com.dotcms.content.index.domain;
+
+import com.dotmarketing.exception.DotRuntimeException;
+
+/**
+ * Vendor-neutral search exception for the index abstraction layer.
+ *
+ * <p>Replaces {@code org.elasticsearch.ElasticsearchException} on the public surface of the
+ * search/site-search APIs so that callers — and the interfaces themselves — no longer couple to
+ * Elasticsearch (or any other engine) types. It is the neutral failure signal raised by both the
+ * Elasticsearch and OpenSearch providers when a search or index operation cannot be completed.
+ *
+ * <p>It extends {@link DotRuntimeException} (and therefore is unchecked) to mirror the unchecked
+ * nature of {@code ElasticsearchException}: existing callers that never declared a {@code catch}
+ * for the vendor exception keep compiling unchanged.
+ */
+public class DotSearchException extends DotRuntimeException {
+
+ private static final long serialVersionUID = 1L;
+
+ public DotSearchException(final String message) {
+ super(message);
+ }
+
+ public DotSearchException(final Throwable cause) {
+ super(cause);
+ }
+
+ public DotSearchException(final String message, final Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
index ac2031f1ac73..7a13c33847b1 100644
--- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
+++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
@@ -5,10 +5,10 @@
import java.util.List;
import java.util.Map;
-import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.search.aggregations.Aggregation;
import org.quartz.SchedulerException;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchConfig;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchPublishStatus;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
@@ -36,7 +36,7 @@ public interface SiteSearchAPI {
void deactivateIndex(String indexName) throws DotDataException, IOException;
- boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException;
+ boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException;
boolean setAlias(String indexName, final String alias);
diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
index de09cbcff072..ed3bf36bb8dd 100644
--- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
+++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
@@ -1,6 +1,8 @@
package com.dotmarketing.sitesearch.viewtool;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
@@ -13,16 +15,11 @@
import com.dotmarketing.util.StringUtils;
import org.apache.velocity.tools.view.context.ViewContext;
import org.apache.velocity.tools.view.tools.ViewTool;
-import org.elasticsearch.search.aggregations.Aggregation;
-import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
-import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
-import org.elasticsearch.search.aggregations.bucket.terms.StringTerms.Bucket;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.*;
-import org.joda.time.DateTime;
public class SiteSearchWebAPI implements ViewTool {
@@ -173,15 +170,16 @@ public Map getFacets(final String indexName, final String query)
for (String key : aggregations.keySet()) {
final Aggregation aggregation = aggregations.get(key);
+ final String type = aggregation.getType();
- if (aggregation instanceof InternalDateHistogram) {
+ if (isHistogram(type)) {
internalFacet = new InternalWrapperCountDateHistogramFacet(aggregation.getName(),
- aggregation.getType(), ((InternalDateHistogram) aggregation).getBuckets());
- } else if (aggregation instanceof StringTerms) {
+ type, aggregation.getBuckets());
+ } else if (!aggregation.getBuckets().isEmpty()) {
internalFacet = new InternalWrapperStringTermsFacet(aggregation.getName(),
- aggregation.getType(), ((StringTerms) aggregation).getBuckets());
+ type, aggregation.getBuckets());
} else {
- internalFacet = new Facet(aggregation.getName(), aggregation.getType());
+ internalFacet = new Facet(aggregation.getName(), type);
}
internalFacets.put(key, internalFacet);
}
@@ -189,23 +187,32 @@ public Map getFacets(final String indexName, final String query)
return internalFacets;
}
+ /**
+ * A histogram aggregation (date or numeric) reports a vendor type containing
+ * {@code "histogram"} (e.g. {@code date_histogram}); its buckets carry numeric keys.
+ */
+ private static boolean isHistogram(final String type) {
+ return type != null && type.contains("histogram");
+ }
+
/**
* Internal wrapper class for backwards compatibility with the new Elastic Search in Site
* Search.
*
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class InternalWrapperCountDateHistogramFacet extends Facet {
private final List entries;
public InternalWrapperCountDateHistogramFacet(final String name, final String type,
- List entries) {
+ List entries) {
super(name, type);
this.entries = new ArrayList<>();
- for (final InternalDateHistogram.Bucket entry : entries) {
- this.entries.add(new CountEntry(((DateTime) entry.getKey()).getMillis(),
- entry.getDocCount()));
+ for (final AggregationBucket entry : entries) {
+ final Number key = entry.getKeyAsNumber();
+ final long time = key != null ? key.longValue() : 0L;
+ this.entries.add(new CountEntry(time, entry.getDocCount()));
}
}
@@ -237,20 +244,20 @@ public long getCount() {
* Internal wrapper class for backwards compatibility with the new Elastic Search in Site
* Search.
*
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class InternalWrapperStringTermsFacet extends Facet {
private List entries;
- public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) {
+ public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) {
super(name, type);
this.entries = new ArrayList<>();
- for (final Bucket entry : entries) {
+ for (final AggregationBucket entry : entries) {
this.entries
- .add(new InternalTermEntry(entry.getKey().toString(), entry.getDocCount()));
+ .add(new InternalTermEntry(entry.getKey(), entry.getDocCount()));
}
}
@@ -279,7 +286,7 @@ public long getCount() {
}
/**
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class Facet {
From b0646854130ce26fa2cb7823bbd281da08491f6b Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 12:29:53 -0600
Subject: [PATCH 2/9] feat(sitesearch): OpenSearch impl + phase-aware router
for Site Search (#35786)
Completes the vendor-neutral Site Search extraction begun in #35786 by adding
the OpenSearch implementation and a phase-aware router, so Site Search dual-writes
and reads correctly across the ES -> OS migration phases.
- OSSiteSearchAPI: @ApplicationScoped @Default OpenSearch implementation of
SiteSearchAPI. Search/aggregations via the generic client -> ContentSearchResponse
(mirrors OSSearchAPIImpl); doc put/delete via _doc PUT/DELETE; get via typed
client.get(...). Default site-search index resolved from VersionedIndicesAPI
(not the deprecated IndiciesAPI). Index names handled in logical space; the
.os tag forced by VersionedIndicesAPI is stripped on read.
- SiteSearchAPIImpl: PhaseRouter-based router mirroring IndexAPIImpl and
acting as the single fan-out point. Reads -> read provider; doc/index writes ->
write fan-out; listIndices/listClosedIndices merge in dual-write; Quartz task
methods route to a single provider (fan-out would double-schedule jobs).
- ESSiteSearchAPI: use raw ESIndexAPI instead of the IndexAPI router so the
SiteSearch router is the only fan-out point (avoids double dual-write).
- APILocator: SITESEARCH_API now returns SiteSearchAPIImpl.
- OSSiteSearchAPIIntegrationTest: lifecycle, doc round-trip, aggregations, and
default-index activation; registered in OpenSearchUpgradeSuite.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/ESSiteSearchAPI.java | 6 +-
.../sitesearch/OSSiteSearchAPI.java | 824 ++++++++++++++++++
.../sitesearch/SiteSearchAPIImpl.java | 288 ++++++
.../com/dotmarketing/business/APILocator.java | 3 +-
.../com/dotcms/OpenSearchUpgradeSuite.java | 4 +-
.../OSSiteSearchAPIIntegrationTest.java | 300 +++++++
6 files changed, 1422 insertions(+), 3 deletions(-)
create mode 100644 dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
create mode 100644 dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
create mode 100644 dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index b8b0dc3cd8ce..3197bd4b0735 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -89,7 +89,11 @@ public ESSiteSearchAPI(final IndexAPI indexApi,
}
public ESSiteSearchAPI() {
- this(APILocator.getESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI());
+ // Use the vendor-specific ESIndexAPI directly (NOT APILocator.getESIndexAPI(), which returns
+ // the phase-aware IndexAPIImpl router). The SiteSearchAPIImpl router is the single fan-out
+ // point for the ES → OS migration; routing index ops through the neutral router here as well
+ // would dual-write a second time and create duplicate OpenSearch indices.
+ this(new ESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI());
}
/**
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
new file mode 100644
index 000000000000..c1d226d23460
--- /dev/null
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -0,0 +1,824 @@
+/*
+*
+* Copyright (c) 2025 dotCMS LLC
+* Use of this software is governed by the Business Source License included
+* in the LICENSE file found at in the root directory of software.
+* SPDX-License-Identifier: BUSL-1.1
+*
+*/
+
+package com.dotcms.enterprise.publishing.sitesearch;
+
+import com.dotcms.cdi.CDIUtils;
+import com.dotcms.content.elasticsearch.business.ContentletIndexAPIImpl;
+import com.dotcms.content.elasticsearch.business.ESMappingAPIImpl;
+import com.dotcms.content.elasticsearch.business.IndexType;
+import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.IndexTag;
+import com.dotcms.content.index.VersionedIndices;
+import com.dotcms.content.index.VersionedIndicesAPI;
+import com.dotcms.content.index.VersionedIndicesImpl;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.ContentSearchResponse;
+import com.dotcms.content.index.domain.DotSearchException;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.content.index.domain.SearchHits;
+import com.dotcms.content.index.opensearch.MappingOperationsOS;
+import com.dotcms.content.index.opensearch.OSClientProvider;
+import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
+import com.dotcms.enterprise.LicenseUtil;
+import com.dotcms.enterprise.license.LicenseLevel;
+import com.dotcms.publishing.job.SiteSearchJobProxy;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.exception.DotDataException;
+import com.dotmarketing.exception.DotRuntimeException;
+import com.dotmarketing.quartz.CronScheduledTask;
+import com.dotmarketing.quartz.QuartzUtils;
+import com.dotmarketing.quartz.ScheduledTask;
+import com.dotmarketing.quartz.TaskRuntimeValues;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.util.Logger;
+import com.dotmarketing.util.StringUtils;
+import com.dotmarketing.util.UUIDGenerator;
+import com.dotmarketing.util.UtilMethods;
+import com.dotmarketing.util.json.JSONArray;
+import com.dotmarketing.util.json.JSONException;
+import com.dotmarketing.util.json.JSONObject;
+import com.google.common.annotations.VisibleForTesting;
+import io.vavr.control.Try;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import javax.enterprise.context.ApplicationScoped;
+import javax.enterprise.inject.Default;
+import javax.inject.Inject;
+import org.opensearch.client.json.JsonpDeserializer;
+import org.opensearch.client.json.JsonpMapper;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.GetResponse;
+import org.opensearch.client.opensearch.core.SearchResponse;
+import org.opensearch.client.opensearch.generic.Bodies;
+import org.opensearch.client.opensearch.generic.Body;
+import org.opensearch.client.opensearch.generic.Requests;
+import org.opensearch.client.opensearch.generic.Response;
+import org.quartz.SchedulerException;
+
+/**
+ * OpenSearch implementation of {@link SiteSearchAPI}.
+ *
+ * <p>Vendor-specific counterpart to {@link ESSiteSearchAPI}. The two implementations are kept
+ * functionally symmetric and are selected at runtime by the {@link SiteSearchAPIImpl} router based
+ * on the migration phase. This class confines every {@code org.opensearch.*} type to its private
+ * helpers — the {@link SiteSearchAPI} contract it implements is vendor-neutral.
+ *
+ * <h3>Index source of truth</h3>
+ * <p>Where {@link ESSiteSearchAPI} reads the active site-search index name from the legacy
+ * {@code IndiciesAPI}, this class uses {@link VersionedIndicesAPI} — the canonical OpenSearch index
+ * registry — via the {@code siteSearch} slot of the default ({@link VersionedIndices#OPENSEARCH_3X})
+ * versioned indices. Index lifecycle operations (create/list/delete/alias) are delegated to
+ * the OpenSearch {@link IndexAPI} provider ({@link OSIndexAPIImpl}) directly rather than the neutral
+ * router, because the {@link SiteSearchAPIImpl} router is already the single phase-aware fan-out point
+ * — routing through the neutral {@code IndexAPI} router here would dual-write a second time.
+ *
+ * <h3>Index naming</h3>
+ * <p>Site-search index names are handled as plain logical names (e.g. {@code sitesearch_1718000000000}),
+ * exactly as in {@link ESSiteSearchAPI}: the cluster-id prefix is added only when a name reaches the
+ * OpenSearch client (via {@link IndexAPI#getNameWithClusterIDPrefix(String)}). The {@code .os}
+ * {@link com.dotcms.content.index.IndexTag} is intentionally not applied to site-search indices —
+ * production ES and OS run on separate clusters, and the site-search pointer lives in its own
+ * {@code siteSearch} slot, so there is no shared-name collision to disambiguate.
+ * TODO OS: revisit if single-cluster dual-write of site-search is ever required (then tag with
+ * {@code IndexTag.OS}).
+ *
+ * @author Fabrizio Araya
+ * @see ESSiteSearchAPI
+ * @see SiteSearchAPIImpl
+ * @see com.dotcms.content.index.opensearch.OSSearchAPIImpl
+ */
+@ApplicationScoped
+@Default
+public class OSSiteSearchAPI implements SiteSearchAPI {
+
+    /**
+     * Response deserializer with {@code TDocument} bound to {@code Object} (JSON objects become
+     * {@code Map}). The query body is sent through the low-level (generic) client so nested
+     * sub-aggregations are preserved; the bare {@code SearchResponse._DESERIALIZER} has no document
+     * deserializer bound and would fail on a hit carrying a {@code _source}. Mirrors
+     * {@link com.dotcms.content.index.opensearch.OSSearchAPIImpl}.
+     */
+    private static final JsonpDeserializer<SearchResponse<Object>> SEARCH_RESPONSE_DESERIALIZER =
+            SearchResponse.createSearchResponseDeserializer(JsonpDeserializer.of(Object.class));
+
+ private final OSClientProvider clientProvider;
+ private final IndexAPI indexApi;
+ private final MappingOperationsOS mappingOperations;
+
+ /**
+ * CDI-managed constructor. Looks up {@link OSClientProvider}, {@link OSIndexAPIImpl} and
+ * {@link MappingOperationsOS} through {@code CDIUtils.getBeanThrows} and delegates to the
+ * three-argument constructor.
+ */
+ @Inject
+ public OSSiteSearchAPI() {
+ this(CDIUtils.getBeanThrows(OSClientProvider.class),
+ CDIUtils.getBeanThrows(OSIndexAPIImpl.class),
+ CDIUtils.getBeanThrows(MappingOperationsOS.class));
+ }
+
+ /**
+ * Package-private constructor for testing; stores the supplied collaborators as-is.
+ *
+ * @param clientProvider OpenSearch client provider
+ * @param indexApi index provider used for index-level operations
+ * @param mappingOperations OpenSearch mapping operations
+ */
+ @VisibleForTesting
+ OSSiteSearchAPI(final OSClientProvider clientProvider,
+ final IndexAPI indexApi,
+ final MappingOperationsOS mappingOperations) {
+ this.clientProvider = clientProvider;
+ this.indexApi = indexApi;
+ this.mappingOperations = mappingOperations;
+ }
+
+ // =========================================================================
+ // Index listing
+ // =========================================================================
+
+    /**
+     * Lists the site-search indices reported by the index provider in descending name order,
+     * then asks {@code setDefaultToSpecificPosition} to move the default index to position 0.
+     *
+     * @return the site-search index names; an empty list when the license level is below
+     *         {@code LicenseLevel.STANDARD}
+     */
+    @Override
+    public List<String> listIndices() {
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return Collections.emptyList();
+        }
+        final List<String> indices = indexApi.listIndices().stream()
+                .filter(IndexType.SITE_SEARCH::is)
+                .collect(Collectors.toList());
+
+        Collections.sort(indices);
+        Collections.reverse(indices);
+        setDefaultToSpecificPosition(indices, 0);
+        return indices;
+    }
+
+    /**
+     * Moves the active (default) site-search index to {@code indexPosition} of the list, mirroring
+     * {@link ESSiteSearchAPI} but resolving the default from {@link VersionedIndicesAPI}.
+     *
+     * <p>The list is left untouched when it is {@code null}, has fewer than two entries, or no
+     * default index is set. A default that is not present in the list is only logged as a warning.
+     *
+     * @param list          mutable list of site-search index names, reordered in place
+     * @param indexPosition target position for the default index
+     */
+    private void setDefaultToSpecificPosition(final List<String> list, final int indexPosition) {
+        if (list == null || list.size() <= 1) {
+            return;
+        }
+        final String defaultIndice = defaultSiteSearchIndex().orElse(null);
+        // The list is known to hold at least two entries here, so no emptiness re-check is needed.
+        if (!UtilMethods.isSet(defaultIndice)) {
+            return;
+        }
+        final int index = list.indexOf(defaultIndice);
+        if (index < 0) {
+            Logger.warn(this.getClass(), String.format(
+                    "The default site search '%s' index was not found in the list of indices.",
+                    defaultIndice));
+            return;
+        }
+        list.remove(index);
+        list.add(indexPosition, defaultIndice);
+    }
+
+    /**
+     * Lists the closed site-search indices reported by the index provider, in descending name
+     * order.
+     *
+     * @return the closed site-search index names; an empty list when the license level is below
+     *         {@code LicenseLevel.STANDARD}
+     */
+    @Override
+    public List<String> listClosedIndices() {
+        final List<String> indices = new ArrayList<>();
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return indices;
+        }
+        for (final String indexName : indexApi.getClosedIndexes()) {
+            if (IndexType.SITE_SEARCH.is(indexName)) {
+                indices.add(indexName);
+            }
+        }
+        Collections.sort(indices);
+        Collections.reverse(indices);
+        return indices;
+    }
+
+ // =========================================================================
+ // Search & aggregations
+ // =========================================================================
+
+    /**
+     * Searches the default site-search index.
+     *
+     * <p>A {@code null} query produces a result carrying the error {@code "null query"}; a license
+     * level below {@code LicenseLevel.STANDARD} produces an empty result. Any exception raised
+     * while delegating is reported through the result's error message instead of being thrown.
+     *
+     * @param query the query to run
+     * @param start offset of the first hit
+     * @param rows  maximum number of hits
+     * @return the search results, never {@code null}
+     */
+    @Override
+    public SiteSearchResults search(final String query, final int start, final int rows) {
+        final SiteSearchResults fallback = new SiteSearchResults();
+        if (query == null) {
+            fallback.setError("null query");
+        } else if (LicenseUtil.getLevel() >= LicenseLevel.STANDARD.level) {
+            try {
+                final String defaultIndex = defaultSiteSearchIndex().orElse(null);
+                return search(defaultIndex, query, start, rows);
+            } catch (final Exception e) {
+                fallback.setError(e.getMessage());
+            }
+        }
+        return fallback;
+    }
+
+ /**
+ * Searches a site-search index and maps the hits into a {@link SiteSearchResults}.
+ *
+ * <p>A blank query defaults to {@code "*"}. A non-JSON query is wrapped in a
+ * {@code query_string} body with highlighting on {@code content}; a JSON query is sent as the
+ * request body as-is, so {@code offset} and {@code limit} are not applied to it. Failures are
+ * logged and reported through the result's error message rather than thrown.
+ *
+ * @param indexName index to search; {@code null} falls back to the default site-search index
+ * @param query query string or JSON search body
+ * @param offset offset of the first hit (added to the body only when greater than 0)
+ * @param limit maximum number of hits (added to the body only when greater than 0)
+ * @return the search results; empty when the license level is below {@code LicenseLevel.STANDARD}
+ */
+ @Override
+ public SiteSearchResults search(String indexName, String query, final int offset, final int limit) {
+ if (!UtilMethods.isSet(query)) {
+ query = "*";
+ }
+ final SiteSearchResults results = new SiteSearchResults();
+
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return results;
+ }
+
+ // Decided before the '/' escaping below rewrites the query text.
+ final boolean isJson = StringUtils.isJson(query);
+
+ // Escape every '/' as '\/' — see https://github.com/elasticsearch/elasticsearch/issues/2980
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\/");
+ }
+
+ results.setQuery(query);
+ results.setLimit(limit);
+ results.setOffset(offset);
+
+ try {
+ if (indexName == null) {
+ indexName = defaultSiteSearchIndex().orElse(null);
+ }
+ if (!IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index");
+ }
+ results.setIndex(indexName);
+
+ final JSONObject body;
+ if (!isJson) {
+ // Plain query: build a query_string search over all fields.
+ body = new JSONObject();
+ body.put("query", new JSONObject().put("query_string",
+ new JSONObject().put("query", query).put("default_field", "*")));
+ if (limit > 0) {
+ body.put("size", limit);
+ }
+ if (offset > 0) {
+ body.put("from", offset);
+ }
+ body.put("highlight", new JSONObject().put("fields",
+ new JSONObject().put("content", new JSONObject().put("fragment_size", 255))));
+ } else {
+ // JSON query: the caller-supplied body is used unchanged.
+ body = new JSONObject(query);
+ }
+
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), body);
+ results.setTook(response.tookMillis() + "ms");
+ if (!isJson) {
+ // Report the generated body instead of the raw query string.
+ results.setQuery(body.toString());
+ }
+
+ final SearchHits hits = response.hits();
+ results.setTotalResults(hits.getTotalHits().value());
+
+ // Highest score among the returned hits; stays 0 when there are none.
+ float maxScore = 0f;
+ for (final SearchHit hit : hits) {
+ final SiteSearchResult ssr = new SiteSearchResult(new HashMap<>(hit.getSourceAsMap()));
+ ssr.setScore(hit.getScore());
+ maxScore = Math.max(maxScore, hit.getScore());
+ // TODO OS: the neutral SearchHit DTO does not carry per-field highlights yet.
+ // Site-search highlights are a best-effort extra (the ES path also swallows
+ // highlight failures); set empty until the neutral hit exposes highlight fragments.
+ ssr.setHighLight(new String[0]);
+ results.getResults().add(ssr);
+ }
+ results.setMaxScore(maxScore);
+
+ } catch (final Exception e) {
+ // Best-effort contract: surface the failure on the result instead of throwing.
+ Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+ results.setError(e.getMessage());
+ }
+
+ return results;
+ }
+
+ @Override
+ public Map getAggregations(String indexName, String query)
+ throws DotDataException {
+ indexName = resolveIndexOrAlias(indexName);
+ if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index or alias");
+ }
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\\\\\/");
+ }
+
+ try {
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query));
+ return response.aggregationTree();
+ } catch (final Exception e) {
+ Logger.error(this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e);
+ throw new DotSearchException("Error getting aggregations for query.\n" + e.getMessage(), e);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @deprecated use {@link #getAggregations(String, String)} instead.
+ */
+ @Deprecated
+ @Override
+ public Map getFacets(String indexName, String query) throws DotDataException {
+ indexName = resolveIndexOrAlias(indexName);
+ if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index or alias");
+ }
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\\\\\/");
+ }
+
+ try {
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query));
+ return response.aggregationTree();
+ } catch (final Exception e) {
+ Logger.error(this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e);
+ throw new DotSearchException("Error getting Facets for query.\n" + e.getMessage(), e);
+ }
+ }
+
+ // =========================================================================
+ // Default index activation / inspection
+ // =========================================================================
+
+ @Override
+ public boolean isDefaultIndex(final String indexName) throws DotDataException {
+ return indexName != null && indexName.equals(defaultSiteSearchIndex().orElse(null));
+ }
+
+ @Override
+ public void activateIndex(final String indexName) throws DotDataException {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ if (!IndexType.SITE_SEARCH.is(indexName)) {
+ return;
+ }
+ final VersionedIndicesImpl.Builder builder = copyDefaultIndices();
+ builder.siteSearch(indexName);
+ saveDefaultIndices(builder);
+ }
+
+ @Override
+ public void deactivateIndex(final String indexName) throws DotDataException, IOException {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ if (!IndexType.SITE_SEARCH.is(indexName)) {
+ return;
+ }
+ // Rebuild the default indices without the site-search slot. saveIndices() does a
+ // delete-by-version then re-insert, so omitting the slot clears the pointer while preserving
+ // the content live/working rows. If site-search was the ONLY slot for this version, the
+ // rebuilt info would be empty (saveIndices rejects empty), so drop the version row instead.
+ final VersionedIndicesImpl rebuilt = copyDefaultIndicesExceptSiteSearch().build();
+ final VersionedIndicesAPI api = APILocator.getVersionedIndicesAPI();
+ if (rebuilt.hasAnyIndex()) {
+ api.saveIndices(rebuilt);
+ } else {
+ api.removeVersion(rebuilt.version());
+ }
+ api.clearCache();
+ }
+
+ // =========================================================================
+ // Index creation / mapping
+ // =========================================================================
+
+ @Override
+ public synchronized boolean createSiteSearchIndex(String indexName, final String alias, final int shards)
+ throws DotSearchException, IOException {
+ if (indexName == null) {
+ return false;
+ }
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return false;
+ }
+
+ indexName = indexName.toLowerCase();
+ final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+ URL url = classLoader.getResource("es-sitesearch-settings.json");
+ final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
+ url = classLoader.getResource("es-sitesearch-mapping.json");
+ final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
+
+ try {
+ indexApi.createIndex(indexName, settings, shards);
+ } catch (final Exception e) {
+ throw new DotSearchException("Error creating OpenSearch site search index: " + e.getMessage(), e);
+ }
+
+ if (UtilMethods.isSet(alias)) {
+ indexApi.createAlias(indexName, alias);
+ }
+
+ try {
+ mappingOperations.putMapping(List.of(indexName), mapping);
+ } catch (final IOException e) {
+ throw new DotSearchException("Error applying mapping to OpenSearch site search index: "
+ + e.getMessage(), e);
+ }
+
+ return true;
+ }
+
+    @Override
+    public synchronized boolean setAlias(String indexName, final String alias) {
+        if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+            return false;
+        }
+        if (UtilMethods.isNotSet(indexName) || UtilMethods.isNotSet(alias)) {
+            throw new IllegalArgumentException(String.format(
+                    " either one or both params aren't set. index: `%s`, alias: `%s` ", indexName, alias));
+        }
+        indexName = indexName.toLowerCase();
+        indexApi.createAlias(indexName, alias);
+        return true; // alias request was issued; false stays reserved for the unlicensed no-op above
+    }
+
+ /**
+ * Mirrors {@link ESSiteSearchAPI#deleteOldSiteSearchIndices()} but resolves the active index from
+ * {@link VersionedIndicesAPI} and deletes through the OpenSearch {@link IndexAPI} provider.
+ */
+ @Override
+ public void deleteOldSiteSearchIndices() {
+ final List indicesToRemove = new ArrayList<>(listIndices());
+
+ // Keep the default (active) site-search index.
+ defaultSiteSearchIndex().ifPresent(indicesToRemove::remove);
+
+ // Keep any index that backs an alias.
+ final List indicesWithAlias =
+ new ArrayList<>(indexApi.getIndexAlias(indicesToRemove).keySet());
+ indicesToRemove.removeAll(indicesWithAlias);
+
+ // Keep indices created within the last 24 hours.
+ final Date yesterday = Date.from(Instant.now().minus(Duration.ofDays(1)));
+ final long yesterdayTimestamp =
+ Long.parseLong(ContentletIndexAPIImpl.timestampFormatter.format(yesterday));
+
+ final List recent = new ArrayList<>();
+ for (final String index : indicesToRemove) {
+ try {
+ final long indexTimestamp = Long.parseLong(index.split("_")[1]);
+ if (indexTimestamp >= yesterdayTimestamp) {
+ recent.add(index);
+ }
+ } catch (final RuntimeException e) {
+ Logger.warn(this.getClass(),
+ "Unable to parse timestamp from site search index '" + index + "': " + e.getMessage());
+ }
+ }
+ indicesToRemove.removeAll(recent);
+
+ if (!indicesToRemove.isEmpty()) {
+ Logger.info(this.getClass(),
+ "The following indices will be deleted: " + String.join(",", indicesToRemove));
+ indexApi.deleteMultiple(indicesToRemove.toArray(new String[0]));
+ }
+ }
+
+ // =========================================================================
+ // Document operations
+ // =========================================================================
+
+ @Override
+ public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ try {
+ if (res.getContentLength() == 0) {
+ return;
+ }
+ if (res.getTitle() == null && res.getFileName() != null) {
+ res.setTitle(res.getFileName());
+ }
+
+ // Strip HTML out of text content.
+ if (res.getContent() != null && UtilMethods.isSet(res.getMimeType())
+ && res.getMimeType().contains("text/")) {
+ res.getMap().put("content_raw", res.getContent());
+ res.setContent(res.getContent().replaceAll("\\<.*?\\>", ""));
+ }
+
+ String desc = res.getDescription();
+ if (!UtilMethods.isSet(res.getDescription()) && UtilMethods.isSet(res.getContent())) {
+ desc = UtilMethods.prettyShortenString(res.getContent(), 500);
+ }
+ res.setDescription(desc);
+
+ if (res.getMap().containsKey("keywords") && res.getMap().containsKey("seokeywords")) {
+ res.setKeywords((String) res.getMap().get("seokeywords"));
+ } else {
+ res.setKeywords((String) res.getMap().get("keywords"));
+ }
+
+ Logger.info(this.getClass(),
+ "writing from : " + idx + " type: " + resultType + " url:" + res.getUrl());
+ final String json = new ESMappingAPIImpl().toJsonString(res.getMap());
+
+ final String endpoint = "/" + physicalName(idx) + "/_doc/" + res.getId();
+ try (final Response response = clientProvider.getClient().generic()
+ .execute(Requests.builder()
+ .method("PUT")
+ .endpoint(endpoint)
+ .query(Map.of("refresh", "true"))
+ .body(Bodies.json(json))
+ .build())) {
+ final int status = response.getStatus();
+ if (status < 200 || status >= 300) {
+ Logger.error(this.getClass(), "putToIndex failed for doc " + res.getId()
+ + " — HTTP " + status);
+ }
+ }
+ } catch (final Exception e) {
+ Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+ }
+ }
+
+ @Override
+ public void putToIndex(final String idx, final List res, final String resultType) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ for (final SiteSearchResult r : res) {
+ putToIndex(idx, r, resultType);
+ }
+ }
+
+ @Override
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public SiteSearchResult getFromIndex(final String index, final String id) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return null;
+ }
+ try {
+ final String physical = physicalName(index);
+ final GetResponse
+ *
+ * @author Fabrizio Araya
+ */
+@ApplicationScoped
+@RunWith(DataProviderWeldRunner.class)
+public class OSSiteSearchAPIIntegrationTest extends IntegrationTestBase {
+
+ private static final String RUN_ID =
+ UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+ /** Numeric suffix so names match the {@code sitesearch_} convention. */
+ private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode()));
+
+ private static final String IDX_ONE = "sitesearch_" + SUFFIX;
+ private static final String IDX_TWO = "sitesearch_" + (Long.parseLong(SUFFIX) + 1);
+
+ private static final String DOC_ID = "os-ss-it-" + RUN_ID;
+
+ @Inject
+ private OSSiteSearchAPI osSiteSearchAPI;
+
+ @Inject
+ private OSIndexAPIImpl osIndexAPI;
+
+ // =======================================================================
+ // Lifecycle
+ // =======================================================================
+
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+ }
+
+ @Before
+ public void setUp() {
+ cleanupTestData();
+ }
+
+ @After
+ public void tearDown() {
+ cleanupTestData();
+ }
+
+ // =======================================================================
+ // Section 1 — Core index lifecycle
+ // =======================================================================
+
+ /**
+ * Given scenario: a fresh site-search index name that does not yet exist in OpenSearch.
+ * Expected: createSiteSearchIndex creates it, indexExists reports it, and it shows up in
+ * listIndices.
+ */
+ @Test
+ public void test_createSiteSearchIndex_shouldExistAndBeListed() throws Exception {
+ assertFalse("Pre-condition: index must not exist yet", osIndexAPI.indexExists(IDX_ONE));
+
+ final boolean created = osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+ assertTrue("createSiteSearchIndex must return true", created);
+ assertTrue("Index must exist in OpenSearch after creation", osIndexAPI.indexExists(IDX_ONE));
+ assertTrue("Index must be returned by listIndices",
+ osSiteSearchAPI.listIndices().contains(IDX_ONE));
+
+ Logger.info(this, "✅ test_createSiteSearchIndex_shouldExistAndBeListed passed – index: " + IDX_ONE);
+ }
+
+ /**
+ * Given scenario: an existing site-search index.
+ * Expected: deleting it through the OpenSearch index API removes it from the cluster.
+ */
+ @Test
+ public void test_deleteSiteSearchIndex_shouldRemoveIt() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ assertTrue("Pre-condition: index must exist", osIndexAPI.indexExists(IDX_ONE));
+
+ osIndexAPI.delete(IDX_ONE);
+
+ assertFalse("Index must be gone after deletion", osIndexAPI.indexExists(IDX_ONE));
+ Logger.info(this, "✅ test_deleteSiteSearchIndex_shouldRemoveIt passed");
+ }
+
+ // =======================================================================
+ // Section 2 — Document round-trip (put / get / search / delete)
+ // =======================================================================
+
+ /**
+ * Given scenario: an empty site-search index.
+ * Expected: a document put to the index is retrievable by id, discoverable by search, and gone
+ * after deleteFromIndex.
+ */
+ @Test
+ public void test_putGetSearchDelete_documentRoundTrip() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ assertNull("Pre-condition: document must not exist yet",
+ osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID));
+
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID);
+ doc.setUrl("/os-site-search-it/" + RUN_ID);
+ doc.setTitle("OpenSearch Site Search IT " + RUN_ID);
+ doc.setMimeType("text/html");
+ doc.setContent("dotcms opensearch site search integration roundtrip " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+
+ osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+
+ final SiteSearchResult fetched = osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID);
+ assertNotNull("Document must be retrievable after put", fetched);
+ assertEquals("Fetched document id must match", DOC_ID, fetched.getId());
+
+ final SiteSearchResults results = osSiteSearchAPI.search(IDX_ONE, "roundtrip", 0, 10);
+ assertNull("Search must not return an error: " + results.getError(), results.getError());
+ assertTrue("Search must find the indexed document", results.getTotalResults() >= 1);
+
+ osSiteSearchAPI.deleteFromIndex(IDX_ONE, DOC_ID);
+ assertNull("Document must be gone after deleteFromIndex",
+ osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID));
+
+ Logger.info(this, "✅ test_putGetSearchDelete_documentRoundTrip passed – hits: "
+ + results.getTotalResults());
+ }
+
+ /**
+ * Given scenario: an index holding a few documents that share a common term.
+ * Expected: a terms aggregation query returns a non-null aggregation tree keyed by the
+ * aggregation name.
+ */
+ @Test
+ public void test_getAggregations_shouldReturnBuckets() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+ for (int i = 0; i < 3; i++) {
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID + "-" + i);
+ doc.setUrl("/agg/" + RUN_ID + "/" + i);
+ doc.setTitle("Aggregation doc " + i);
+ doc.setMimeType("text/html");
+ doc.setContent("aggregation bucket sample " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+ osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+ }
+
+ final String aggQuery = new JSONObject()
+ .put("size", 0)
+ .put("aggs", new JSONObject().put("by_mime",
+ new JSONObject().put("terms",
+ new JSONObject().put("field", "mimeType")))).toString();
+
+ final Map aggregations =
+ osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery);
+
+ assertNotNull("Aggregations map must not be null", aggregations);
+ assertTrue("Aggregation 'by_mime' must be present", aggregations.containsKey("by_mime"));
+
+ Logger.info(this, "✅ test_getAggregations_shouldReturnBuckets passed – keys: "
+ + aggregations.keySet());
+ }
+
+ // =======================================================================
+ // Section 3 — Default index activation (VersionedIndicesAPI)
+ // =======================================================================
+
+ /**
+ * Given scenario: a created site-search index that is not yet the default.
+ * Expected: activateIndex makes isDefaultIndex true and orders it first in listIndices;
+ * deactivateIndex clears the default.
+ */
+ @Test
+ public void test_activateDeactivate_shouldToggleDefault() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ assertFalse("Pre-condition: index must not be default yet",
+ osSiteSearchAPI.isDefaultIndex(IDX_ONE));
+
+ osSiteSearchAPI.activateIndex(IDX_ONE);
+ assertTrue("Index must be the default after activation",
+ osSiteSearchAPI.isDefaultIndex(IDX_ONE));
+
+ osSiteSearchAPI.deactivateIndex(IDX_ONE);
+ assertFalse("Index must no longer be the default after deactivation",
+ osSiteSearchAPI.isDefaultIndex(IDX_ONE));
+
+ Logger.info(this, "✅ test_activateDeactivate_shouldToggleDefault passed");
+ }
+
+ /**
+ * Given scenario: two created site-search indices with the second activated as default.
+ * Expected: listIndices returns both and places the active (default) index first.
+ */
+ @Test
+ public void test_listIndices_shouldPlaceDefaultFirst() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+ osSiteSearchAPI.createSiteSearchIndex(IDX_TWO, null, 1);
+
+ osSiteSearchAPI.activateIndex(IDX_TWO);
+
+ final List indices = osSiteSearchAPI.listIndices();
+ assertTrue("Both indices must be listed",
+ indices.contains(IDX_ONE) && indices.contains(IDX_TWO));
+ assertEquals("The default index must be first", IDX_TWO, indices.get(0));
+
+ Logger.info(this, "✅ test_listIndices_shouldPlaceDefaultFirst passed – order: " + indices);
+ }
+
+ // =======================================================================
+ // Section 4 — Additional interface methods
+ // =======================================================================
+
+ /**
+ * Given scenario: no closed site-search indices for this run.
+ * Expected: listClosedIndices returns a non-null list without raising.
+ */
+ @Test
+ public void test_listClosedIndices_shouldNotFail() {
+ final List closed = osSiteSearchAPI.listClosedIndices();
+ assertNotNull("listClosedIndices must never return null", closed);
+ Logger.info(this, "✅ test_listClosedIndices_shouldNotFail passed – count: " + closed.size());
+ }
+
+ // =======================================================================
+ // Cleanup helpers
+ // =======================================================================
+
+ private synchronized void cleanupTestData() {
+ for (final String name : List.of(IDX_ONE, IDX_TWO)) {
+ try {
+ if (osIndexAPI.indexExists(name)) {
+ osIndexAPI.delete(name);
+ }
+ } catch (final Exception e) {
+ Logger.warn(this, "Cleanup: error removing OS index '" + name + "': " + e.getMessage());
+ }
+ }
+ cleanupVersionedRows();
+ }
+
+    /** Purges the versioned rows of both test indices (IDX_TWO ends in SUFFIX + 1, not SUFFIX). */
+    private void cleanupVersionedRows() {
+        try {
+            new DotConnect().setSQL("DELETE FROM indicies WHERE index_name LIKE ? OR index_name LIKE ?")
+                    .addParam("%" + IDX_ONE + "%").addParam("%" + IDX_TWO + "%")
+                    .loadResult();
+            APILocator.getVersionedIndicesAPI().clearCache();
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage());
+        }
+    }
+}
From 3ebdc53f85d7baa5d19967156f99954c5c87c165 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 14:15:57 -0600
Subject: [PATCH 3/9] fix(sitesearch): use OpenSearch-format index settings for
OS site search
CI (OpenSearch Upgrade Suite) failed: every OSSiteSearchAPIIntegrationTest that
creates an index errored with "Failed to parse index settings". The OS impl was
loading es-sitesearch-settings.json, whose ES-only token-filter syntax (edgeNGram,
side) is rejected by the typed OpenSearch IndexSettings deserializer in
OSIndexAPIImpl.createIndex.
Add os-sitesearch-settings.json declaring the same analyzers (standard_content,
partial_content, comma_analyzer) in OpenSearch syntax (edge_ngram, no side), and
load it from OSSiteSearchAPI.createSiteSearchIndex. The mapping is vendor-neutral
and reused as-is.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/OSSiteSearchAPI.java | 6 ++-
.../resources/os-sitesearch-settings.json | 39 +++++++++++++++++++
2 files changed, 44 insertions(+), 1 deletion(-)
create mode 100644 dotCMS/src/main/resources/os-sitesearch-settings.json
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
index c1d226d23460..3a5afe625bfe 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -411,7 +411,11 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
indexName = indexName.toLowerCase();
final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
- URL url = classLoader.getResource("es-sitesearch-settings.json");
+ // OpenSearch-format settings: the legacy es-sitesearch-settings.json uses ES-only token
+ // filter syntax (e.g. edgeNGram / side) that the typed OpenSearch IndexSettings deserializer
+ // rejects. os-sitesearch-settings.json declares the same analyzers (standard_content,
+ // partial_content, comma_analyzer) in OpenSearch syntax. The mapping is vendor-neutral and is reused as-is.
+ URL url = classLoader.getResource("os-sitesearch-settings.json");
final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
url = classLoader.getResource("es-sitesearch-mapping.json");
final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
diff --git a/dotCMS/src/main/resources/os-sitesearch-settings.json b/dotCMS/src/main/resources/os-sitesearch-settings.json
new file mode 100644
index 000000000000..168e3e0bcb1c
--- /dev/null
+++ b/dotCMS/src/main/resources/os-sitesearch-settings.json
@@ -0,0 +1,39 @@
+{
+ "analysis": {
+ "filter": {
+ "content_ngrams": {
+ "type": "edge_ngram",
+ "min_gram": 1,
+ "max_gram": 10
+ },
+ "content_stemmer": {
+ "type": "stemmer",
+ "name": "english"
+ }
+ },
+ "analyzer": {
+ "standard_content": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "asciifolding",
+ "content_stemmer"
+ ]
+ },
+ "partial_content": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "asciifolding",
+ "content_ngrams"
+ ]
+ },
+ "comma_analyzer": {
+ "type": "pattern",
+ "pattern": ","
+ }
+ }
+ }
+}
From c0fb1ac336b98c8b05f0bcfcfed507b84f07b0f7 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 14:51:12 -0600
Subject: [PATCH 4/9] fix(sitesearch): apply OS site-search mapping to the
untagged physical index
The aggregation IT failed: mimeType aggregation hit "Text fields are not optimised
... use a keyword field". Root cause: createSiteSearchIndex delegated the mapping
PUT to MappingOperationsOS, which force-tags the physical name with `.os`. Site
search uses untagged logical names, so the mapping landed on a different (`.os`)
index while the real index kept the dynamic default mapping (string -> text),
breaking keyword aggregations.
Apply the mapping with a raw PUT /<index>/_mapping against the same untagged
physical name used by createIndex/search/put, and drop the MappingOperationsOS
dependency.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/OSSiteSearchAPI.java | 42 ++++++++++++++-----
1 file changed, 31 insertions(+), 11 deletions(-)
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
index 3a5afe625bfe..3196bb222d3f 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -23,7 +23,6 @@
import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.content.index.domain.SearchHit;
import com.dotcms.content.index.domain.SearchHits;
-import com.dotcms.content.index.opensearch.MappingOperationsOS;
import com.dotcms.content.index.opensearch.OSClientProvider;
import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
import com.dotcms.enterprise.LicenseUtil;
@@ -125,24 +124,20 @@ public class OSSiteSearchAPI implements SiteSearchAPI {
private final OSClientProvider clientProvider;
private final IndexAPI indexApi;
- private final MappingOperationsOS mappingOperations;
/** CDI-managed constructor. */
@Inject
public OSSiteSearchAPI() {
this(CDIUtils.getBeanThrows(OSClientProvider.class),
- CDIUtils.getBeanThrows(OSIndexAPIImpl.class),
- CDIUtils.getBeanThrows(MappingOperationsOS.class));
+ CDIUtils.getBeanThrows(OSIndexAPIImpl.class));
}
/** Package-private constructor for testing. */
@VisibleForTesting
OSSiteSearchAPI(final OSClientProvider clientProvider,
- final IndexAPI indexApi,
- final MappingOperationsOS mappingOperations) {
+ final IndexAPI indexApi) {
this.clientProvider = clientProvider;
this.indexApi = indexApi;
- this.mappingOperations = mappingOperations;
}
// =========================================================================
@@ -430,14 +425,39 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
indexApi.createAlias(indexName, alias);
}
- try {
- mappingOperations.putMapping(List.of(indexName), mapping);
+ putMapping(indexName, mapping);
+
+ return true;
+ }
+
+ /**
+ * Applies the mapping to the site-search index via a raw {@code PUT /<index>/_mapping}.
+ *
+ * <p>Done here rather than via {@code MappingOperationsOS} on purpose: that helper force-tags the
+ * physical name with {@code .os}, which would target a different index than the untagged one this
+ * class creates and queries (see the class "Index naming" note), leaving the real index on the
+ * dynamic default mapping (string fields become {@code text}, which then breaks keyword
+ * aggregations such as {@code mimeType}). Forwarding to the same untagged physical name used by
+ * {@code createIndex}/search/put keeps the mapping on the index that is actually hit.
+ */
+ private void putMapping(final String indexName, final String mapping) throws DotSearchException {
+ final String endpoint = "/" + physicalName(indexName) + "/_mapping";
+ try (final Response response = clientProvider.getClient().generic()
+ .execute(Requests.builder()
+ .method("PUT")
+ .endpoint(endpoint)
+ .body(Bodies.json(mapping))
+ .build())) {
+ final int status = response.getStatus();
+ if (status < 200 || status >= 300) {
+ throw new DotSearchException("Error applying mapping to OpenSearch site search index "
+ + indexName + " — HTTP " + status + " — "
+ + response.getBody().map(Body::bodyAsString).orElse(""));
+ }
} catch (final IOException e) {
throw new DotSearchException("Error applying mapping to OpenSearch site search index: "
+ e.getMessage(), e);
}
-
- return true;
}
@Override
From aadbb3bc5ec4b80d6e773eb28a146959895e2db7 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 16:19:15 -0600
Subject: [PATCH 5/9] test(sitesearch): integration test for SiteSearchWebAPI
view tool (#35786)
Adds SiteSearchWebAPITest covering the view-tool surface affected by the
neutral-aggregation refactor: search() (default-index, alias, pagination, empty
and error paths) with full SiteSearchResults/SiteSearchResult field assertions;
getAggregations() over the neutral Aggregation/AggregationBucket tree (terms,
nested top_hits, numeric-histogram getKeyAsNumber); and getFacets() across all
three legacy wrappers (string-terms, count-histogram, plain Facet fallback).
Registered in MainSuite1b alongside ContentSearchToolTest.
Also a minor List.getFirst() cleanup in SiteSearchAPIImpl.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/SiteSearchAPIImpl.java | 4 +-
.../src/test/java/com/dotcms/MainSuite1b.java | 1 +
.../viewtool/SiteSearchWebAPITest.java | 494 ++++++++++++++++++
3 files changed, 497 insertions(+), 2 deletions(-)
create mode 100644 dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
index 750d93d16bc0..43e020d2a2ff 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
@@ -103,7 +103,7 @@ public SiteSearchAPIImpl() {
public List listIndices() {
final List providers = router.writeProviders();
if (providers.size() == 1) {
- return providers.get(0).listIndices();
+ return providers.getFirst().listIndices();
}
final Set merged = new LinkedHashSet<>(esImpl.listIndices());
merged.addAll(osImpl.listIndices());
@@ -114,7 +114,7 @@ public List listIndices() {
public List listClosedIndices() {
final List providers = router.writeProviders();
if (providers.size() == 1) {
- return providers.get(0).listClosedIndices();
+ return providers.getFirst().listClosedIndices();
}
final Set merged = new LinkedHashSet<>(esImpl.listClosedIndices());
merged.addAll(osImpl.listClosedIndices());
diff --git a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
index b1e5bf853a22..fa9b83f71785 100644
--- a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
+++ b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
@@ -52,6 +52,7 @@
com.dotcms.rendering.velocity.viewtools.content.ContentMapTest.class,
com.dotcms.rendering.velocity.viewtools.content.ContentToolTest.class,
com.dotcms.rendering.velocity.viewtools.ContentSearchToolTest.class,
+ com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPITest.class,
com.dotcms.rendering.velocity.viewtools.WorkflowToolTest.class,
com.dotcms.rendering.velocity.viewtools.WebsiteToolTest.class,
com.dotcms.rendering.velocity.viewtools.LanguageWebAPITest.class,
diff --git a/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
new file mode 100644
index 000000000000..215d94a58fa2
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
@@ -0,0 +1,494 @@
+package com.dotmarketing.sitesearch.viewtool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.Facet;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperCountDateHistogramFacet;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperStringTermsFacet;
+import com.dotmarketing.util.Logger;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.velocity.tools.view.context.ViewContext;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Integration test for the {@link SiteSearchWebAPI} Velocity view tool, modelled on
+ * {@code ContentSearchToolTest}.
+ *
+ * <p>Exercises the public view-tool surface end-to-end against a live search backend after the
+ * Elasticsearch → OpenSearch neutral-aggregation refactor (#35786), with emphasis on the fields of
+ * the POJOs returned by the refactored methods:
+ *
+ * <ul>
+ *   <li>{@code search(...)} → {@link SiteSearchResults} / {@link SiteSearchResult} fields, the
+ *       alias path, the default-index path, pagination and error states.
+ *   <li>{@code getAggregations(...)} → the neutral {@link Aggregation} / {@link AggregationBucket}
+ *       tree: name/type/buckets, doc counts, {@code getKeyAsNumber} (numeric histogram), and the
+ *       nested {@code top_hits} {@link SearchHit}s.
+ *   <li>{@code getFacets(...)} → all three legacy wrappers: string-terms, date/numeric-histogram and
+ *       the plain {@link Facet} fallback, plus their entry POJOs.
+ * </ul>
+ *
+ * <p>The tool resolves its backend through {@code APILocator.getSiteSearchAPI()} — now the
+ * {@code SiteSearchAPIImpl} phase router — so this also proves the router wiring did not break the
+ * legacy view-tool contract. Runs in the default integration profile (migration Phase 0 →
+ * Elasticsearch), like {@code ContentSearchToolTest}; no OpenSearch container is required.
+ *
+ * @author Fabrizio Araya
+ */
+public class SiteSearchWebAPITest extends IntegrationTestBase {
+
+ private static final long SUFFIX = System.currentTimeMillis();
+ private static final String IDX = "sitesearch_" + SUFFIX;
+ private static final String ALIAS = "ss_it_alias_" + SUFFIX;
+
+ /** Unique token embedded in every indexed doc so the text query matches only this run's data. */
+ private static final String TOKEN = "ssqa" + SUFFIX;
+
+ private static final String MIME_HTML = "text/html";
+ private static final String MIME_PDF = "application/pdf";
+ private static final Set EXPECTED_MIMES = Set.of(MIME_HTML, MIME_PDF);
+
+ /** 3 html docs + 2 pdf docs = 5 docs, all carrying TOKEN. */
+ private static final int HTML_DOCS = 3;
+ private static final int PDF_DOCS = 2;
+ private static final int TOTAL_DOCS = HTML_DOCS + PDF_DOCS;
+
+ // ---- Queries (JSON, so search() skips the request-host lookup) -----------------------------
+
+ private static final String SEARCH_TOKEN =
+ "{\"query\":{\"query_string\":{\"query\":\"" + "TOKEN_PLACEHOLDER"
+ + "\",\"default_field\":\"*\"}}}";
+
+ private static final String TERMS_AGG =
+ "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}";
+
+ private static final String NESTED_AGG =
+ "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10},"
+ + "\"aggs\":{\"top_docs\":{\"top_hits\":{\"size\":2}}}}}}";
+
+ private static final String HISTO_AGG =
+ "{\"size\":0,\"aggs\":{\"by_len\":{\"histogram\":{\"field\":\"contentLength\","
+ + "\"interval\":25}}}}";
+
+ /** Query matches no doc, so the terms aggregation comes back with empty buckets. */
+ private static final String EMPTY_AGG =
+ "{\"size\":0,\"query\":{\"term\":{\"mimeType\":\"zzz/none\"}},"
+ + "\"aggs\":{\"empty\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}";
+
+ private static SiteSearchAPI siteSearchAPI;
+
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+
+ siteSearchAPI = APILocator.getSiteSearchAPI();
+
+ // Create the index WITH an alias (so the alias search path is exercised) and activate it as
+ // the default (so the default-index search path is exercised).
+ siteSearchAPI.createSiteSearchIndex(IDX, ALIAS, 1);
+ siteSearchAPI.activateIndex(IDX);
+
+ for (int i = 0; i < TOTAL_DOCS; i++) {
+ final boolean html = i < HTML_DOCS;
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId("ss-it-" + SUFFIX + "-" + i);
+ doc.setUrl("/site-search-webapi-it/" + i);
+ doc.setTitle("Site Search WebAPI IT doc " + i);
+ doc.setHost("demo.dotcms.com");
+ doc.setAuthor("qa-author-" + i);
+ doc.setMimeType(html ? MIME_HTML : MIME_PDF);
+ // Vary the body length so the numeric histogram on contentLength spreads over buckets.
+ doc.setContent("dotcms site search viewtool integration " + TOKEN
+ + " ".repeat(i * 30));
+ doc.setContentLength(doc.getContent().length());
+ siteSearchAPI.putToIndex(IDX, doc, "content");
+ }
+ }
+
+ @AfterClass
+ public static void cleanup() {
+ try {
+ siteSearchAPI.deactivateIndex(IDX);
+ } catch (final Exception e) {
+ Logger.warn(SiteSearchWebAPITest.class, "Cleanup: deactivate failed: " + e.getMessage());
+ }
+ try {
+ APILocator.getESIndexAPI()
+ .delete(APILocator.getESIndexAPI().getNameWithClusterIDPrefix(IDX));
+ } catch (final Exception e) {
+ Logger.warn(SiteSearchWebAPITest.class, "Cleanup: delete failed: " + e.getMessage());
+ }
+ }
+
+ /** Builds a {@link SiteSearchWebAPI} initialized with a mock request/response. */
+ private SiteSearchWebAPI siteSearchWebAPI() {
+ final ViewContext viewContext = mock(ViewContext.class);
+ final HttpServletRequest request = mock(HttpServletRequest.class);
+ final HttpServletResponse response = mock(HttpServletResponse.class);
+ when(viewContext.getRequest()).thenReturn(request);
+ when(viewContext.getResponse()).thenReturn(response);
+
+ final SiteSearchWebAPI tool = new SiteSearchWebAPI();
+ tool.init(viewContext);
+ return tool;
+ }
+
+ private static String searchToken() {
+ return SEARCH_TOKEN.replace("TOKEN_PLACEHOLDER", TOKEN);
+ }
+
+ // =========================================================================
+ // listSearchIndicies
+ // =========================================================================
+
+ /**
+ * Given scenario: a populated, active site-search index.
+ * Expected: listSearchIndicies() (and its legacy-typo alias) returns the created index.
+ */
+ @Test
+ public void listSearchIndicies_containsCreatedIndex() {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ assertTrue("listSearchIndicies() must contain the created index",
+ tool.listSearchIndicies().contains(IDX));
+ assertTrue("legacy-typo alias listSearchIncidies() must behave identically",
+ tool.listSearchIncidies().contains(IDX));
+
+ Logger.info(this, "✅ listSearchIndicies_containsCreatedIndex passed");
+ }
+
+ // =========================================================================
+ // search — SiteSearchResults / SiteSearchResult field coverage
+ // =========================================================================
+
+ /**
+ * Given scenario: 5 docs carrying TOKEN in the default (active) index.
+ * Expected: the default-index search (3-arg) populates every SiteSearchResults field and each
+ * SiteSearchResult exposes id/url/title/mimeType/score.
+ */
+ @Test
+ public void search_defaultIndex_populatesResultFields() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(searchToken(), 0, 10);
+
+ assertNull("Search must not return an error: " + results.getError(), results.getError());
+ assertEquals("All TOKEN docs must be counted", TOTAL_DOCS, results.getTotalResults());
+ assertEquals("getTotalHits() alias must match getTotalResults()",
+ results.getTotalResults(), results.getTotalHits());
+ assertEquals("Result rows must match the total (under the page size)",
+ TOTAL_DOCS, results.getResults().size());
+ assertTrue("maxScore must be positive for a matching query", results.getMaxScore() > 0);
+ assertEquals("offset must reflect the requested start", 0, results.getOffset());
+ assertEquals("start alias must match offset", results.getOffset(), results.getStart());
+ assertEquals("limit must reflect the requested rows", 10, results.getLimit());
+ assertNotNull("query echo must be set", results.getQuery());
+ assertNotNull("took must be set", results.getTook());
+
+ for (final SiteSearchResult hit : results.getResults()) {
+ assertNotNull("each hit must carry an id", hit.getId());
+ assertTrue("each hit id must belong to this run", hit.getId().startsWith("ss-it-" + SUFFIX));
+ assertNotNull("each hit must carry a url", hit.getUrl());
+ assertNotNull("each hit must carry a title", hit.getTitle());
+ assertTrue("each hit mimeType must be one of the indexed types",
+ EXPECTED_MIMES.contains(hit.getMimeType()));
+ assertTrue("each hit must have a positive score", hit.getScore() > 0);
+ }
+
+ Logger.info(this, "✅ search_defaultIndex_populatesResultFields passed – hits: "
+ + results.getTotalResults());
+ }
+
+ /**
+ * Given scenario: the index was created with an alias.
+ * Expected: the 4-arg alias search resolves the alias to the backing index and returns the docs.
+ */
+ @Test
+ public void search_byAlias_resolvesIndex() {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(ALIAS, searchToken(), 0, 10);
+
+ assertNull("Alias search must not return an error: " + results.getError(),
+ results.getError());
+ assertEquals("Alias search must reach the same docs", TOTAL_DOCS, results.getTotalResults());
+
+ Logger.info(this, "✅ search_byAlias_resolvesIndex passed");
+ }
+
+ /**
+ * Given scenario: a JSON body that caps the page size to 2.
+ * Expected: the returned rows are capped to the page size while the total still reflects all
+ * matches — covering the offset/limit/totalResults fields together.
+ */
+ @Test
+ public void search_pagination_capsReturnedRows() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final String paged = "{\"size\":2,\"query\":{\"query_string\":{\"query\":\"" + TOKEN
+ + "\",\"default_field\":\"*\"}}}";
+ final SiteSearchResults results = tool.search(paged, 0, 2);
+
+ assertNull("Paged search must not error: " + results.getError(), results.getError());
+ assertEquals("Total must still reflect every match", TOTAL_DOCS, results.getTotalResults());
+ assertTrue("Returned rows must be capped by the page size",
+ results.getResults().size() <= 2);
+
+ Logger.info(this, "✅ search_pagination_capsReturnedRows passed – returned: "
+ + results.getResults().size());
+ }
+
+ /**
+ * Given scenario: a query for a token that matches nothing.
+ * Expected: zero results, an empty result list and no error (a clean empty response).
+ */
+ @Test
+ public void search_noMatch_returnsEmptyWithoutError() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final String noMatch = "{\"query\":{\"query_string\":{\"query\":\"zzznomatchzzz" + SUFFIX
+ + "\",\"default_field\":\"*\"}}}";
+ final SiteSearchResults results = tool.search(noMatch, 0, 10);
+
+ assertNull("No-match search must not error", results.getError());
+ assertEquals("No-match search must count zero", 0, results.getTotalResults());
+ assertTrue("No-match search must return no rows", results.getResults().isEmpty());
+
+ Logger.info(this, "✅ search_noMatch_returnsEmptyWithoutError passed");
+ }
+
+ /**
+ * Given scenario: a null query.
+ * Expected: the tool reports an error on the SiteSearchResults rather than throwing.
+ */
+ @Test
+ public void search_nullQuery_setsError() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(null, 0, 10);
+
+ assertNotNull("A null query must surface an error", results.getError());
+ Logger.info(this, "✅ search_nullQuery_setsError passed – error: " + results.getError());
+ }
+
+ // =========================================================================
+ // getAggregations — Aggregation / AggregationBucket field coverage
+ // =========================================================================
+
+ /**
+ * Given scenario: 3 html + 2 pdf docs.
+ * Expected: the terms aggregation on mimeType exposes a populated neutral Aggregation — name,
+ * type, two buckets with correct doc counts, string keys, null numeric keys (non-numeric) and no
+ * top-hits — covering the multi-bucket AggregationBucket accessors.
+ */
+ @Test
+ public void getAggregations_termsBuckets_fieldsPopulated() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, TERMS_AGG);
+
+ assertNotNull("Aggregations map must not be null", aggregations);
+ final Aggregation byMime = aggregations.get("by_mime");
+ assertNotNull("'by_mime' aggregation must be present", byMime);
+ assertEquals("aggregation name must round-trip", "by_mime", byMime.getName());
+ assertNotNull("aggregation type must be reported", byMime.getType());
+ assertNull("a terms aggregation carries no top-hits", byMime.getHits());
+ assertEquals("there must be one bucket per mimeType", 2, byMime.getBuckets().size());
+
+ long htmlCount = -1;
+ long pdfCount = -1;
+ for (final AggregationBucket bucket : byMime.getBuckets()) {
+ assertTrue("bucket key must be a known mimeType",
+ EXPECTED_MIMES.contains(bucket.getKey()));
+ assertEquals("getKeyAsString must mirror getKey", bucket.getKey(),
+ bucket.getKeyAsString());
+ assertNull("a non-numeric key must yield a null number", bucket.getKeyAsNumber());
+ assertTrue("each bucket must carry documents", bucket.getDocCount() > 0);
+ assertTrue("a terms bucket has no sub-aggregations here",
+ bucket.getAggregations().isEmpty());
+ if (MIME_HTML.equals(bucket.getKey())) {
+ htmlCount = bucket.getDocCount();
+ } else if (MIME_PDF.equals(bucket.getKey())) {
+ pdfCount = bucket.getDocCount();
+ }
+ }
+ assertEquals("html bucket must count the html docs", HTML_DOCS, htmlCount);
+ assertEquals("pdf bucket must count the pdf docs", PDF_DOCS, pdfCount);
+
+ Logger.info(this, "✅ getAggregations_termsBuckets_fieldsPopulated passed");
+ }
+
+ /**
+ * Given scenario: a terms aggregation with a nested top_hits sub-aggregation.
+ * Expected: the neutral tree preserves the nested {@code top_docs} as an Aggregation that carries
+ * SearchHits, and each SearchHit exposes id and source — covering getHits()/SearchHit fields and
+ * the nested getAggregations() path.
+ */
+ @Test
+ public void getAggregations_nestedTopHits_preserved() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, NESTED_AGG);
+ final Aggregation byMime = aggregations.get("by_mime");
+ assertNotNull("'by_mime' aggregation must be present", byMime);
+ assertFalse("'by_mime' must have buckets", byMime.getBuckets().isEmpty());
+
+ final AggregationBucket firstBucket = byMime.getBuckets().getFirst();
+ final Aggregation topDocs = firstBucket.getAggregations().get("top_docs");
+ assertNotNull("nested top_hits sub-aggregation must be preserved", topDocs);
+ assertNotNull("top_hits must carry a SearchHits container", topDocs.getHits());
+
+ final List hits = topDocs.getHits().getHits();
+ assertFalse("top_hits must carry at least one hit", hits.isEmpty());
+ final SearchHit hit = hits.getFirst();
+ assertNotNull("each top-hit must expose an id", hit.getId());
+ assertFalse("each top-hit must expose its source document",
+ hit.getSourceAsMap().isEmpty());
+
+ Logger.info(this, "✅ getAggregations_nestedTopHits_preserved passed – topHits: " + hits.size());
+ }
+
+ /**
+ * Given scenario: a numeric histogram on the long field {@code contentLength}.
+ * Expected: the buckets carry numeric keys, so {@link AggregationBucket#getKeyAsNumber()} returns
+ * a non-null Number — covering the numeric-key path (distinct from the non-numeric terms keys).
+ */
+ @Test
+ public void getAggregations_numericHistogram_keyAsNumber() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, HISTO_AGG);
+ final Aggregation byLen = aggregations.get("by_len");
+ assertNotNull("'by_len' histogram aggregation must be present", byLen);
+ assertTrue("histogram type must be reported as a histogram",
+ byLen.getType().contains("histogram"));
+ assertFalse("histogram must produce buckets", byLen.getBuckets().isEmpty());
+
+ boolean sawPopulatedNumericBucket = false;
+ for (final AggregationBucket bucket : byLen.getBuckets()) {
+ assertNotNull("a histogram bucket key must be numeric", bucket.getKeyAsNumber());
+ if (bucket.getDocCount() > 0) {
+ sawPopulatedNumericBucket = true;
+ }
+ }
+ assertTrue("at least one histogram bucket must contain documents", sawPopulatedNumericBucket);
+
+ Logger.info(this, "✅ getAggregations_numericHistogram_keyAsNumber passed");
+ }
+
+ // =========================================================================
+ // getFacets — legacy wrapper coverage (terms / histogram / plain)
+ // =========================================================================
+
+ /**
+ * Given scenario: a terms aggregation with non-empty buckets.
+ * Expected: getFacets wraps it as an {@link InternalWrapperStringTermsFacet} exposing name/type
+ * and term entries with term + count — covering the legacy string-terms facet POJO.
+ */
+ @Test
+ public void getFacets_termsAggregation_wrapsAsStringTermsFacet() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, TERMS_AGG);
+ assertNotNull("Facets map must not be null", facets);
+
+ final Facet facet = facets.get("by_mime");
+ assertNotNull("'by_mime' facet must be present", facet);
+ assertEquals("facet name must round-trip", "by_mime", facet.getName());
+ assertNotNull("facet type must be reported", facet.getType());
+ assertTrue("non-empty terms aggregation must map to InternalWrapperStringTermsFacet",
+ facet instanceof InternalWrapperStringTermsFacet);
+
+ final InternalWrapperStringTermsFacet termsFacet = (InternalWrapperStringTermsFacet) facet;
+ assertEquals("there must be one entry per bucket", 2, termsFacet.entries().size());
+
+ long htmlCount = -1;
+ for (final var entry : termsFacet.entries()) {
+ assertTrue("entry term must be a known mimeType", EXPECTED_MIMES.contains(entry.getTerm()));
+ assertTrue("entry count must be positive", entry.getCount() > 0);
+ if (MIME_HTML.equals(entry.getTerm())) {
+ htmlCount = entry.getCount();
+ }
+ }
+ assertEquals("html term entry must count the html docs", HTML_DOCS, htmlCount);
+
+ Logger.info(this, "✅ getFacets_termsAggregation_wrapsAsStringTermsFacet passed");
+ }
+
+ /**
+ * Given scenario: a numeric histogram aggregation.
+ * Expected: getFacets wraps it as an {@link InternalWrapperCountDateHistogramFacet} exposing
+ * CountEntry rows with time (the numeric key) and count — covering the legacy histogram facet
+ * POJO and the {@code isHistogram} branch.
+ */
+ @Test
+ public void getFacets_histogramAggregation_wrapsAsCountHistogramFacet() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, HISTO_AGG);
+ final Facet facet = facets.get("by_len");
+ assertNotNull("'by_len' facet must be present", facet);
+ assertTrue("a histogram aggregation must map to InternalWrapperCountDateHistogramFacet",
+ facet instanceof InternalWrapperCountDateHistogramFacet);
+
+ final InternalWrapperCountDateHistogramFacet histoFacet =
+ (InternalWrapperCountDateHistogramFacet) facet;
+ assertFalse("histogram facet must expose count entries", histoFacet.entries().isEmpty());
+
+ boolean sawPopulatedEntry = false;
+ for (final var entry : histoFacet.entries()) {
+ assertTrue("entry time (numeric key) must be non-negative", entry.getTime() >= 0);
+ if (entry.getCount() > 0) {
+ sawPopulatedEntry = true;
+ }
+ }
+ assertTrue("at least one histogram entry must carry a count", sawPopulatedEntry);
+
+ Logger.info(this, "✅ getFacets_histogramAggregation_wrapsAsCountHistogramFacet passed");
+ }
+
+ /**
+ * Given scenario: a terms aggregation whose query matches no document (empty buckets).
+ * Expected: getFacets falls back to a plain {@link Facet} (neither wrapper), still exposing
+ * name and type — covering the empty-bucket branch.
+ */
+ @Test
+ public void getFacets_emptyBuckets_fallsBackToPlainFacet() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, EMPTY_AGG);
+ final Facet facet = facets.get("empty");
+ assertNotNull("'empty' facet must be present", facet);
+ assertEquals("facet name must round-trip", "empty", facet.getName());
+ assertNotNull("facet type must be reported", facet.getType());
+ assertFalse("an empty terms aggregation must NOT be a string-terms wrapper",
+ facet instanceof InternalWrapperStringTermsFacet);
+ assertFalse("an empty terms aggregation must NOT be a histogram wrapper",
+ facet instanceof InternalWrapperCountDateHistogramFacet);
+
+ Logger.info(this, "✅ getFacets_emptyBuckets_fallsBackToPlainFacet passed");
+ }
+}
From 5c0a4df5e21c5f930426d9574d40d7bb9d872e4a Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Tue, 23 Jun 2026 17:30:52 -0600
Subject: [PATCH 6/9] fix(sitesearch): isolate dual-write fan-out per provider
and pin ES mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two OpenSearch site-search regressions surfaced by the dual-write fan-out:
1. Shared mutable result across the fan-out. SiteSearchAPIImpl.putToIndex
handed the same SiteSearchResult to both leaves. putToIndex mutates the
backing map (setKeywords rewrites "keywords" String -> List), so the first
leaf (ES) corrupted the input the second leaf (OS) then read, throwing
ClassCastException: EmptyList cannot be cast to String and silently dropping
every document from OpenSearch. The router now copies the result (and each
element of the batch overload) per provider.
2. Mapping fan-out leak. ESSiteSearchAPI.createSiteSearchIndex applied its
mapping through the phase-dispatched ESMappingAPIImpl.putMapping, which fanned
out a second time to OpenSearch using a .os-tagged physical name that
site-search OS indices never use -> HTTP 404. Pinned the ES leaf to
IndexTag.ES, restoring the single-fan-out invariant (SiteSearchAPIImpl already
drives OSSiteSearchAPI, which owns its own untagged OS index + mapping).
Adds SiteSearchDualWriteRouterIT (registered in OpenSearchUpgradeSuite) which
drives the router in Phase 1 dual-write and asserts documents reach OpenSearch
(single + batch) — the isolated OS-leaf IT cannot reproduce either bug.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/ESSiteSearchAPI.java | 9 +-
.../sitesearch/SiteSearchAPIImpl.java | 31 ++-
.../com/dotcms/OpenSearchUpgradeSuite.java | 4 +-
.../SiteSearchDualWriteRouterIT.java | 246 ++++++++++++++++++
4 files changed, 285 insertions(+), 5 deletions(-)
create mode 100644 dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index 3197bd4b0735..25ab67a4ecff 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -14,6 +14,7 @@
import com.dotcms.content.elasticsearch.business.*;
import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.IndexTag;
import com.dotcms.content.index.domain.Aggregation;
import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.LicenseUtil;
@@ -392,8 +393,12 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias
indexApi.createAlias(indexName, alias);
}
- //put mappings
- mappingAPI.putMapping(indexName, mapping);
+ // Put mappings on the ES index only. ESMappingAPIImpl.putMapping(String, String) is
+ // phase-dispatched and would fan out to OpenSearch, but SiteSearchAPIImpl is already the
+ // single fan-out point for site search (it invokes OSSiteSearchAPI separately, which owns
+ // its own untagged OS index + mapping). Fanning out here too would re-issue the mapping to
+ // a `.os`-tagged physical name that site-search OS indices never use → HTTP 404. Pin to ES.
+ mappingAPI.putMapping(List.of(indexName), mapping, IndexTag.ES);
return true;
}
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
index 43e020d2a2ff..7893f2b48dad 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchAPIImpl.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -222,12 +223,38 @@ public void deactivateIndex(final String indexName) throws DotDataException, IOE
@Override
public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) {
- router.write(impl -> impl.putToIndex(idx, res, resultType));
+ // Each provider gets its own copy: putToIndex mutates the result's backing map
+ // (e.g. SiteSearchResult.setKeywords rewrites the "keywords" entry String -> List), so a
+ // shared instance would let the first provider in the fan-out corrupt the input the next
+ // provider reads — producing a ClassCastException on the second leaf. The lambda is invoked
+ // once per provider, so copyOf(res) is evaluated fresh from the untouched original each time.
+ router.write(impl -> impl.putToIndex(idx, copyOf(res), resultType));
}
@Override
public void putToIndex(final String idx, final List res, final String resultType) {
- router.write(impl -> impl.putToIndex(idx, res, resultType));
+ // See single-result overload: copy per provider so the fan-out never shares mutable state.
+ router.write(impl -> impl.putToIndex(idx, copyOf(res), resultType));
+ }
+
+ /**
+ * Shallow-copies a {@link SiteSearchResult} so the fan-out can hand an independent instance to
+ * each write provider. {@code putToIndex} mutates the backing map in place (HTML stripping,
+ * description derivation, {@code keywords} String→List rewrite); copying the map prevents one
+ * provider's mutations from leaking into the next provider's input. A shallow map copy is
+ * sufficient because every mutation replaces a map entry rather than mutating a value object.
+ */
+ private static SiteSearchResult copyOf(final SiteSearchResult res) {
+ return new SiteSearchResult(new HashMap<>(res.getMap()));
+ }
+
+ /** Copies each element of a result batch — see {@link #copyOf(SiteSearchResult)}. */
+ private static List copyOf(final List results) {
+ final List copies = new ArrayList<>(results.size());
+ for (final SiteSearchResult r : results) {
+ copies.add(copyOf(r));
+ }
+ return copies;
}
@Override
diff --git a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
index 29aa50e3e430..ce000e4e7f0f 100644
--- a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
+++ b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
@@ -13,6 +13,7 @@
import com.dotcms.content.index.opensearch.OSClientProviderIntegrationTest;
import com.dotcms.content.index.opensearch.OSSearchAPIImplIntegrationTest;
import com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchDualWriteRouterIT;
import com.dotcms.junit.MainBaseSuite;
import org.junit.runner.RunWith;
import org.junit.runners.Suite.SuiteClasses;
@@ -48,7 +49,8 @@
ContentletIndexAPIImplMigrationIntegrationTest.class,
ContentletIndexAPIImplPhaseSwitchIntegrationTest.class,
OSSearchAPIImplIntegrationTest.class,
- OSSiteSearchAPIIntegrationTest.class
+ OSSiteSearchAPIIntegrationTest.class,
+ SiteSearchDualWriteRouterIT.class
})
public class OpenSearchUpgradeSuite {
}
\ No newline at end of file
diff --git a/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
new file mode 100644
index 000000000000..a400ce4aae36
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
@@ -0,0 +1,246 @@
+package com.dotcms.enterprise.publishing.sitesearch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeFalse;
+
+import com.dotcms.DataProviderWeldRunner;
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.content.elasticsearch.business.ESIndexAPI;
+import com.dotcms.content.index.IndexAPIImpl;
+import com.dotcms.content.index.IndexConfigHelper;
+import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.common.db.DotConnect;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.util.Config;
+import com.dotmarketing.util.Logger;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import javax.enterprise.context.ApplicationScoped;
+import javax.inject.Inject;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+/**
+ * Integration tests that exercise Site Search through the phase-aware {@link SiteSearchAPIImpl}
+ * router in a dual-write phase, where every write fans out to both the
+ * Elasticsearch ({@link ESSiteSearchAPI}) and OpenSearch ({@link OSSiteSearchAPI}) leaves.
+ *
+ * <p>
+ * These tests guard two regressions that only reproduce through the router fan-out — the
+ * isolated {@link com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest} (which calls
+ * the OS leaf directly) cannot catch them:
+ *
+ * <ol>
+ * <li>
+ * <b>Shared mutable result across the fan-out.</b> {@code putToIndex} mutates the
+ * {@link SiteSearchResult} map in place — notably {@link SiteSearchResult#setKeywords(String)}
+ * rewrites the {@code keywords} entry from a {@code String} to a {@code List<String>}. With a single
+ * shared instance, the first leaf (ES) corrupted the input the second leaf (OS) then read,
+ * producing {@code ClassCastException: EmptyList cannot be cast to String} on the OS write —
+ * silently dropping every document from OpenSearch. The router now hands each
+ * provider its own copy. This test asserts the document actually lands in OpenSearch.
+ * <li>
+ * <b>Mapping fan-out leak.</b> {@code createSiteSearchIndex} on the ES leaf applied
+ * its mapping through the phase-dispatched {@code ESMappingAPIImpl.putMapping}, which fanned
+ * out a second time to OpenSearch using a {@code .os}-tagged physical name that site-search OS
+ * indices never use → HTTP 404. The create path is now ES-pinned; this test asserts a
+ * router-driven create yields a fully functional, queryable OS index.
+ * </ol>
+ *
+ * <p>
+ * Runs only when ES and OS are separate clusters (dual-write requires two endpoints); skipped
+ * via {@link org.junit.Assume#assumeFalse} on the single-cluster {@code opensearch-upgrade}
+ * profile. Registered in {@link com.dotcms.OpenSearchUpgradeSuite}. Run with:
+ *
+ *
+ * @author Fabrizio Araya
+ */
+@ApplicationScoped
+@RunWith(DataProviderWeldRunner.class)
+public class SiteSearchDualWriteRouterIT extends IntegrationTestBase {
+
+ /** Phase 1 — dual-write, ES reads. Writes fan out to [ES, OS]; reads come from ES. */
+ private static final int PHASE_DUAL_WRITE_ES_READS = 1;
+
+ private static final String RUN_ID =
+ UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+ /** Numeric suffix so the name matches the {@code sitesearch_} convention. */
+ private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode()));
+
+ private static final String IDX = "sitesearch_" + SUFFIX;
+ private static final String DOC_ID = "ss-dualwrite-it-" + RUN_ID;
+
+ @Inject
+ private OSSiteSearchAPI osSiteSearchAPI;
+
+ @Inject
+ private OSIndexAPIImpl osIndexAPI;
+
+ /** The phase-aware fan-out router under test. */
+ private SiteSearchAPI router;
+
+ // =======================================================================
+ // Lifecycle
+ // =======================================================================
+
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+ }
+
+ @Before
+ public void setUp() {
+ // Dual-write fans out to both clusters; a single-cluster profile would collide on the
+ // shared untagged site-search name (and cannot host both leaves), so skip there.
+ assumeFalse("Requires separate ES and OS clusters for dual-write", esSameAsOs());
+ router = APILocator.getSiteSearchAPI();
+ cleanupTestData();
+ setPhase(PHASE_DUAL_WRITE_ES_READS);
+ }
+
+ @After
+ public void tearDown() {
+ setPhase(null);
+ cleanupTestData();
+ }
+
+ // =======================================================================
+ // Tests
+ // =======================================================================
+
+ /**
+ * Given scenario: Phase 1 (dual-write). An index and a single document with {@code keywords}
+ * set are written through the router, fanning out to ES then OS on the same result instance.
+ * Expected: the document reaches OpenSearch (no {@code ClassCastException} on the OS leaf) and
+ * is searchable through the router's ES read path — proving the dual-write completed on both
+ * backends. {@code keywords} round-trips as a {@code List<String>}.
+ */
+ @Test
+ public void test_dualWritePutToIndex_documentReachesBothBackends() throws Exception {
+ router.createSiteSearchIndex(IDX, null, 1);
+
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID);
+ doc.setUrl("/ss-dualwrite-it/" + RUN_ID);
+ doc.setTitle("Dual-write Site Search IT " + RUN_ID);
+ doc.setMimeType("text/html");
+ doc.setContent("dotcms dual write roundtrip " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+ // The exact Bug 1 trigger: keywords enters the map as a raw String. The first leaf in the
+ // fan-out rewrites it to a List; the second leaf must not see that mutation.
+ doc.getMap().put("keywords", "alpha, beta");
+
+ router.putToIndex(IDX, doc, "content");
+
+ // Bug 1 — OpenSearch must have received the document (unpatched: ClassCastException → null).
+ final SiteSearchResult fromOs = osSiteSearchAPI.getFromIndex(IDX, DOC_ID);
+ assertNotNull("Document must be retrievable from OpenSearch after dual-write", fromOs);
+ assertEquals("Document id must match in OpenSearch", DOC_ID, fromOs.getId());
+ assertEquals("keywords must round-trip as a trimmed list",
+ List.of("alpha", "beta"), fromOs.getKeywords());
+
+ // The dual-write also reached ES: in Phase 1 the router reads from ES.
+ final SiteSearchResults esRead = router.search(IDX, "roundtrip", 0, 10);
+ assertNull("ES read must not error: " + esRead.getError(), esRead.getError());
+ assertTrue("Document must be searchable via the router's ES read path",
+ esRead.getTotalResults() >= 1);
+
+ Logger.info(this, "✅ test_dualWritePutToIndex_documentReachesBothBackends passed");
+ }
+
+ /**
+ * Given scenario: Phase 1 (dual-write). A batch of documents is written through the
+ * {@code putToIndex(String, List, String)} router overload. This exercises the list fan-out
+ * path, where each provider must receive its own copy of every result.
+ * Expected: every document lands in OpenSearch.
+ */
+ @Test
+ public void test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch() throws Exception {
+ router.createSiteSearchIndex(IDX, null, 1);
+
+ final List<SiteSearchResult> docs = new ArrayList<>();
+ for (int i = 0; i < 3; i++) {
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID + "-" + i);
+ doc.setUrl("/ss-dualwrite-batch/" + RUN_ID + "/" + i);
+ doc.setTitle("Batch doc " + i);
+ doc.setMimeType("text/html");
+ doc.setContent("dotcms dual write batch sample " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+ doc.getMap().put("keywords", "kw" + i + ", shared");
+ docs.add(doc);
+ }
+
+ router.putToIndex(IDX, docs, "content");
+
+ for (int i = 0; i < 3; i++) {
+ final String id = DOC_ID + "-" + i;
+ assertNotNull("Batch document '" + id + "' must reach OpenSearch",
+ osSiteSearchAPI.getFromIndex(IDX, id));
+ }
+
+ Logger.info(this, "✅ test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch passed");
+ }
+
+ // =======================================================================
+ // Helpers
+ // =======================================================================
+
+ /**
+ * True when the ES and OS clients are configured against the same cluster endpoint (the
+ * single-cluster {@code opensearch-upgrade} profile). Mirrors the gate used by the core
+ * migration ITs.
+ */
+ private static boolean esSameAsOs() {
+ final String esEndpoint = Config.getStringProperty("DOT_ES_ENDPOINTS",
+ "http://localhost:9207");
+ final String osEndpoint = Config.getStringProperty("OS_ENDPOINTS",
+ "http://localhost:9201");
+ return esEndpoint.trim().equalsIgnoreCase(osEndpoint.trim());
+ }
+
+ private static void setPhase(final Integer ordinal) {
+ Config.setProperty(IndexConfigHelper.MigrationPhase.FLAG_KEY,
+ ordinal == null ? null : String.valueOf(ordinal));
+ }
+
+ private synchronized void cleanupTestData() {
+ try {
+ if (osIndexAPI.indexExists(IDX)) {
+ osIndexAPI.delete(IDX);
+ }
+ } catch (final Exception e) {
+ Logger.warn(this, "Cleanup: error removing OS index '" + IDX + "': " + e.getMessage());
+ }
+ // The dual-write create also lands an ES index; remove it directly on the ES cluster.
+ try {
+ final ESIndexAPI esIndex = ((IndexAPIImpl) APILocator.getESIndexAPI()).esImpl();
+ if (esIndex.indexExists(IDX)) {
+ esIndex.delete(IDX);
+ }
+ } catch (final Exception e) {
+ Logger.warn(this, "Cleanup: error removing ES index '" + IDX + "': " + e.getMessage());
+ }
+ try {
+ new DotConnect()
+ .setSQL("DELETE FROM indicies WHERE index_name LIKE ?")
+ .addParam("%" + SUFFIX + "%")
+ .loadResult();
+ APILocator.getVersionedIndicesAPI().clearCache();
+ } catch (final Exception e) {
+ Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage());
+ }
+ }
+}
From ef98534ca9967ed3dc6adc264c7105fd9554b2b9 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Wed, 24 Jun 2026 09:22:47 -0600
Subject: [PATCH 7/9] fix(sitesearch): add dedicated os-sitesearch-mapping.json
for the OS index path
The OpenSearch site-search create path loaded its settings from
os-sitesearch-settings.json but reused es-sitesearch-mapping.json for the
mapping. The mapping is functionally OS-compatible (its analyzers exist in the
OS settings), but reading an es-*.json resource from the OS lifecycle couples
the two vendors: a future ES-only mapping change would silently alter OS.
Adds os-sitesearch-mapping.json (identical content today) and points
OSSiteSearchAPI.createSiteSearchIndex at it, mirroring the settings split so
ES and OS own their resources independently.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/OSSiteSearchAPI.java | 14 +++--
.../main/resources/os-sitesearch-mapping.json | 62 +++++++++++++++++++
2 files changed, 71 insertions(+), 5 deletions(-)
create mode 100644 dotCMS/src/main/resources/os-sitesearch-mapping.json
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
index 3196bb222d3f..c0693692528b 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -406,13 +406,17 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
indexName = indexName.toLowerCase();
final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
- // OpenSearch-format settings: the legacy es-sitesearch-settings.json uses ES-only token
- // filter syntax (e.g. edgeNGram / side) that the typed OpenSearch IndexSettings deserializer
- // rejects. os-sitesearch-settings.json declares the same analyzers (standard_content,
- // partial_content) in OpenSearch syntax. The mapping is vendor-neutral and is reused as-is.
+ // OpenSearch-format resources, kept separate from their es-*.json counterparts so the OS
+ // index lifecycle never depends on an ES-named file. Settings: the legacy
+ // es-sitesearch-settings.json uses ES-only token filter syntax (e.g. edgeNGram / side) that
+ // the typed OpenSearch IndexSettings deserializer rejects; os-sitesearch-settings.json
+ // declares the same analyzers (standard_content, partial_content) in OpenSearch syntax.
+ // The mapping is functionally identical to es-sitesearch-mapping.json today, but owning a
+ // dedicated os-sitesearch-mapping.json decouples the two vendors — a future ES mapping
+ // change cannot silently alter OS behaviour.
URL url = classLoader.getResource("os-sitesearch-settings.json");
final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
- url = classLoader.getResource("es-sitesearch-mapping.json");
+ url = classLoader.getResource("os-sitesearch-mapping.json");
final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
try {
diff --git a/dotCMS/src/main/resources/os-sitesearch-mapping.json b/dotCMS/src/main/resources/os-sitesearch-mapping.json
new file mode 100644
index 000000000000..c4d2c28d0235
--- /dev/null
+++ b/dotCMS/src/main/resources/os-sitesearch-mapping.json
@@ -0,0 +1,62 @@
+{
+
+ "properties": {
+ "content": {
+ "type": "text",
+ "analyzer": "standard_content",
+ "term_vector":"with_positions_offsets",
+ "fields": {
+ "untouched": {
+ "type": "keyword",
+ "ignore_above": 8191,
+ "doc_values" : true
+ },
+ "ngram": {
+ "search_analyzer": "standard_content",
+ "analyzer": "partial_content",
+ "type": "text"
+ }
+ }
+ },
+ "host": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "contentLength": {
+ "type": "long"
+ },
+ "uri": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "url": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "mimeType": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "title": {
+ "type": "text"
+ },
+ "description": {
+ "type": "text"
+ },
+ "modified": {
+ "type": "date",
+ "doc_values" : true
+ },
+ "keywords": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "language": {
+ "type": "long"
+ },
+ "author": {
+ "type": "text"
+ }
+ }
+
+}
From ba8e9645cf1d37f8c4ee6b35964811614ee02ced Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Wed, 24 Jun 2026 14:05:00 -0600
Subject: [PATCH 8/9] fix(sitesearch): harden OS write error handling + close
getMetadata gap + aggregation tests (#35786)
Address PR #36282 review feedback and aggregation test gaps:
- OSSiteSearchAPI.putToIndex/deleteFromIndex now PROPAGATE DotSearchException
instead of swallowing, so PhaseRouter can apply its per-phase policy
(Phase 3 OS-primary -> surfaced; shadow phases -> logged WARN by the router)
- setAlias returns true on success (was incorrectly false)
- add requireValidIndexName guard (null/blank, non-lowercase, OS-forbidden chars)
before interpolating the index name into the REST endpoint
- read os-sitesearch mapping/settings via getResourceAsStream (JAR-safe; was
new File(url.getPath()) which NPEs if missing and fails inside a JAR)
- lower per-doc put/delete logs from info to debug (Supplier form)
- close the getMetadata() equivalence gap on the neutral Aggregation record
(rollback-safe: the ES Aggregation interface exposes it too), populated from
both the ES and OpenSearch factories
- tests: date_histogram (ZonedDateTime->epoch-millis) coverage in
AggregationDomainTest + SiteSearchWebAPITest; strengthen the OS IT to assert
bucket content/keys/counts + nested top_hits on the fromOS path; index-name
validation IT cases
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../sitesearch/OSSiteSearchAPI.java | 92 ++++++++--
.../content/index/domain/Aggregation.java | 89 +++++++---
.../index/domain/AggregationDomainTest.java | 161 ++++++++++++++++++
.../OSSiteSearchAPIIntegrationTest.java | 126 ++++++++++++--
.../viewtool/SiteSearchWebAPITest.java | 84 +++++++++
5 files changed, 506 insertions(+), 46 deletions(-)
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
index c0693692528b..836b56b575ee 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -45,10 +45,9 @@
import com.dotmarketing.util.json.JSONObject;
import com.google.common.annotations.VisibleForTesting;
import io.vavr.control.Try;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.net.URL;
+import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Duration;
@@ -414,10 +413,10 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
// The mapping is functionally identical to es-sitesearch-mapping.json today, but owning a
// dedicated os-sitesearch-mapping.json decouples the two vendors — a future ES mapping
// change cannot silently alter OS behaviour.
- URL url = classLoader.getResource("os-sitesearch-settings.json");
- final String settings = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
- url = classLoader.getResource("os-sitesearch-mapping.json");
- final String mapping = new String(com.liferay.util.FileUtil.getBytes(new File(url.getPath())));
+ // Read via getResourceAsStream so the index lifecycle works when these resources are packaged
+ // inside a JAR (new File(url.getPath()) only works for filesystem URLs and NPEs if missing).
+ final String settings = readResource(classLoader, "os-sitesearch-settings.json");
+ final String mapping = readResource(classLoader, "os-sitesearch-mapping.json");
try {
indexApi.createIndex(indexName, settings, shards);
@@ -444,6 +443,25 @@ public synchronized boolean createSiteSearchIndex(String indexName, final String
* aggregations such as {@code mimeType}). Forwarding to the same untagged physical name used by
* {@code createIndex}/search/put keeps the mapping on the index that is actually hit.
*/
+ /**
+ * Reads a UTF-8 classpath resource fully into a String via {@code getResourceAsStream}, so it
+ * resolves whether the resource sits on the filesystem or inside a packaged JAR. Throws a clear
+ * {@link DotSearchException} when the resource is absent rather than NPE-ing on a null URL.
+ */
+ private static String readResource(final ClassLoader classLoader, final String resource)
+ throws DotSearchException {
+ try (final InputStream in = classLoader.getResourceAsStream(resource)) {
+ if (in == null) {
+ throw new DotSearchException(
+ "Required OpenSearch site search resource not found on the classpath: " + resource);
+ }
+ return new String(in.readAllBytes(), StandardCharsets.UTF_8);
+ } catch (final IOException e) {
+ throw new DotSearchException(
+ "Error reading OpenSearch site search resource " + resource + ": " + e.getMessage(), e);
+ }
+ }
+
private void putMapping(final String indexName, final String mapping) throws DotSearchException {
final String endpoint = "/" + physicalName(indexName) + "/_mapping";
try (final Response response = clientProvider.getClient().generic()
@@ -475,7 +493,10 @@ public synchronized boolean setAlias(String indexName, final String alias) {
}
indexName = indexName.toLowerCase();
indexApi.createAlias(indexName, alias);
- return false;
+ // createAlias is void and throws on failure, so reaching here means the alias was created.
+ // (Legacy ESSiteSearchAPI returns false here, but its only caller — ESSiteSearchPublisher —
+ // ignores the result, so the divergence is benign; reporting success honestly is correct.)
+ return true;
}
/**
@@ -529,6 +550,7 @@ public void putToIndex(final String idx, final SiteSearchResult res, final Strin
if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
return;
}
+ requireValidIndexName(idx);
try {
if (res.getContentLength() == 0) {
return;
@@ -556,8 +578,8 @@ public void putToIndex(final String idx, final SiteSearchResult res, final Strin
res.setKeywords((String) res.getMap().get("keywords"));
}
- Logger.info(this.getClass(),
- "writing from : " + idx + " type: " + resultType + " url:" + res.getUrl());
+ Logger.debug(this.getClass(),
+ () -> "writing to index " + idx + " type: " + resultType + " url:" + res.getUrl());
final String json = new ESMappingAPIImpl().toJsonString(res.getMap());
final String endpoint = "/" + physicalName(idx) + "/_doc/" + res.getId();
@@ -570,12 +592,19 @@ public void putToIndex(final String idx, final SiteSearchResult res, final Strin
.build())) {
final int status = response.getStatus();
if (status < 200 || status >= 300) {
- Logger.error(this.getClass(), "putToIndex failed for doc " + res.getId()
- + " — HTTP " + status);
+ throw new DotSearchException("putToIndex failed for doc " + res.getId()
+ + " on index " + idx + " — HTTP " + status);
}
}
+ } catch (final DotSearchException e) {
+ // Already a neutral failure signal — never swallow it. Propagating lets the phase
+ // router apply its per-phase policy: in Phase 3 (OS is primary) the failure is
+ // re-thrown so the publishing pipeline observes the data loss; in the shadow phases
+ // (1/2, OS secondary) PhaseRouter swallows it and logs at WARN, so ES stays unaffected.
+ throw e;
} catch (final Exception e) {
- Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+ throw new DotSearchException("putToIndex failed for doc " + res.getId()
+ + " on index " + idx + ": " + e.getMessage(), e);
}
}
@@ -615,8 +644,9 @@ public void deleteFromIndex(final String idx, final String docId) {
if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
return;
}
+ requireValidIndexName(idx);
try {
- Logger.info(this.getClass(), "deleting from : " + idx + " url:" + docId);
+ Logger.debug(this.getClass(), () -> "deleting doc " + docId + " from index " + idx);
final String endpoint = "/" + physicalName(idx) + "/_doc/" + docId;
try (final Response response = clientProvider.getClient().generic()
.execute(Requests.builder()
@@ -625,14 +655,17 @@ public void deleteFromIndex(final String idx, final String docId) {
.query(Map.of("refresh", "true"))
.build())) {
final int status = response.getStatus();
- // 404 is benign — the document was already absent.
+ // 404 is benign — the document was already absent (idempotent delete).
if (status >= 400 && status != 404) {
- Logger.error(this.getClass(), "deleteFromIndex failed for doc " + docId
- + " — HTTP " + status);
+ throw new DotSearchException("deleteFromIndex failed for doc " + docId
+ + " on index " + idx + " — HTTP " + status);
}
}
+ } catch (final DotSearchException e) {
+ throw e; // propagate; PhaseRouter applies the per-phase primary/shadow policy
} catch (final Exception e) {
- Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+ throw new DotSearchException("deleteFromIndex failed for doc " + docId
+ + " on index " + idx + ": " + e.getMessage(), e);
}
}
@@ -747,6 +780,31 @@ private String physicalName(final String indexName) {
return indexApi.getNameWithClusterIDPrefix(indexName);
}
+ /** Characters OpenSearch forbids in an index name (plus the space). */
+ private static final java.util.regex.Pattern INVALID_INDEX_NAME_CHARS =
+ java.util.regex.Pattern.compile("[\\\\/*?\"<>|,# ]");
+
+ /**
+ * Guards a caller-supplied site-search index name before it is interpolated into an OpenSearch
+ * REST endpoint (e.g. {@code /<index>/_doc/<id>}). Fails fast with a clear
+ * {@link IllegalArgumentException} on a null/blank name (the NPE risk raised in review) or a name
+ * carrying characters OpenSearch rejects, instead of letting a malformed name reach the cluster
+ * as a cryptic HTTP 400.
+ */
+ private static void requireValidIndexName(final String idx) {
+ if (UtilMethods.isNotSet(idx)) {
+ throw new IllegalArgumentException("Site search index name must not be null or blank");
+ }
+ if (!idx.equals(idx.toLowerCase(java.util.Locale.ROOT))) {
+ throw new IllegalArgumentException(
+ "Site search index name must be lowercase: `" + idx + "`");
+ }
+ if (INVALID_INDEX_NAME_CHARS.matcher(idx).find()) {
+ throw new IllegalArgumentException(
+ "Site search index name contains characters OpenSearch forbids: `" + idx + "`");
+ }
+ }
+
/**
* Resolves a site-search index name or alias to the backing index name, mirroring the
* alias-fallback in {@link ESSiteSearchAPI#getAggregations(String, String)}.
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
index e94984f6ed3c..aa79dab4b7fd 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/Aggregation.java
@@ -7,6 +7,7 @@
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
+import org.jetbrains.annotations.NotNull;
/**
* Vendor-neutral representation of a single named aggregation result.
@@ -23,36 +24,44 @@
* / {@link #fromOS(Map)} to build that map from a vendor response.
*
*
The components are named {@code getName} / {@code getType} / {@code getBuckets} / {@code getHits}
- * so the canonical record accessors are bean-style; this keeps {@code $results.aggregations..buckets}
- * (property access, resolved via {@code getBuckets()}) working from Velocity.
+ * / {@code getMetadata} so the canonical record accessors are bean-style; this keeps
+ * {@code $results.aggregations.<name>.buckets} (property access, resolved via {@code getBuckets()})
+ * working from Velocity.
*
*
Factory methods are the only places where vendor imports are allowed in this file.
*
- * @param getName the aggregation name as declared in the query (e.g. {@code content_types})
- * @param getType the vendor-reported aggregation type (e.g. {@code sterms}, {@code lterms},
- * {@code top_hits}); defaults to {@code unknown}
- * @param getBuckets buckets for multi-bucket ({@code terms}) aggregations; empty for metric aggregations
- * @param getHits hits for the {@code top_hits} metric aggregation; {@code null} for other types
+ * @param getName the aggregation name as declared in the query (e.g. {@code content_types})
+ * @param getType the vendor-reported aggregation type (e.g. {@code sterms}, {@code lterms},
+ * {@code top_hits}); defaults to {@code unknown}
+ * @param getBuckets buckets for multi-bucket ({@code terms}) aggregations; empty for metric aggregations
+ * @param getHits hits for the {@code top_hits} metric aggregation; {@code null} for other types
+ * @param getMetadata the optional {@code meta} object attached to the aggregation in the query;
+ * mirrors {@code org.elasticsearch.search.aggregations.Aggregation#getMetadata()}
+ * (and OpenSearch's {@code Aggregate#meta()}) so it survives a rollback to the ES
+ * type, which exposes the same accessor; empty map when no {@code meta} was set
* @see AggregationBucket
*/
public record Aggregation(
String getName,
String getType,
List getBuckets,
- @Nullable SearchHits getHits) implements Iterable {
+ @Nullable SearchHits getHits,
+ Map getMetadata) implements Iterable {
/**
- * Canonical constructor. {@code getType} defaults to {@code "unknown"} and {@code getBuckets}
- * to an empty list when {@code null} (mirrors the previous Immutables defaults).
+ * Canonical constructor. {@code getType} defaults to {@code "unknown"}, {@code getBuckets}
+ * to an empty list and {@code getMetadata} to an empty map when {@code null} (mirrors the
+ * previous Immutables defaults).
*/
public Aggregation {
getType = getType == null ? "unknown" : getType;
getBuckets = getBuckets == null ? Collections.emptyList() : getBuckets;
+ getMetadata = getMetadata == null ? Collections.emptyMap() : getMetadata;
}
/** Iterate the buckets directly: {@code #foreach($bucket in $agg)}. */
@Override
- public Iterator iterator() {
+ public @NotNull Iterator iterator() {
return getBuckets().iterator();
}
@@ -80,7 +89,8 @@ public static Map from(
private static Aggregation fromSingle(final org.elasticsearch.search.aggregations.Aggregation esAgg) {
final Builder builder = builder()
.name(esAgg.getName())
- .type(esAgg.getType());
+ .type(esAgg.getType())
+ .metadata(esAgg.getMetadata());
if (esAgg instanceof org.elasticsearch.search.aggregations.bucket.terms.Terms) {
final org.elasticsearch.search.aggregations.bucket.terms.Terms terms =
@@ -131,36 +141,71 @@ private static Aggregation fromSingleOS(final String name,
final Builder builder = builder().name(name);
if (agg.isSterms()) {
+ final org.opensearch.client.opensearch._types.aggregations.StringTermsAggregate sterms =
+ agg.sterms();
return builder.type("sterms")
- .buckets(agg.sterms().buckets().array().stream()
+ .metadata(fromOSMeta(sterms.meta()))
+ .buckets(sterms.buckets().array().stream()
.map(AggregationBucket::fromOS)
.collect(Collectors.toList()))
.build();
} else if (agg.isLterms()) {
+ final org.opensearch.client.opensearch._types.aggregations.LongTermsAggregate lterms =
+ agg.lterms();
return builder.type("lterms")
- .buckets(agg.lterms().buckets().array().stream()
+ .metadata(fromOSMeta(lterms.meta()))
+ .buckets(lterms.buckets().array().stream()
.map(AggregationBucket::fromOS)
.collect(Collectors.toList()))
.build();
} else if (agg.isDterms()) {
+ final org.opensearch.client.opensearch._types.aggregations.DoubleTermsAggregate dterms =
+ agg.dterms();
return builder.type("dterms")
- .buckets(agg.dterms().buckets().array().stream()
+ .metadata(fromOSMeta(dterms.meta()))
+ .buckets(dterms.buckets().array().stream()
.map(AggregationBucket::fromOS)
.collect(Collectors.toList()))
.build();
} else if (agg.isTopHits()) {
+ final org.opensearch.client.opensearch._types.aggregations.TopHitsAggregate topHits =
+ agg.topHits();
return builder.type("top_hits")
- .hits(SearchHits.from(agg.topHits().hits()))
+ .metadata(fromOSMeta(topHits.meta()))
+ .hits(SearchHits.from(topHits.hits()))
.build();
}
return null;
}
+ /**
+ * Converts an OpenSearch aggregation {@code meta} map ({@code Map<String, JsonData>}) into the
+ * neutral plain-Java {@code Map<String, Object>} so it matches the shape Elasticsearch already
+ * returns from {@code Aggregation#getMetadata()}. Each {@code JsonData} is unwrapped to its
+ * closest plain value (Map/List/String/Number/Boolean); if a value cannot be mapped it falls
+ * back to its raw JSON string rather than failing the whole aggregation.
+ */
+ private static Map<String, Object> fromOSMeta(
+ final Map<String, org.opensearch.client.json.JsonData> osMeta) {
+ if (osMeta == null || osMeta.isEmpty()) {
+ return Collections.emptyMap();
+ }
+ final Map<String, Object> meta = new LinkedHashMap<>();
+ for (final Map.Entry<String, org.opensearch.client.json.JsonData> entry : osMeta.entrySet()) {
+ try {
+ meta.put(entry.getKey(), entry.getValue().to(Object.class));
+ } catch (final RuntimeException cannotMap) {
+ meta.put(entry.getKey(), entry.getValue().toJson().toString());
+ }
+ }
+ return meta;
+ }
+
/**
* Fluent builder for {@link Aggregation}. An unset {@code type} defaults to {@code "unknown"},
- * unset {@code buckets} to an empty list and {@code hits} to {@code null}, preserving the
- * lenient behaviour of the former Immutables builder.
+ * unset {@code buckets} to an empty list, {@code hits} to {@code null} and {@code metadata} to
+ * an empty map, preserving the lenient behaviour of the former Immutables builder.
*/
public static final class Builder {
@@ -168,6 +213,7 @@ public static final class Builder {
private String type;
private List buckets = Collections.emptyList();
private SearchHits hits;
+ private Map metadata = Collections.emptyMap();
public Builder name(final String name) {
this.name = name;
@@ -189,8 +235,13 @@ public Builder hits(final SearchHits hits) {
return this;
}
+ public Builder metadata(final Map metadata) {
+ this.metadata = metadata;
+ return this;
+ }
+
public Aggregation build() {
- return new Aggregation(name, type, buckets, hits);
+ return new Aggregation(name, type, buckets, hits, metadata);
}
}
}
diff --git a/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java b/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java
index a26ba95743d0..218061c4443b 100644
--- a/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java
+++ b/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java
@@ -5,12 +5,20 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.guava.GuavaModule;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import org.elasticsearch.search.aggregations.Aggregations;
+import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.junit.Test;
/**
@@ -132,4 +140,157 @@ public void aggregationBucket_nestedSubAggregations_areReachable() {
assertNotNull(bucket.getAggregations().get("top_content"));
assertEquals("top_hits", bucket.getAggregations().get("top_content").getType());
}
+
+ // =========================================================================
+ // Elasticsearch factory conversion (Aggregation.from / AggregationBucket.from*)
+ // =========================================================================
+ //
+ // These exercise the vendor-specific ES → neutral conversion deterministically (no search
+ // engine, no container): the mocked ES aggregation objects mirror exactly what the live ES
+ // client hands the factories, so the conversion is locked down here and not only end-to-end.
+
+ /**
+ * A {@code date_histogram} bucket key is a {@link ZonedDateTime} in Elasticsearch 7.x — NOT a
+ * number. {@link AggregationBucket#fromHistogram} must normalize it to epoch-millis so that
+ * {@code getKeyAsNumber()} (and the legacy {@code InternalWrapperCountDateHistogramFacet} that
+ * reads it) returns a real timestamp rather than null/0. This is the trickiest branch of the
+ * neutral conversion and the one with no obvious end-to-end equivalent, so it is pinned here.
+ */
+ @Test
+ public void esFactory_dateHistogram_normalizesZonedDateTimeKeyToEpochMillis() {
+ final ZonedDateTime day = ZonedDateTime.of(2024, 1, 15, 0, 0, 0, 0, ZoneOffset.UTC);
+ final long expectedEpochMillis = day.toInstant().toEpochMilli();
+
+ final Aggregations emptySubAggs = emptyEsAggregations();
+ final Histogram.Bucket bucket = mock(Histogram.Bucket.class);
+ when(bucket.getKey()).thenReturn(day); // ES 7.x date-histogram key type
+ when(bucket.getDocCount()).thenReturn(4L);
+ when(bucket.getAggregations()).thenReturn(emptySubAggs);
+
+ final Histogram histogram = mock(Histogram.class);
+ when(histogram.getName()).thenReturn("by_day");
+ when(histogram.getType()).thenReturn("date_histogram");
+ doReturn(List.of(bucket)).when(histogram).getBuckets();
+
+ final Aggregations esAggs = mock(Aggregations.class);
+ when(esAggs.asList()).thenReturn(List.of(histogram));
+
+ final Aggregation byDay = Aggregation.from(esAggs).get("by_day");
+ assertNotNull("date_histogram aggregation must be mapped", byDay);
+ assertEquals("date_histogram", byDay.getType());
+ assertEquals("one bucket expected", 1, byDay.getBuckets().size());
+
+ final AggregationBucket b = byDay.getBuckets().get(0);
+ assertEquals("doc count must round-trip", 4L, b.getDocCount());
+ assertEquals("a ZonedDateTime key must become epoch-millis, not a formatted date",
+ expectedEpochMillis, b.getKeyAsNumber().longValue());
+ assertEquals("getKeyAsString must expose the same epoch-millis",
+ String.valueOf(expectedEpochMillis), b.getKeyAsString());
+ }
+
+ /**
+ * A numeric {@code histogram} bucket key is a {@link Number} (a {@code Double} in ES); the
+ * conversion must take the {@code longValue()} branch of {@code histogramKey} and yield that
+ * number as the key.
+ */
+ @Test
+ public void esFactory_numericHistogram_normalizesNumberKeyToLong() {
+ final Aggregations emptySubAggs = emptyEsAggregations();
+ final Histogram.Bucket bucket = mock(Histogram.Bucket.class);
+ when(bucket.getKey()).thenReturn(Double.valueOf(50.0)); // ES numeric-histogram key type
+ when(bucket.getDocCount()).thenReturn(2L);
+ when(bucket.getAggregations()).thenReturn(emptySubAggs);
+
+ final Histogram histogram = mock(Histogram.class);
+ when(histogram.getName()).thenReturn("by_len");
+ when(histogram.getType()).thenReturn("histogram");
+ doReturn(List.of(bucket)).when(histogram).getBuckets();
+
+ final Aggregations esAggs = mock(Aggregations.class);
+ when(esAggs.asList()).thenReturn(List.of(histogram));
+
+ final AggregationBucket b = Aggregation.from(esAggs).get("by_len").getBuckets().get(0);
+ assertEquals("a numeric key must be preserved as a long", 50L, b.getKeyAsNumber().longValue());
+ assertEquals("50", b.getKeyAsString());
+ }
+
+ /**
+ * A {@code terms} aggregation maps every bucket through {@link AggregationBucket#from}: the
+ * String key round-trips on {@code getKey()}/{@code getKeyAsString()}, a non-numeric key yields
+ * a null number, doc counts survive, and a metric-less terms aggregation carries no top-hits.
+ */
+ @Test
+ public void esFactory_terms_mapsBucketsAndIsHitsFree() {
+ final Aggregations emptySubAggs = emptyEsAggregations();
+ final Terms.Bucket esBucket = mock(Terms.Bucket.class);
+ when(esBucket.getKeyAsString()).thenReturn("text/html");
+ when(esBucket.getDocCount()).thenReturn(3L);
+ when(esBucket.getAggregations()).thenReturn(emptySubAggs);
+
+ final Terms terms = mock(Terms.class);
+ when(terms.getName()).thenReturn("by_mime");
+ when(terms.getType()).thenReturn("sterms");
+ doReturn(List.of(esBucket)).when(terms).getBuckets();
+
+ final Aggregations esAggs = mock(Aggregations.class);
+ when(esAggs.asList()).thenReturn(List.of(terms));
+
+ final Aggregation byMime = Aggregation.from(esAggs).get("by_mime");
+ assertNotNull(byMime);
+ assertEquals("sterms", byMime.getType());
+ assertNull("a terms aggregation carries no top-hits", byMime.getHits());
+ assertEquals(1, byMime.getBuckets().size());
+
+ final AggregationBucket b = byMime.getBuckets().get(0);
+ assertEquals("text/html", b.getKey());
+ assertEquals("text/html", b.getKeyAsString());
+ assertNull("a non-numeric key must yield a null number", b.getKeyAsNumber());
+ assertEquals(3L, b.getDocCount());
+ assertTrue("no nested sub-aggregations here", b.getAggregations().isEmpty());
+ }
+
+ /** A null Elasticsearch aggregation set maps to an empty tree rather than throwing. */
+ @Test
+ public void esFactory_nullAggregations_yieldEmptyMap() {
+ assertTrue("null ES aggregations must map to an empty tree",
+ Aggregation.from((Aggregations) null).isEmpty());
+ }
+
+ /**
+ * The {@code meta} object set on an aggregation in the query is preserved on the neutral type
+ * via {@code getMetadata()} — closing the last equivalence gap with the ES {@code Aggregation}
+ * interface ({@code getName}/{@code getType}/{@code getMetadata}). This accessor is rollback-safe
+ * because the ES type exposes the same method, so a template adopting {@code $agg.metadata}
+ * resolves on both N (neutral) and N-1 (ES).
+ */
+ @Test
+ public void esFactory_metadata_isPreserved() {
+ final Map meta = Map.of("unit", "days", "version", 2);
+ final Terms terms = mock(Terms.class);
+ when(terms.getName()).thenReturn("by_day");
+ when(terms.getType()).thenReturn("sterms");
+ when(terms.getMetadata()).thenReturn(meta);
+ doReturn(List.of()).when(terms).getBuckets();
+
+ final Aggregations esAggs = mock(Aggregations.class);
+ when(esAggs.asList()).thenReturn(List.of(terms));
+
+ assertEquals("the aggregation meta map must round-trip from ES",
+ meta, Aggregation.from(esAggs).get("by_day").getMetadata());
+ }
+
+ /** {@code getMetadata()} is never null — it defaults to an empty map when no meta was set. */
+ @Test
+ public void aggregation_metadata_defaultsToEmptyWhenUnset() {
+ final Aggregation agg = Aggregation.builder().name("x").type("sterms").build();
+ assertNotNull("metadata must never be null", agg.getMetadata());
+ assertTrue("metadata defaults to empty when unset", agg.getMetadata().isEmpty());
+ }
+
+ /** An empty (but non-null) Elasticsearch aggregation set whose buckets carry no sub-aggs. */
+ private static Aggregations emptyEsAggregations() {
+ final Aggregations aggs = mock(Aggregations.class);
+ when(aggs.asList()).thenReturn(List.of());
+ return aggs;
+ }
}
diff --git a/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java b/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
index 631212d287a6..3e164f612cc1 100644
--- a/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
+++ b/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
@@ -9,6 +9,8 @@
import com.dotcms.DataProviderWeldRunner;
import com.dotcms.IntegrationTestBase;
import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
+import com.dotcms.content.index.domain.SearchHit;
import com.dotcms.enterprise.publishing.sitesearch.OSSiteSearchAPI;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
@@ -20,6 +22,7 @@
import com.dotmarketing.util.json.JSONObject;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.UUID;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
@@ -174,20 +177,25 @@ public void test_putGetSearchDelete_documentRoundTrip() throws Exception {
}
/**
- * Given scenario: an index holding a few documents that share a common term.
- * Expected: a terms aggregation query returns a non-null aggregation tree keyed by the
- * aggregation name.
+ * Given scenario: an index holding 3 html + 2 pdf documents.
+ * Expected: a terms aggregation on {@code mimeType} maps through the OpenSearch
+ * {@code fromOS(StringTermsBucket)} factory to a neutral {@link Aggregation} with one bucket per
+ * mimeType — correct keys, doc counts, {@code getKeyAsString} mirroring {@code getKey}, a null
+ * numeric key for the non-numeric mimeType, and no top-hits — so the OS path produces the same
+ * neutral shape the ES path does (not merely a non-null map).
*/
@Test
- public void test_getAggregations_shouldReturnBuckets() throws Exception {
+ public void test_getAggregations_termsBucketsHaveCorrectKeysAndCounts() throws Exception {
osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
- for (int i = 0; i < 3; i++) {
+ final int htmlDocs = 3;
+ final int pdfDocs = 2;
+ for (int i = 0; i < htmlDocs + pdfDocs; i++) {
final SiteSearchResult doc = new SiteSearchResult();
doc.setId(DOC_ID + "-" + i);
doc.setUrl("/agg/" + RUN_ID + "/" + i);
doc.setTitle("Aggregation doc " + i);
- doc.setMimeType("text/html");
+ doc.setMimeType(i < htmlDocs ? "text/html" : "application/pdf");
doc.setContent("aggregation bucket sample " + RUN_ID);
doc.setContentLength(doc.getContent().length());
osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
@@ -197,16 +205,114 @@ public void test_getAggregations_shouldReturnBuckets() throws Exception {
.put("size", 0)
.put("aggs", new JSONObject().put("by_mime",
new JSONObject().put("terms",
- new JSONObject().put("field", "mimeType")))).toString();
+ new JSONObject().put("field", "mimeType").put("size", 10)))).toString();
final Map aggregations =
osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery);
assertNotNull("Aggregations map must not be null", aggregations);
- assertTrue("Aggregation 'by_mime' must be present", aggregations.containsKey("by_mime"));
+ final Aggregation byMime = aggregations.get("by_mime");
+ assertNotNull("Aggregation 'by_mime' must be present", byMime);
+ assertEquals("aggregation name must round-trip", "by_mime", byMime.getName());
+ assertNull("a terms aggregation carries no top-hits", byMime.getHits());
+ assertEquals("there must be one bucket per mimeType", 2, byMime.getBuckets().size());
+
+ final Set expectedMimes = Set.of("text/html", "application/pdf");
+ long htmlCount = -1;
+ long pdfCount = -1;
+ for (final AggregationBucket bucket : byMime.getBuckets()) {
+ assertTrue("bucket key must be a known mimeType", expectedMimes.contains(bucket.getKey()));
+ assertEquals("getKeyAsString must mirror getKey", bucket.getKey(), bucket.getKeyAsString());
+ assertNull("a non-numeric key must yield a null number", bucket.getKeyAsNumber());
+ assertTrue("each bucket must carry documents", bucket.getDocCount() > 0);
+ if ("text/html".equals(bucket.getKey())) {
+ htmlCount = bucket.getDocCount();
+ } else if ("application/pdf".equals(bucket.getKey())) {
+ pdfCount = bucket.getDocCount();
+ }
+ }
+ assertEquals("html bucket must count the html docs", htmlDocs, htmlCount);
+ assertEquals("pdf bucket must count the pdf docs", pdfDocs, pdfCount);
+
+ Logger.info(this, "✅ test_getAggregations_termsBucketsHaveCorrectKeysAndCounts passed");
+ }
- Logger.info(this, "✅ test_getAggregations_shouldReturnBuckets passed – keys: "
- + aggregations.keySet());
+ /**
+ * Given scenario: a terms aggregation with a nested {@code top_hits} sub-aggregation.
+ * Expected: the OpenSearch path preserves the nested {@code top_docs} as a neutral
+ * {@link Aggregation} carrying {@link SearchHit}s (each with an id and a non-empty source),
+ * reachable via {@code bucket.getAggregations()} — exercising
+ * {@code AggregationBucket.fromOS} sub-aggregation nesting and {@code SearchHits.from(OS hits)},
+ * which the terms-only test does not reach.
+ */
+ @Test
+ public void test_getAggregations_nestedTopHits_preservedOnOpenSearchPath() throws Exception {
+ osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+ for (int i = 0; i < 3; i++) {
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID + "-th-" + i);
+ doc.setUrl("/agg-th/" + RUN_ID + "/" + i);
+ doc.setTitle("Top hits doc " + i);
+ doc.setMimeType("text/html");
+ doc.setContent("top hits nested sample " + RUN_ID);
+ doc.setContentLength(doc.getContent().length());
+ osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+ }
+
+ final String aggQuery = new JSONObject()
+ .put("size", 0)
+ .put("aggs", new JSONObject().put("by_mime", new JSONObject()
+ .put("terms", new JSONObject().put("field", "mimeType").put("size", 10))
+ .put("aggs", new JSONObject().put("top_docs",
+ new JSONObject().put("top_hits",
+ new JSONObject().put("size", 2)))))).toString();
+
+ final Map aggregations =
+ osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery);
+
+ final Aggregation byMime = aggregations.get("by_mime");
+ assertNotNull("'by_mime' aggregation must be present", byMime);
+ assertFalse("'by_mime' must have buckets", byMime.getBuckets().isEmpty());
+
+ final AggregationBucket firstBucket = byMime.getBuckets().get(0);
+ final Aggregation topDocs = firstBucket.getAggregations().get("top_docs");
+ assertNotNull("nested top_hits sub-aggregation must be preserved on the OS path", topDocs);
+ assertNotNull("top_hits must carry a SearchHits container", topDocs.getHits());
+
+ final List hits = topDocs.getHits().getHits();
+ assertFalse("top_hits must carry at least one hit", hits.isEmpty());
+ final SearchHit hit = hits.get(0);
+ assertNotNull("each top-hit must expose an id", hit.getId());
+ assertFalse("each top-hit must expose its source document", hit.getSourceAsMap().isEmpty());
+
+ Logger.info(this, "✅ test_getAggregations_nestedTopHits_preservedOnOpenSearchPath passed – "
+ + "hits: " + hits.size());
+ }
+
+ /**
+ * Given scenario: a document write targeting an index name carrying characters OpenSearch
+ * forbids.
+ * Expected: putToIndex fails fast with an IllegalArgumentException (the malformed name never
+ * reaches the cluster as a cryptic HTTP 400).
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void test_putToIndex_invalidIndexName_throwsFast() {
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID);
+ doc.setContent("x");
+ doc.setContentLength(1);
+ osSiteSearchAPI.putToIndex("Invalid Name/With*Chars", doc, "content");
+ }
+
+ /**
+ * Given scenario: a delete targeting a blank index name.
+ * Expected: deleteFromIndex fails fast with an IllegalArgumentException rather than NPE-ing on
+ * the null/blank name.
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void test_deleteFromIndex_blankIndexName_throwsFast() {
+ osSiteSearchAPI.deleteFromIndex(" ", DOC_ID);
}
// =======================================================================
diff --git a/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
index 215d94a58fa2..63e3b79bc6b6 100644
--- a/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
+++ b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
@@ -22,6 +22,7 @@
import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperCountDateHistogramFacet;
import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperStringTermsFacet;
import com.dotmarketing.util.Logger;
+import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -92,6 +93,17 @@ public class SiteSearchWebAPITest extends IntegrationTestBase {
"{\"size\":0,\"aggs\":{\"by_len\":{\"histogram\":{\"field\":\"contentLength\","
+ "\"interval\":25}}}}";
+ /** Date histogram over the {@code modified} field — one bucket per UTC day. */
+ private static final String DATE_HISTO_AGG =
+ "{\"size\":0,\"aggs\":{\"by_day\":{\"date_histogram\":{\"field\":\"modified\","
+ + "\"calendar_interval\":\"day\"}}}}";
+
+ /** One day in millis, used to spread each doc's {@code modified} date into its own day bucket. */
+ private static final long DAY_MILLIS = 86_400_000L;
+
+ /** Fixed UTC-midnight base (2024-01-15T00:00:00Z) so the date-histogram keys are deterministic. */
+ private static final long BASE_MODIFIED = 1_705_276_800_000L;
+
/** Query matches no doc, so the terms aggregation comes back with empty buckets. */
private static final String EMPTY_AGG =
"{\"size\":0,\"query\":{\"term\":{\"mimeType\":\"zzz/none\"}},"
@@ -124,6 +136,9 @@ public static void prepare() throws Exception {
doc.setContent("dotcms site search viewtool integration " + TOKEN
+ " ".repeat(i * 30));
doc.setContentLength(doc.getContent().length());
+ // Distinct UTC-midnight day per doc so the date histogram on `modified` yields one
+ // populated bucket per doc (exercises the ZonedDateTime -> epoch-millis key conversion).
+ doc.setModified(new Date(BASE_MODIFIED + (i * DAY_MILLIS)));
siteSearchAPI.putToIndex(IDX, doc, "content");
}
}
@@ -399,6 +414,43 @@ public void getAggregations_numericHistogram_keyAsNumber() throws Exception {
Logger.info(this, "✅ getAggregations_numericHistogram_keyAsNumber passed");
}
+ /**
+ * Given scenario: a {@code date_histogram} on the {@code modified} date field.
+ * Expected: each bucket key is normalized to a numeric epoch-millis timestamp (the ES
+ * {@code date_histogram} key is a {@code ZonedDateTime} under the hood), so
+ * {@link AggregationBucket#getKeyAsNumber()} returns a real timestamp and {@code getKeyAsString}
+ * mirrors it — covering the date-histogram key path, which is distinct from the numeric-histogram
+ * one and was otherwise untested end-to-end.
+ */
+ @Test
+ public void getAggregations_dateHistogram_keyAsEpochMillis() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map aggregations = tool.getAggregations(IDX, DATE_HISTO_AGG);
+ final Aggregation byDay = aggregations.get("by_day");
+ assertNotNull("'by_day' date-histogram aggregation must be present", byDay);
+ assertTrue("date_histogram type must be reported as a histogram",
+ byDay.getType().contains("histogram"));
+ assertFalse("date histogram must produce buckets", byDay.getBuckets().isEmpty());
+
+ long totalDocs = 0;
+ for (final AggregationBucket bucket : byDay.getBuckets()) {
+ final Number key = bucket.getKeyAsNumber();
+ // The crux: a ZonedDateTime key must surface as numeric epoch-millis here, NOT a
+ // formatted date string (which would make getKeyAsNumber return null).
+ assertNotNull("a date-histogram bucket key must be numeric (epoch-millis)", key);
+ assertTrue("the key must be a real epoch-millis timestamp (>= the base UTC day)",
+ key.longValue() >= BASE_MODIFIED);
+ assertEquals("getKeyAsString must mirror the numeric epoch-millis",
+ String.valueOf(key.longValue()), bucket.getKeyAsString());
+ totalDocs += bucket.getDocCount();
+ }
+ assertEquals("every indexed doc must fall into exactly one day bucket", TOTAL_DOCS, totalDocs);
+
+ Logger.info(this, "✅ getAggregations_dateHistogram_keyAsEpochMillis passed – buckets: "
+ + byDay.getBuckets().size());
+ }
+
// =========================================================================
// getFacets — legacy wrapper coverage (terms / histogram / plain)
// =========================================================================
@@ -470,6 +522,38 @@ public void getFacets_histogramAggregation_wrapsAsCountHistogramFacet() throws E
Logger.info(this, "✅ getFacets_histogramAggregation_wrapsAsCountHistogramFacet passed");
}
+ /**
+ * Given scenario: a {@code date_histogram} on the {@code modified} field.
+ * Expected: getFacets wraps it as an {@link InternalWrapperCountDateHistogramFacet} whose
+ * CountEntry rows carry the day's epoch-millis as {@code time} — proving the legacy date-facet
+ * path (which reads {@code getKeyAsNumber().longValue()}) surfaces a real timestamp rather than
+ * the {@code 0L} fallback used when the key fails to parse as a number.
+ */
+ @Test
+ public void getFacets_dateHistogram_exposesEpochMillisTime() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final Map facets = tool.getFacets(IDX, DATE_HISTO_AGG);
+ final Facet facet = facets.get("by_day");
+ assertNotNull("'by_day' facet must be present", facet);
+ assertTrue("a date histogram must map to InternalWrapperCountDateHistogramFacet",
+ facet instanceof InternalWrapperCountDateHistogramFacet);
+
+ final InternalWrapperCountDateHistogramFacet histoFacet =
+ (InternalWrapperCountDateHistogramFacet) facet;
+ assertFalse("date histogram facet must expose count entries", histoFacet.entries().isEmpty());
+
+ long totalCount = 0;
+ for (final var entry : histoFacet.entries()) {
+ assertTrue("each entry time must be a real epoch-millis (>= the base UTC day), not the "
+ + "0L parse-failure fallback", entry.getTime() >= BASE_MODIFIED);
+ totalCount += entry.getCount();
+ }
+ assertEquals("every indexed doc must be counted across the day entries", TOTAL_DOCS, totalCount);
+
+ Logger.info(this, "✅ getFacets_dateHistogram_exposesEpochMillisTime passed");
+ }
+
/**
* Given scenario: a terms aggregation whose query matches no document (empty buckets).
* Expected: getFacets falls back to a plain {@link Facet} (neither wrapper), still exposing
From 86bf0fbb448922677feeeb46e1218726c96ea152 Mon Sep 17 00:00:00 2001
From: fabrizzio-dotCMS
Date: Wed, 24 Jun 2026 15:10:15 -0600
Subject: [PATCH 9/9] fix(sitesearch): preserve top_hits _source on the
OpenSearch aggregation path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
SearchHit.from(OpenSearch Hit) dropped the document source for nested top_hits:
TopHitsAggregate hits are HitsMetadata<JsonData>, so Hit.source() is a JsonData,
not a Map — it fell through to the empty-map fallback. Unwrap JsonData via
to(Map.class) so the _source survives. Fixes the deterministic failure of
OSSiteSearchAPIIntegrationTest.test_getAggregations_nestedTopHits_preservedOnOpenSearchPath
on the OpenSearch Upgrade Suite.
Co-Authored-By: Claude Opus 4.8 (1M context)
---
.../com/dotcms/content/index/domain/SearchHit.java | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java
index fa3dc06662d6..b4278390a21c 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java
@@ -93,9 +93,18 @@ public static SearchHit from(org.opensearch.client.opensearch.core.search.Hit>
Object source = osHit.source();
if (source instanceof Map) {
sourceMap = (Map) source;
+ } else if (source instanceof org.opensearch.client.json.JsonData) {
+ // top_hits aggregation hits carry their _source as JsonData (HitsMetadata<JsonData>),
+ // not a Map — unwrap it so the document survives the conversion instead of being dropped.
+ Map unwrapped;
+ try {
+ unwrapped = ((org.opensearch.client.json.JsonData) source).to(Map.class);
+ } catch (final RuntimeException cannotMap) {
+ unwrapped = null;
+ }
+ sourceMap = unwrapped != null ? unwrapped : Map.of();
} else {
- // If "source" is a typed object, we might need custom mapping logic here
- // For now, we'll create an empty map as fallback
+ // Unknown typed source — fall back to an empty map rather than failing the conversion.
sourceMap = Map.of();
}