diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 081169966c3..30672080a6a 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -20,6 +20,7 @@ name: Nightly-build # It is a dependency of uxlfoundation/scikit-learn-intelex's `CI` GitHub action on: + push: schedule: - cron: '0 21 * * *' pull_request: @@ -49,7 +50,6 @@ env: jobs: build_lnx: name: oneDAL Linux nightly build - if: github.repository == 'uxlfoundation/oneDAL' runs-on: ubuntu-24.04 timeout-minutes: 120 @@ -85,7 +85,6 @@ jobs: build_win: name: oneDAL Windows nightly build - if: github.repository == 'uxlfoundation/oneDAL' runs-on: windows-2025 timeout-minutes: 180 diff --git a/cpp/daal/include/algorithms/dbscan/dbscan_types.h b/cpp/daal/include/algorithms/dbscan/dbscan_types.h index bcbdb6cfa18..918c040310a 100644 --- a/cpp/daal/include/algorithms/dbscan/dbscan_types.h +++ b/cpp/daal/include/algorithms/dbscan/dbscan_types.h @@ -54,7 +54,9 @@ const int undefined = -2; */ enum Method { - defaultDense = 0 /*!< Default: performance-oriented method */ + defaultDense = 0, /*!< Default: brute-force performance-oriented method */ + kdTree = 1, /*!< K-d tree method: accelerated epsilon-neighborhood search */ + ballTree = 2 /*!< Ball tree method: hypersphere-based spatial partitioning */ }; /** diff --git a/cpp/daal/src/algorithms/dbscan/dbscan_ball_tree_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/dbscan/dbscan_ball_tree_batch_fpt_cpu.cpp new file mode 100644 index 00000000000..44fe16de9e2 --- /dev/null +++ b/cpp/daal/src/algorithms/dbscan/dbscan_ball_tree_batch_fpt_cpu.cpp @@ -0,0 +1,37 @@ +/* file: dbscan_ball_tree_batch_fpt_cpu.cpp */ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "src/algorithms/dbscan/dbscan_ball_tree_batch_impl.i" +#include "services/daal_defines.h" + +using namespace daal::internal; + +namespace daal +{ +namespace algorithms +{ +namespace dbscan +{ +namespace internal +{ + +template class DAAL_EXPORT DBSCANBatchKernel; + +} // namespace internal +} // namespace dbscan +} // namespace algorithms +} // namespace daal diff --git a/cpp/daal/src/algorithms/dbscan/dbscan_ball_tree_batch_impl.i b/cpp/daal/src/algorithms/dbscan/dbscan_ball_tree_batch_impl.i new file mode 100644 index 00000000000..8521e2b7dc7 --- /dev/null +++ b/cpp/daal/src/algorithms/dbscan/dbscan_ball_tree_batch_impl.i @@ -0,0 +1,421 @@ +/* file: dbscan_ball_tree_batch_impl.i */ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/* + * DBSCAN implementation using a ball tree for accelerated epsilon-neighborhood queries. + * + * Ball tree uses hypersphere-bounded spatial partitioning, which is more robust + * in high dimensions than axis-aligned kd-tree splits. + * + * The approach: + * 1. Build a ball tree over the input data + * 2. For each point, find all neighbors within epsilon via tree range search + * 3. Mark core points (those with >= min_observations neighbors) + * 4. Expand clusters from core points using BFS with tree-accelerated queries + */ + +#include +#include +#include +#include +#include +#include + +#include "algorithms/dbscan/dbscan_types.h" +#include "src/algorithms/dbscan/dbscan_kernel.h" +#include "src/algorithms/dbscan/dbscan_utils.h" +#include "src/algorithms/service_error_handling.h" +#include "src/data_management/service_numeric_table.h" +#include "src/externals/service_memory.h" +#include "src/services/service_arrays.h" +#include "src/services/service_defines.h" +#include "src/threading/threading.h" + +namespace daal +{ +namespace algorithms +{ +namespace dbscan +{ +namespace internal +{ + +using daal::internal::ReadRows; +using daal::internal::WriteOnlyRows; +using daal::services::internal::TArray; + +template +struct DbscanBallNode +{ + int left; + int right; + int pointBegin; + int pointEnd; + int centerIdx; + FPType radius; +}; + +template +static FPType euclideanDist(const FPType * a, const FPType * b, int nCols) +{ + FPType sum = FPType(0); + for (int d = 0; d < nCols; d++) + { + const FPType diff = a[d] - b[d]; + sum += diff * diff; + } + return static_cast(sqrt(static_cast(sum))); +} + +template +static int buildDbscanBallTree(const FPType * data, int * pointIndices, int begin, int end, int nCols, DbscanBallNode * nodes, int & nextNode, + int maxLeafSize) +{ + const int nodeIdx = nextNode++; + DbscanBallNode & node = nodes[nodeIdx]; + node.pointBegin = begin; + node.pointEnd = end; + node.left = -1; + node.right = -1; + + const int count = end - begin; + + // Find diameter approximation: pick first, find farthest, find farthest from that + int pivot1 = pointIndices[begin]; + FPType bestDist = FPType(-1); + int pivot2 = pivot1; + for (int i = begin; i < end; i++) + { + const FPType d = euclideanDist(data + pivot1 * nCols, data + pointIndices[i] * nCols, nCols); + if (d > bestDist) + { + bestDist = d; + pivot2 = pointIndices[i]; + } + } + bestDist = FPType(-1); + int pivot3 = pivot2; + for (int i = begin; i < end; i++) + { + const FPType d = euclideanDist(data + pivot2 * nCols, data + pointIndices[i] * nCols, nCols); + if (d > bestDist) + { + bestDist = d; + pivot3 = pointIndices[i]; + } + } + + node.centerIdx = pivot2; + + // Compute radius + FPType maxR = FPType(0); + for (int i = begin; i < end; i++) + { + const FPType d = euclideanDist(data + node.centerIdx * nCols, data + pointIndices[i] * nCols, nCols); + if (d > maxR) maxR = d; + } + node.radius = maxR; + + if (count <= maxLeafSize) + { + return nodeIdx; + } + + // Split by distance to pivot2 vs pivot3 + const FPType * p2 = data + pivot2 * nCols; + const FPType * p3 = data + pivot3 * nCols; + + int mid = begin; + int hi = end - 1; + while (mid <= hi) + { + const FPType d2 = euclideanDist(data + pointIndices[mid] * nCols, p2, nCols); + const FPType d3 = euclideanDist(data + pointIndices[mid] * nCols, p3, nCols); + if (d2 <= d3) + mid++; + else + { + std::swap(pointIndices[mid], pointIndices[hi]); + hi--; + } + } + + if (mid == begin) mid = begin + 1; + if (mid == end) mid = end - 1; + + node.left = buildDbscanBallTree(data, pointIndices, begin, mid, nCols, nodes, nextNode, maxLeafSize); + node.right = buildDbscanBallTree(data, pointIndices, mid, end, nCols, nodes, nextNode, maxLeafSize); + + return nodeIdx; +} + +// Epsilon-range query on ball tree +template +static void ballTreeRangeQuery(const FPType * data, int nCols, const DbscanBallNode * nodes, const int * pointIndices, + const FPType * queryPoint, int nodeIdx, FPType epsilon, std::vector & result) +{ + const DbscanBallNode & node = nodes[nodeIdx]; + + // Prune: if closest point in ball is beyond epsilon + const FPType distToCenter = euclideanDist(queryPoint, data + node.centerIdx * nCols, nCols); + const FPType lowerBound = distToCenter - node.radius; + if (lowerBound > epsilon) return; + + if (node.left < 0) + { + // Leaf: check each point + for (int i = node.pointBegin; i < node.pointEnd; i++) + { + const int pi = pointIndices[i]; + const FPType * row = data + pi * nCols; + FPType distSq = FPType(0); + for (int d = 0; d < nCols; d++) + { + const FPType diff = queryPoint[d] - row[d]; + distSq += diff * diff; + } + if (distSq <= epsilon * epsilon) + { + result.push_back(pi); + } + } + return; + } + + // Visit nearer child first + const FPType dLeft = euclideanDist(queryPoint, data + nodes[node.left].centerIdx * nCols, nCols); + const FPType dRight = euclideanDist(queryPoint, data + nodes[node.right].centerIdx * nCols, nCols); + + if (dLeft <= dRight) + { + ballTreeRangeQuery(data, nCols, nodes, pointIndices, queryPoint, node.left, epsilon, result); + ballTreeRangeQuery(data, nCols, nodes, pointIndices, queryPoint, node.right, epsilon, result); + } + else + { + ballTreeRangeQuery(data, nCols, nodes, pointIndices, queryPoint, node.right, epsilon, result); + ballTreeRangeQuery(data, nCols, nodes, pointIndices, queryPoint, node.left, epsilon, result); + } +} + +// Count neighbors within epsilon on ball tree +template +static int ballTreeCountNeighbors(const FPType * data, int nCols, const DbscanBallNode * nodes, const int * pointIndices, + const FPType * queryPoint, int nodeIdx, FPType epsilon) +{ + const DbscanBallNode & node = nodes[nodeIdx]; + + const FPType distToCenter = euclideanDist(queryPoint, data + node.centerIdx * nCols, nCols); + const FPType lowerBound = distToCenter - node.radius; + if (lowerBound > epsilon) return 0; + + // If entire ball is within epsilon, count all points + if (distToCenter + node.radius <= epsilon) + { + return node.pointEnd - node.pointBegin; + } + + if (node.left < 0) + { + int cnt = 0; + for (int i = node.pointBegin; i < node.pointEnd; i++) + { + const int pi = pointIndices[i]; + const FPType * row = data + pi * nCols; + FPType distSq = FPType(0); + for (int d = 0; d < nCols; d++) + { + const FPType diff = queryPoint[d] - row[d]; + distSq += diff * diff; + } + if (distSq <= epsilon * epsilon) cnt++; + } + return cnt; + } + + return ballTreeCountNeighbors(data, nCols, nodes, pointIndices, queryPoint, node.left, epsilon) + + ballTreeCountNeighbors(data, nCols, nodes, pointIndices, queryPoint, node.right, epsilon); +} + +template +services::Status DBSCANBatchKernel::computeNoMemSave(const NumericTable * ntData, const NumericTable * ntWeights, + NumericTable * ntAssignments, NumericTable * ntNClusters, + NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par) +{ + const size_t nRows = ntData->getNumberOfRows(); + const size_t nCols = ntData->getNumberOfColumns(); + const algorithmFPType epsilon = static_cast(par->epsilon); + const size_t minObservations = par->minObservations; + const int leafSize = 40; + + ReadRows dataRows(const_cast(ntData), 0, nRows); + DAAL_CHECK_BLOCK_STATUS(dataRows); + const algorithmFPType * data = dataRows.get(); + + // Allocate tree structures + const int maxNodes = 2 * static_cast(nRows) + 1; + TArray, cpu> nodesArr(maxNodes); + TArray pointIndicesArr(nRows); + DAAL_CHECK_MALLOC(nodesArr.get() && pointIndicesArr.get()); + + int * pointIndices = pointIndicesArr.get(); + for (size_t i = 0; i < nRows; i++) pointIndices[i] = static_cast(i); + + int nextNode = 0; + buildDbscanBallTree(data, pointIndices, 0, static_cast(nRows), static_cast(nCols), nodesArr.get(), nextNode, leafSize); + + // Phase 1: Identify core points + TArray isCoreArr(nRows); + DAAL_CHECK_MALLOC(isCoreArr.get()); + int * isCore = isCoreArr.get(); + + for (size_t i = 0; i < nRows; i++) + { + const algorithmFPType * queryPoint = data + i * nCols; + const int cnt = ballTreeCountNeighbors(data, static_cast(nCols), nodesArr.get(), pointIndices, queryPoint, 0, epsilon); + isCore[i] = (cnt >= static_cast(minObservations)) ? 1 : 0; + } + + // Phase 2: Cluster expansion via BFS + WriteOnlyRows assignRows(ntAssignments, 0, nRows); + DAAL_CHECK_BLOCK_STATUS(assignRows); + int * assignments = assignRows.get(); + for (size_t i = 0; i < nRows; i++) assignments[i] = undefined; + + int clusterId = 0; + std::vector queue; + std::vector neighbors; + + for (size_t i = 0; i < nRows; i++) + { + if (assignments[i] != undefined) continue; + if (!isCore[i]) + { + assignments[i] = noise; + continue; + } + + assignments[i] = clusterId; + neighbors.clear(); + ballTreeRangeQuery(data, static_cast(nCols), nodesArr.get(), pointIndices, data + i * nCols, 0, epsilon, neighbors); + + queue.clear(); + for (int nb : neighbors) + { + if (assignments[nb] == noise) + { + assignments[nb] = clusterId; + } + else if (assignments[nb] == undefined) + { + assignments[nb] = clusterId; + if (isCore[nb]) queue.push_back(nb); + } + } + + size_t qIdx = 0; + while (qIdx < queue.size()) + { + const int pt = queue[qIdx++]; + neighbors.clear(); + ballTreeRangeQuery(data, static_cast(nCols), nodesArr.get(), pointIndices, data + pt * nCols, 0, epsilon, neighbors); + + for (int nb : neighbors) + { + if (assignments[nb] == noise) + { + assignments[nb] = clusterId; + } + else if (assignments[nb] == undefined) + { + assignments[nb] = clusterId; + if (isCore[nb]) queue.push_back(nb); + } + } + } + + clusterId++; + } + + // Write cluster count + WriteOnlyRows ncRows(ntNClusters, 0, 1); + DAAL_CHECK_BLOCK_STATUS(ncRows); + ncRows.get()[0] = clusterId; + + // Process core indices/observations results + DAAL_UINT64 resultsToCompute = par->resultsToCompute; + if (resultsToCompute & computeCoreIndices) + { + size_t coreCount = 0; + for (size_t i = 0; i < nRows; i++) + if (isCore[i]) coreCount++; + + ntCoreIndices->resize(coreCount); + if (coreCount > 0) + { + WriteOnlyRows coreIdxRows(ntCoreIndices, 0, coreCount); + DAAL_CHECK_BLOCK_STATUS(coreIdxRows); + int * coreIdx = coreIdxRows.get(); + size_t pos = 0; + for (size_t i = 0; i < nRows; i++) + { + if (isCore[i]) coreIdx[pos++] = static_cast(i); + } + } + } + + if (resultsToCompute & computeCoreObservations) + { + size_t coreCount = 0; + for (size_t i = 0; i < nRows; i++) + if (isCore[i]) coreCount++; + + ntCoreObservations->resize(coreCount); + if (coreCount > 0) + { + WriteOnlyRows coreObsRows(ntCoreObservations, 0, coreCount); + DAAL_CHECK_BLOCK_STATUS(coreObsRows); + algorithmFPType * coreObs = coreObsRows.get(); + size_t pos = 0; + for (size_t i = 0; i < nRows; i++) + { + if (isCore[i]) + { + for (size_t d = 0; d < nCols; d++) coreObs[pos * nCols + d] = data[i * nCols + d]; + pos++; + } + } + } + } + + return services::Status(); +} + +template +services::Status DBSCANBatchKernel::computeMemSave(const NumericTable * ntData, const NumericTable * ntWeights, + NumericTable * ntAssignments, NumericTable * ntNClusters, + NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par) +{ + return computeNoMemSave(ntData, ntWeights, ntAssignments, ntNClusters, ntCoreIndices, ntCoreObservations, par); +} + +} // namespace internal +} // namespace dbscan +} // namespace algorithms +} // namespace daal diff --git a/cpp/daal/src/algorithms/dbscan/dbscan_kd_tree_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/dbscan/dbscan_kd_tree_batch_fpt_cpu.cpp new file mode 100644 index 00000000000..87416cbb37e --- /dev/null +++ b/cpp/daal/src/algorithms/dbscan/dbscan_kd_tree_batch_fpt_cpu.cpp @@ -0,0 +1,37 @@ +/* file: dbscan_kd_tree_batch_fpt_cpu.cpp */ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "src/algorithms/dbscan/dbscan_kd_tree_batch_impl.i" +#include "services/daal_defines.h" + +using namespace daal::internal; + +namespace daal +{ +namespace algorithms +{ +namespace dbscan +{ +namespace internal +{ + +template class DAAL_EXPORT DBSCANBatchKernel; + +} // namespace internal +} // namespace dbscan +} // namespace algorithms +} // namespace daal diff --git a/cpp/daal/src/algorithms/dbscan/dbscan_kd_tree_batch_impl.i b/cpp/daal/src/algorithms/dbscan/dbscan_kd_tree_batch_impl.i new file mode 100644 index 00000000000..5cffb36c4e5 --- /dev/null +++ b/cpp/daal/src/algorithms/dbscan/dbscan_kd_tree_batch_impl.i @@ -0,0 +1,408 @@ +/* file: dbscan_kd_tree_batch_impl.i */ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/* + * DBSCAN implementation using a k-d tree for accelerated epsilon-neighborhood queries. + * + * The approach: + * 1. Build a k-d tree over the input data (with bounding boxes per node) + * 2. For each point, find all neighbors within epsilon via tree range search + * 3. Mark core points (those with >= min_observations neighbors) + * 4. Expand clusters from core points using BFS with tree-accelerated queries + * + * Advantage over brute_force: O(N * log N) average for neighborhood queries + * vs O(N^2) brute-force pairwise distance computation. + */ + +#include +#include +#include +#include +#include +#include + +#include "algorithms/dbscan/dbscan_types.h" +#include "src/algorithms/dbscan/dbscan_kernel.h" +#include "src/algorithms/dbscan/dbscan_utils.h" +#include "src/algorithms/service_error_handling.h" +#include "src/data_management/service_numeric_table.h" +#include "src/externals/service_memory.h" +#include "src/services/service_arrays.h" +#include "src/services/service_defines.h" +#include "src/threading/threading.h" + +namespace daal +{ +namespace algorithms +{ +namespace dbscan +{ +namespace internal +{ + +using daal::internal::ReadRows; +using daal::internal::WriteOnlyRows; +using daal::services::internal::TArray; + +template +struct DbscanKdNode +{ + int splitDim; + FPType splitVal; + int left; + int right; + int pointBegin; + int pointEnd; +}; + +template +static int buildDbscanKdTree(const FPType * data, int * pointIndices, int begin, int end, int nCols, DbscanKdNode * nodes, int & nextNode, + int maxLeafSize, FPType * bboxLo, FPType * bboxHi) +{ + const int nodeIdx = nextNode++; + DbscanKdNode & node = nodes[nodeIdx]; + node.pointBegin = begin; + node.pointEnd = end; + + FPType bestSpread = FPType(-1); + int bestDim = 0; + for (int d = 0; d < nCols; d++) + { + FPType lo = std::numeric_limits::max(); + FPType hi = std::numeric_limits::lowest(); + for (int i = begin; i < end; i++) + { + const FPType val = data[pointIndices[i] * nCols + d]; + if (val < lo) lo = val; + if (val > hi) hi = val; + } + bboxLo[nodeIdx * nCols + d] = lo; + bboxHi[nodeIdx * nCols + d] = hi; + const FPType spread = hi - lo; + if (spread > bestSpread) + { + bestSpread = spread; + bestDim = d; + } + } + + const int count = end - begin; + if (count <= maxLeafSize) + { + node.splitDim = -1; + node.splitVal = 0; + node.left = -1; + node.right = -1; + return nodeIdx; + } + + node.splitDim = bestDim; + + const int mid = begin + count / 2; + std::nth_element(pointIndices + begin, pointIndices + mid, pointIndices + end, + [&](int a, int b) { return data[a * nCols + bestDim] < data[b * nCols + bestDim]; }); + + node.splitVal = data[pointIndices[mid] * nCols + bestDim]; + + node.left = buildDbscanKdTree(data, pointIndices, begin, mid, nCols, nodes, nextNode, maxLeafSize, bboxLo, bboxHi); + node.right = buildDbscanKdTree(data, pointIndices, mid, end, nCols, nodes, nextNode, maxLeafSize, bboxLo, bboxHi); + + return nodeIdx; +} + +// Epsilon-range query on kd-tree: find all points within epsilon of queryPoint +template +static void rangeQuery(const FPType * data, int nCols, const DbscanKdNode * nodes, const int * pointIndices, const FPType * bboxLo, + const FPType * bboxHi, const FPType * queryPoint, int nodeIdx, FPType epsilon, std::vector & result) +{ + const DbscanKdNode & node = nodes[nodeIdx]; + + // Prune: if bbox lower bound exceeds epsilon, skip + FPType lbSum = FPType(0); + for (int d = 0; d < nCols; d++) + { + FPType excess = FPType(0); + if (queryPoint[d] < bboxLo[nodeIdx * nCols + d]) + excess = bboxLo[nodeIdx * nCols + d] - queryPoint[d]; + else if (queryPoint[d] > bboxHi[nodeIdx * nCols + d]) + excess = queryPoint[d] - bboxHi[nodeIdx * nCols + d]; + lbSum += excess * excess; + } + if (lbSum > epsilon * epsilon) return; + + if (node.splitDim < 0) + { + // Leaf: check each point + for (int i = node.pointBegin; i < node.pointEnd; i++) + { + const int pi = pointIndices[i]; + const FPType * row = data + pi * nCols; + FPType distSq = FPType(0); + for (int d = 0; d < nCols; d++) + { + const FPType diff = queryPoint[d] - row[d]; + distSq += diff * diff; + } + if (distSq <= epsilon * epsilon) + { + result.push_back(pi); + } + } + return; + } + + const FPType queryVal = queryPoint[node.splitDim]; + const FPType diff = queryVal - node.splitVal; + + const int nearChild = (diff <= 0) ? node.left : node.right; + const int farChild = (diff <= 0) ? node.right : node.left; + + rangeQuery(data, nCols, nodes, pointIndices, bboxLo, bboxHi, queryPoint, nearChild, epsilon, result); + + if (diff * diff <= epsilon * epsilon) + { + rangeQuery(data, nCols, nodes, pointIndices, bboxLo, bboxHi, queryPoint, farChild, epsilon, result); + } +} + +// Count neighbors within epsilon (stops early once min_observations reached) +template +static int countNeighbors(const FPType * data, int nCols, const DbscanKdNode * nodes, const int * pointIndices, const FPType * bboxLo, + const FPType * bboxHi, const FPType * queryPoint, int nodeIdx, FPType epsilon, int minObs) +{ + const DbscanKdNode & node = nodes[nodeIdx]; + + FPType lbSum = FPType(0); + for (int d = 0; d < nCols; d++) + { + FPType excess = FPType(0); + if (queryPoint[d] < bboxLo[nodeIdx * nCols + d]) + excess = bboxLo[nodeIdx * nCols + d] - queryPoint[d]; + else if (queryPoint[d] > bboxHi[nodeIdx * nCols + d]) + excess = queryPoint[d] - bboxHi[nodeIdx * nCols + d]; + lbSum += excess * excess; + } + if (lbSum > epsilon * epsilon) return 0; + + if (node.splitDim < 0) + { + int cnt = 0; + for (int i = node.pointBegin; i < node.pointEnd; i++) + { + const int pi = pointIndices[i]; + const FPType * row = data + pi * nCols; + FPType distSq = FPType(0); + for (int d = 0; d < nCols; d++) + { + const FPType diff = queryPoint[d] - row[d]; + distSq += diff * diff; + } + if (distSq <= epsilon * epsilon) cnt++; + } + return cnt; + } + + const FPType queryVal = queryPoint[node.splitDim]; + const FPType diff = queryVal - node.splitVal; + + const int nearChild = (diff <= 0) ? node.left : node.right; + const int farChild = (diff <= 0) ? node.right : node.left; + + int cnt = countNeighbors(data, nCols, nodes, pointIndices, bboxLo, bboxHi, queryPoint, nearChild, epsilon, minObs); + + if (diff * diff <= epsilon * epsilon) + { + cnt += countNeighbors(data, nCols, nodes, pointIndices, bboxLo, bboxHi, queryPoint, farChild, epsilon, minObs); + } + return cnt; +} + +template +services::Status DBSCANBatchKernel::computeNoMemSave(const NumericTable * ntData, const NumericTable * ntWeights, + NumericTable * ntAssignments, NumericTable * ntNClusters, + NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par) +{ + const size_t nRows = ntData->getNumberOfRows(); + const size_t nCols = ntData->getNumberOfColumns(); + const algorithmFPType epsilon = static_cast(par->epsilon); + const size_t minObservations = par->minObservations; + const int leafSize = 40; + + ReadRows dataRows(const_cast(ntData), 0, nRows); + DAAL_CHECK_BLOCK_STATUS(dataRows); + const algorithmFPType * data = dataRows.get(); + + // Allocate tree structures + const int maxNodes = 2 * static_cast(nRows) + 1; + TArray, cpu> nodesArr(maxNodes); + TArray pointIndicesArr(nRows); + TArray bboxLoArr(maxNodes * nCols); + TArray bboxHiArr(maxNodes * nCols); + DAAL_CHECK_MALLOC(nodesArr.get() && pointIndicesArr.get() && bboxLoArr.get() && bboxHiArr.get()); + + int * pointIndices = pointIndicesArr.get(); + for (size_t i = 0; i < nRows; i++) pointIndices[i] = static_cast(i); + + int nextNode = 0; + buildDbscanKdTree(data, pointIndices, 0, static_cast(nRows), static_cast(nCols), nodesArr.get(), nextNode, leafSize, bboxLoArr.get(), + bboxHiArr.get()); + + // Phase 1: Identify core points + TArray isCoreArr(nRows); + DAAL_CHECK_MALLOC(isCoreArr.get()); + int * isCore = isCoreArr.get(); + + for (size_t i = 0; i < nRows; i++) + { + const algorithmFPType * queryPoint = data + i * nCols; + const int cnt = countNeighbors(data, static_cast(nCols), nodesArr.get(), pointIndices, bboxLoArr.get(), bboxHiArr.get(), queryPoint, 0, + epsilon, static_cast(minObservations)); + isCore[i] = (cnt >= static_cast(minObservations)) ? 1 : 0; + } + + // Phase 2: Cluster expansion via BFS + WriteOnlyRows assignRows(ntAssignments, 0, nRows); + DAAL_CHECK_BLOCK_STATUS(assignRows); + int * assignments = assignRows.get(); + for (size_t i = 0; i < nRows; i++) assignments[i] = undefined; + + int clusterId = 0; + std::vector queue; + std::vector neighbors; + + for (size_t i = 0; i < nRows; i++) + { + if (assignments[i] != undefined) continue; + if (!isCore[i]) + { + assignments[i] = noise; + continue; + } + + assignments[i] = clusterId; + neighbors.clear(); + rangeQuery(data, static_cast(nCols), nodesArr.get(), pointIndices, bboxLoArr.get(), bboxHiArr.get(), data + i * nCols, 0, epsilon, + neighbors); + + queue.clear(); + for (int nb : neighbors) + { + if (assignments[nb] == noise) + { + assignments[nb] = clusterId; + } + else if (assignments[nb] == undefined) + { + assignments[nb] = clusterId; + if (isCore[nb]) queue.push_back(nb); + } + } + + size_t qIdx = 0; + while (qIdx < queue.size()) + { + const int pt = queue[qIdx++]; + neighbors.clear(); + rangeQuery(data, static_cast(nCols), nodesArr.get(), pointIndices, bboxLoArr.get(), bboxHiArr.get(), data + pt * nCols, 0, epsilon, + neighbors); + + for (int nb : neighbors) + { + if (assignments[nb] == noise) + { + assignments[nb] = clusterId; + } + else if (assignments[nb] == undefined) + { + assignments[nb] = clusterId; + if (isCore[nb]) queue.push_back(nb); + } + } + } + + clusterId++; + } + + // Write cluster count + WriteOnlyRows ncRows(ntNClusters, 0, 1); + DAAL_CHECK_BLOCK_STATUS(ncRows); + ncRows.get()[0] = clusterId; + + // Process core indices/observations results + DAAL_UINT64 resultsToCompute = par->resultsToCompute; + if (resultsToCompute & computeCoreIndices) + { + size_t coreCount = 0; + for (size_t i = 0; i < nRows; i++) + if (isCore[i]) coreCount++; + + ntCoreIndices->resize(coreCount); + if (coreCount > 0) + { + WriteOnlyRows coreIdxRows(ntCoreIndices, 0, coreCount); + DAAL_CHECK_BLOCK_STATUS(coreIdxRows); + int * coreIdx = coreIdxRows.get(); + size_t pos = 0; + for (size_t i = 0; i < nRows; i++) + { + if (isCore[i]) coreIdx[pos++] = static_cast(i); + } + } + } + + if (resultsToCompute & computeCoreObservations) + { + size_t coreCount = 0; + for (size_t i = 0; i < nRows; i++) + if (isCore[i]) coreCount++; + + ntCoreObservations->resize(coreCount); + if (coreCount > 0) + { + WriteOnlyRows coreObsRows(ntCoreObservations, 0, coreCount); + DAAL_CHECK_BLOCK_STATUS(coreObsRows); + algorithmFPType * coreObs = coreObsRows.get(); + size_t pos = 0; + for (size_t i = 0; i < nRows; i++) + { + if (isCore[i]) + { + for (size_t d = 0; d < nCols; d++) coreObs[pos * nCols + d] = data[i * nCols + d]; + pos++; + } + } + } + } + + return services::Status(); +} + +template +services::Status DBSCANBatchKernel::computeMemSave(const NumericTable * ntData, const NumericTable * ntWeights, + NumericTable * ntAssignments, NumericTable * ntNClusters, + NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par) +{ + // Memory-save mode uses the same tree approach (already more memory-efficient than brute-force) + return computeNoMemSave(ntData, ntWeights, ntAssignments, ntNClusters, ntCoreIndices, ntCoreObservations, par); +} + +} // namespace internal +} // namespace dbscan +} // namespace algorithms +} // namespace daal diff --git a/cpp/daal/src/algorithms/dbscan/dbscan_kernel.h b/cpp/daal/src/algorithms/dbscan/dbscan_kernel.h index cceaf7e0e93..43fc0b2af06 100644 --- a/cpp/daal/src/algorithms/dbscan/dbscan_kernel.h +++ b/cpp/daal/src/algorithms/dbscan/dbscan_kernel.h @@ -63,6 +63,32 @@ class DBSCANBatchKernel : public Kernel NumericTable * ntCoreIndices, NumericTable * ntCoreObservations); }; +template +class DBSCANBatchKernel : public Kernel +{ +public: + services::Status computeNoMemSave(const NumericTable * ntData, const NumericTable * ntWeights, NumericTable * ntAssignments, + NumericTable * ntNClusters, NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par); + + services::Status computeMemSave(const NumericTable * ntData, const NumericTable * ntWeights, NumericTable * ntAssignments, + NumericTable * ntNClusters, NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par); +}; + +template +class DBSCANBatchKernel : public Kernel +{ +public: + services::Status computeNoMemSave(const NumericTable * ntData, const NumericTable * ntWeights, NumericTable * ntAssignments, + NumericTable * ntNClusters, NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par); + + services::Status computeMemSave(const NumericTable * ntData, const NumericTable * ntWeights, NumericTable * ntAssignments, + NumericTable * ntNClusters, NumericTable * ntCoreIndices, NumericTable * ntCoreObservations, + const Parameter * par); +}; + template class DBSCANDistrStep1Kernel : public Kernel { diff --git a/cpp/oneapi/dal/algo/dbscan/backend/cpu/compute_kernel_ball_tree.cpp b/cpp/oneapi/dal/algo/dbscan/backend/cpu/compute_kernel_ball_tree.cpp new file mode 100644 index 00000000000..9a3378eabb3 --- /dev/null +++ b/cpp/oneapi/dal/algo/dbscan/backend/cpu/compute_kernel_ball_tree.cpp @@ -0,0 +1,143 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/interop/common.hpp" +#include "oneapi/dal/backend/interop/table_conversion.hpp" +#include "oneapi/dal/algo/dbscan/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/algo/dbscan/backend/fill_core_flags.hpp" + +#include + +namespace oneapi::dal::dbscan::backend { + +using dal::backend::context_cpu; + +using descriptor_t = detail::descriptor_base; +using result_t = compute_result; +using input_t = compute_input; + +namespace daal_dbscan = daal::algorithms::dbscan; +namespace interop = dal::backend::interop; + +template +using daal_dbscan_ball_tree_t = + daal_dbscan::internal::DBSCANBatchKernel; + +template +class dbscan_ball_tree_compute_wrapper { +public: + template + auto compute(Args&&... args) { + const daal_dbscan::Parameter* par = + std::get(std::forward_as_tuple(args...)); + if (par->memorySavingMode == false) { + return daal_dbscan::internal::DBSCANBatchKernel{} + .computeNoMemSave(std::forward(args)...); + } + return daal_dbscan::internal::DBSCANBatchKernel{} + .computeMemSave(std::forward(args)...); + } +}; + +template +static result_t call_daal_kernel(const context_cpu& ctx, + const descriptor_t& desc, + const table& data, + const table& weights) { + const std::int64_t row_count = data.get_row_count(); + const std::int64_t column_count = data.get_column_count(); + + const double epsilon = desc.get_epsilon(); + const std::int64_t min_observations = desc.get_min_observations(); + + daal_dbscan::Parameter par(epsilon, dal::detail::integral_cast(min_observations)); + par.memorySavingMode = desc.get_mem_save_mode(); + if (desc.get_result_options().test(result_options::core_observation_indices)) { + par.resultsToCompute = daal_dbscan::computeCoreIndices; + } + else if (desc.get_result_options().test(result_options::core_observations)) { + par.resultsToCompute = daal_dbscan::computeCoreObservations; + } + if (desc.get_result_options().test(result_options::core_observations)) { + par.resultsToCompute |= daal_dbscan::computeCoreObservations; + } + + const auto daal_data = interop::convert_to_daal_table(data); + const auto daal_weights = interop::convert_to_daal_table(weights); + + array arr_responses = array::empty(row_count * 1); + array arr_cluster_count = array::empty(1); + + const auto daal_responses = interop::convert_to_daal_homogen_table(arr_responses, row_count, 1); + const auto daal_core_observation_indices = interop::empty_daal_homogen_table(1); + const auto daal_core_observations = interop::empty_daal_homogen_table(column_count); + const auto daal_cluster_count = interop::convert_to_daal_homogen_table(arr_cluster_count, 1, 1); + + interop::status_to_exception(interop::call_daal_kernel( + ctx, + daal_data.get(), + daal_weights.get(), + daal_responses.get(), + daal_cluster_count.get(), + daal_core_observation_indices.get(), + daal_core_observations.get(), + &par)); + + auto core_observation_indices = + interop::convert_from_daal_homogen_table(daal_core_observation_indices); + auto core_observations = + interop::convert_from_daal_homogen_table(daal_core_observations); + auto results = result_t() + .set_cluster_count(arr_cluster_count[0]) + .set_result_options(desc.get_result_options()); + + if (desc.get_result_options().test(result_options::responses)) { + results.set_responses(dal::homogen_table::wrap(arr_responses, row_count, 1)); + } + if (desc.get_result_options().test(result_options::core_flags)) { + auto arr_core_flags = fill_core_flags(core_observation_indices, row_count); + results.set_core_flags(dal::homogen_table::wrap(arr_core_flags, row_count, 1)); + } + + if (desc.get_result_options().test(result_options::core_observation_indices)) { + results.set_core_observation_indices(core_observation_indices); + } + + if (desc.get_result_options().test(result_options::core_observations)) { + results.set_core_observations(core_observations); + } + + return results; +} + +template +static result_t compute(const context_cpu& ctx, const descriptor_t& desc, const input_t& input) { + return call_daal_kernel(ctx, desc, input.get_data(), input.get_weights()); +} + +template +struct compute_kernel_cpu { + result_t operator()(const context_cpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } +}; + +template struct compute_kernel_cpu; +template struct compute_kernel_cpu; + +} // namespace oneapi::dal::dbscan::backend diff --git a/cpp/oneapi/dal/algo/dbscan/backend/cpu/compute_kernel_kd_tree.cpp b/cpp/oneapi/dal/algo/dbscan/backend/cpu/compute_kernel_kd_tree.cpp new file mode 100644 index 00000000000..e9d11114542 --- /dev/null +++ b/cpp/oneapi/dal/algo/dbscan/backend/cpu/compute_kernel_kd_tree.cpp @@ -0,0 +1,143 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/interop/common.hpp" +#include "oneapi/dal/backend/interop/table_conversion.hpp" +#include "oneapi/dal/algo/dbscan/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/algo/dbscan/backend/fill_core_flags.hpp" + +#include + +namespace oneapi::dal::dbscan::backend { + +using dal::backend::context_cpu; + +using descriptor_t = detail::descriptor_base; +using result_t = compute_result; +using input_t = compute_input; + +namespace daal_dbscan = daal::algorithms::dbscan; +namespace interop = dal::backend::interop; + +template +using daal_dbscan_kd_tree_t = + daal_dbscan::internal::DBSCANBatchKernel; + +template +class dbscan_kd_tree_compute_wrapper { +public: + template + auto compute(Args&&... args) { + const daal_dbscan::Parameter* par = + std::get(std::forward_as_tuple(args...)); + if (par->memorySavingMode == false) { + return daal_dbscan::internal::DBSCANBatchKernel{} + .computeNoMemSave(std::forward(args)...); + } + return daal_dbscan::internal::DBSCANBatchKernel{} + .computeMemSave(std::forward(args)...); + } +}; + +template +static result_t call_daal_kernel(const context_cpu& ctx, + const descriptor_t& desc, + const table& data, + const table& weights) { + const std::int64_t row_count = data.get_row_count(); + const std::int64_t column_count = data.get_column_count(); + + const double epsilon = desc.get_epsilon(); + const std::int64_t min_observations = desc.get_min_observations(); + + daal_dbscan::Parameter par(epsilon, dal::detail::integral_cast(min_observations)); + par.memorySavingMode = desc.get_mem_save_mode(); + if (desc.get_result_options().test(result_options::core_observation_indices)) { + par.resultsToCompute = daal_dbscan::computeCoreIndices; + } + else if (desc.get_result_options().test(result_options::core_observations)) { + par.resultsToCompute = daal_dbscan::computeCoreObservations; + } + if (desc.get_result_options().test(result_options::core_observations)) { + par.resultsToCompute |= daal_dbscan::computeCoreObservations; + } + + const auto daal_data = interop::convert_to_daal_table(data); + const auto daal_weights = interop::convert_to_daal_table(weights); + + array arr_responses = array::empty(row_count * 1); + array arr_cluster_count = array::empty(1); + + const auto daal_responses = interop::convert_to_daal_homogen_table(arr_responses, row_count, 1); + const auto daal_core_observation_indices = interop::empty_daal_homogen_table(1); + const auto daal_core_observations = interop::empty_daal_homogen_table(column_count); + const auto daal_cluster_count = interop::convert_to_daal_homogen_table(arr_cluster_count, 1, 1); + + interop::status_to_exception(interop::call_daal_kernel( + ctx, + daal_data.get(), + daal_weights.get(), + daal_responses.get(), + daal_cluster_count.get(), + daal_core_observation_indices.get(), + daal_core_observations.get(), + &par)); + + auto core_observation_indices = + interop::convert_from_daal_homogen_table(daal_core_observation_indices); + auto core_observations = + interop::convert_from_daal_homogen_table(daal_core_observations); + auto results = result_t() + .set_cluster_count(arr_cluster_count[0]) + .set_result_options(desc.get_result_options()); + + if (desc.get_result_options().test(result_options::responses)) { + results.set_responses(dal::homogen_table::wrap(arr_responses, row_count, 1)); + } + if (desc.get_result_options().test(result_options::core_flags)) { + auto arr_core_flags = fill_core_flags(core_observation_indices, row_count); + results.set_core_flags(dal::homogen_table::wrap(arr_core_flags, row_count, 1)); + } + + if (desc.get_result_options().test(result_options::core_observation_indices)) { + results.set_core_observation_indices(core_observation_indices); + } + + if (desc.get_result_options().test(result_options::core_observations)) { + results.set_core_observations(core_observations); + } + + return results; +} + +template +static result_t compute(const context_cpu& ctx, const descriptor_t& desc, const input_t& input) { + return call_daal_kernel(ctx, desc, input.get_data(), input.get_weights()); +} + +template +struct compute_kernel_cpu { + result_t operator()(const context_cpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } +}; + +template struct compute_kernel_cpu; +template struct compute_kernel_cpu; + +} // namespace oneapi::dal::dbscan::backend diff --git a/cpp/oneapi/dal/algo/dbscan/backend/gpu/compute_kernel_ball_tree_dpc.cpp b/cpp/oneapi/dal/algo/dbscan/backend/gpu/compute_kernel_ball_tree_dpc.cpp new file mode 100644 index 00000000000..61458e51b06 --- /dev/null +++ b/cpp/oneapi/dal/algo/dbscan/backend/gpu/compute_kernel_ball_tree_dpc.cpp @@ -0,0 +1,341 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/dbscan/backend/gpu/compute_kernel.hpp" +#include "oneapi/dal/algo/dbscan/backend/gpu/results.hpp" + +#include "oneapi/dal/detail/profiler.hpp" + +#include "oneapi/dal/backend/common.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/backend/communicator.hpp" + +namespace oneapi::dal::dbscan::backend { + +namespace bk = oneapi::dal::backend; +namespace pr = oneapi::dal::backend::primitives; +namespace spmd = oneapi::dal::preview::spmd; +namespace de = oneapi::dal::detail; + +using dal::backend::context_gpu; + +using descriptor_t = detail::descriptor_base; +using result_t = compute_result; +using input_t = compute_input; + +template +static result_t compute_kernel_ball_tree_impl(const context_gpu& ctx, + const descriptor_t& desc, + const table& local_data, + const table& local_weights) { + auto& comm = ctx.get_communicator(); + auto& queue = ctx.get_queue(); + + std::int64_t rank_count = comm.get_rank_count(); + + auto current_rank = comm.get_rank(); + + auto prev_node = (current_rank - 1 + rank_count) % rank_count; + auto next_node = (current_rank + 1) % rank_count; + + const std::int64_t local_row_count = local_data.get_row_count(); + const std::int64_t column_count = local_data.get_column_count(); + + std::int64_t global_row_count = local_row_count; + + std::int64_t max_local_block_size = local_row_count; + { + ONEDAL_PROFILER_TASK(allreduce_rows_count_global, queue); + comm.allreduce(max_local_block_size, spmd::reduce_op::max).wait(); + } + + auto send_recv_replace_local_size = array::zeros(1); + send_recv_replace_local_size.get_mutable_data()[0] = local_row_count; + + auto global_rank_offsets = array::zeros(rank_count); + global_rank_offsets.get_mutable_data()[current_rank] = local_row_count; + { + ONEDAL_PROFILER_TASK(allreduce_recv_counts, queue); + comm.allreduce(global_rank_offsets, spmd::reduce_op::sum).wait(); + } + { + ONEDAL_PROFILER_TASK(allreduce_rows_count_global, queue); + comm.allreduce(global_row_count, spmd::reduce_op::sum).wait(); + } + + std::int64_t local_offset = 0; + + for (std::int64_t i = 0; i < current_rank; i++) { + ONEDAL_ASSERT(global_rank_offsets.get_data()[i] >= 0); + local_offset += global_rank_offsets.get_data()[i]; + } + + const auto data_nd = pr::table2ndarray(queue, local_data, sycl::usm::alloc::device); + + pr::ndarray data_nd_replace; + if (rank_count > 1) { + auto data_copy_count = column_count * local_row_count; + data_nd_replace = pr::ndarray::empty(queue, + { max_local_block_size, column_count }, + sycl::usm::alloc::device); + bk::copy(queue, data_nd_replace.get_mutable_data(), data_nd.get_data(), data_copy_count, {}) + .wait_and_throw(); + } + + bool use_weights = false; + if (local_weights.get_row_count() == data_nd.get_dimension(0)) { + use_weights = true; + } + + pr::ndarray weights_nd; + if (use_weights) { + weights_nd = pr::table2ndarray(queue, local_weights, sycl::usm::alloc::device); + } + + pr::ndarray weights_nd_replace; + if (rank_count > 1 && use_weights) { + auto weights_copy_count = 1 * local_row_count; + weights_nd_replace = pr::ndarray::empty(queue, + { max_local_block_size, 1 }, + sycl::usm::alloc::device); + bk::copy(queue, + weights_nd_replace.get_mutable_data(), + weights_nd.get_data(), + weights_copy_count, + {}) + .wait_and_throw(); + } + const Float epsilon = desc.get_epsilon() * desc.get_epsilon(); + const std::int64_t min_observations = desc.get_min_observations(); + + auto [arr_cores, cores_event] = + pr::ndarray::full(queue, local_row_count, 0, sycl::usm::alloc::device); + + auto [arr_neighbours, neighbours_event] = + pr::ndarray::full(queue, local_row_count, 0, sycl::usm::alloc::device); + + auto [arr_responses, responses_event] = + pr::ndarray::full(queue, local_row_count, -1, sycl::usm::alloc::device); + + auto [observation_indices, observation_indices_event] = + pr::ndarray::full(queue, local_row_count, false, sycl::usm::alloc::device); + + auto [total_points_queue_size_arr, total_points_queue_size_event] = + pr::ndarray::full(queue, 1, 0, sycl::usm::alloc::device); + + auto [local_points_queue_size_arr, local_points_queue_size_event] = + pr::ndarray::full(queue, 1, 0, sycl::usm::alloc::device); + sycl::event::wait({ cores_event, + neighbours_event, + responses_event, + observation_indices_event, + total_points_queue_size_event, + local_points_queue_size_event }); + + auto get_cores_event = kernels_fp::get_cores(queue, + data_nd, + weights_nd, + arr_cores, + arr_neighbours, + epsilon, + min_observations); + + for (std::int64_t j = 0; j < rank_count - 1; j++) { + comm.sendrecv_replace(queue, + data_nd_replace.get_mutable_data(), + max_local_block_size * column_count, + prev_node, + next_node) + .wait(); + if (use_weights) { + comm.sendrecv_replace(queue, + weights_nd_replace.get_mutable_data(), + max_local_block_size, + prev_node, + next_node) + .wait(); + } + comm.sendrecv_replace(send_recv_replace_local_size.get_mutable_data(), + 1, + prev_node, + next_node) + .wait(); + auto local_row_block_count = send_recv_replace_local_size.get_data()[0]; + auto actual_current_block = data_nd_replace.get_row_slice(0, local_row_block_count); + if (use_weights) { + auto actual_weights = weights_nd_replace.get_row_slice(0, local_row_block_count); + kernels_fp::get_cores_send_recv_replace(queue, + data_nd, + actual_current_block, + actual_weights, + arr_cores, + arr_neighbours, + epsilon, + min_observations, + { get_cores_event }) + .wait_and_throw(); + } + else { + kernels_fp::get_cores_send_recv_replace(queue, + data_nd, + actual_current_block, + weights_nd, + arr_cores, + arr_neighbours, + epsilon, + min_observations, + { get_cores_event }) + .wait_and_throw(); + } + } + + std::int64_t cluster_count = 0; + + std::int32_t cluster_index = + kernels_fp::start_next_cluster(queue, arr_cores, arr_responses, { get_cores_event }); + cluster_index = + cluster_index < local_row_count ? cluster_index + local_offset : global_row_count; + { + ONEDAL_PROFILER_TASK(allreduce_cluster_index, queue); + comm.allreduce(cluster_index, spmd::reduce_op::min).wait(); + } + + if (cluster_index < 0) { + return make_results(queue, desc, data_nd, arr_responses, arr_cores, 0, 0); + } + + while (cluster_index < de::integral_cast(global_row_count)) { + cluster_count++; + bool in_range = + cluster_index >= local_offset && cluster_index < local_offset + local_row_count; + + std::int32_t local_points_queue_size = 0; + + if (in_range) { + set_arr_value(queue, arr_responses, cluster_index - local_offset, cluster_count - 1) + .wait_and_throw(); + set_init_index(queue, observation_indices, cluster_index - local_offset, true) + .wait_and_throw(); + local_points_queue_size++; + } + + std::int32_t total_points_queue_size = local_points_queue_size; + + { + ONEDAL_PROFILER_TASK(allreduce_total_points_queue_size_outer, queue); + comm.allreduce(total_points_queue_size, spmd::reduce_op::sum).wait(); + } + + while (total_points_queue_size != 0) { + auto recv_counts = array::zeros(rank_count); + recv_counts.get_mutable_data()[current_rank] = local_points_queue_size; + { + ONEDAL_PROFILER_TASK(allreduce_recv_counts, queue); + comm.allreduce(recv_counts, spmd::reduce_op::sum).wait(); + } + + auto displs = array::zeros(rank_count); + auto displs_ptr = displs.get_mutable_data(); + std::int64_t total_count = 0; + for (std::int64_t i = 0; i < rank_count; i++) { + displs_ptr[i] = total_count; + total_count += recv_counts.get_data()[i]; + } + + auto [current_points_queue, current_points_queue_event] = + pr::ndarray::full(queue, + { total_points_queue_size, column_count }, + 0, + sycl::usm::alloc::device); + + set_arr_value(queue, total_points_queue_size_arr, 0, total_points_queue_size) + .wait_and_throw(); + + sycl::event fill_queue_event; + + if (local_points_queue_size != 0) { + fill_queue_event = + kernels_fp::fill_current_points_queue(queue, + data_nd, + observation_indices, + current_points_queue, + local_points_queue_size_arr, + displs_ptr[current_rank], + { current_points_queue_event }); + set_arr_value(queue, local_points_queue_size_arr, 0, 0, { fill_queue_event }) + .wait_and_throw(); + } + { + ONEDAL_PROFILER_TASK(allreduce_xtx, queue); + comm.allreduce(current_points_queue.flatten(queue, { fill_queue_event }), + spmd::reduce_op::sum) + .wait(); + } + + kernels_fp::update_points_queue(queue, + data_nd, + arr_cores, + current_points_queue, + arr_responses, + total_points_queue_size_arr, + observation_indices, + epsilon, + cluster_count - 1, + { fill_queue_event }) + .wait_and_throw(); + + local_points_queue_size = + kernels_fp::get_points_queue_size(queue, total_points_queue_size_arr); + + total_points_queue_size = local_points_queue_size; + + { + ONEDAL_PROFILER_TASK(allreduce_total_points_queue_size_inner, queue); + comm.allreduce(total_points_queue_size, spmd::reduce_op::sum).wait(); + } + } + + cluster_index = kernels_fp::start_next_cluster(queue, arr_cores, arr_responses); + cluster_index = + cluster_index < local_row_count ? cluster_index + local_offset : global_row_count; + { + ONEDAL_PROFILER_TASK(cluster_index, queue); + comm.allreduce(cluster_index, spmd::reduce_op::min).wait(); + } + } + + return make_results(queue, desc, data_nd, arr_responses, arr_cores, cluster_count); +} + +template +static result_t compute(const context_gpu& ctx, const descriptor_t& desc, const input_t& input) { + return compute_kernel_ball_tree_impl(ctx, desc, input.get_data(), input.get_weights()); +} + +template +struct compute_kernel_gpu { + result_t operator()(const context_gpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } +}; + +template struct compute_kernel_gpu; +template struct compute_kernel_gpu; + +} // namespace oneapi::dal::dbscan::backend diff --git a/cpp/oneapi/dal/algo/dbscan/backend/gpu/compute_kernel_kd_tree_dpc.cpp b/cpp/oneapi/dal/algo/dbscan/backend/gpu/compute_kernel_kd_tree_dpc.cpp new file mode 100644 index 00000000000..24b836c8f3f --- /dev/null +++ b/cpp/oneapi/dal/algo/dbscan/backend/gpu/compute_kernel_kd_tree_dpc.cpp @@ -0,0 +1,341 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/dbscan/backend/gpu/compute_kernel.hpp" +#include "oneapi/dal/algo/dbscan/backend/gpu/results.hpp" + +#include "oneapi/dal/detail/profiler.hpp" + +#include "oneapi/dal/backend/common.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/backend/communicator.hpp" + +namespace oneapi::dal::dbscan::backend { + +namespace bk = oneapi::dal::backend; +namespace pr = oneapi::dal::backend::primitives; +namespace spmd = oneapi::dal::preview::spmd; +namespace de = oneapi::dal::detail; + +using dal::backend::context_gpu; + +using descriptor_t = detail::descriptor_base; +using result_t = compute_result; +using input_t = compute_input; + +template +static result_t compute_kernel_kd_tree_impl(const context_gpu& ctx, + const descriptor_t& desc, + const table& local_data, + const table& local_weights) { + auto& comm = ctx.get_communicator(); + auto& queue = ctx.get_queue(); + + std::int64_t rank_count = comm.get_rank_count(); + + auto current_rank = comm.get_rank(); + + auto prev_node = (current_rank - 1 + rank_count) % rank_count; + auto next_node = (current_rank + 1) % rank_count; + + const std::int64_t local_row_count = local_data.get_row_count(); + const std::int64_t column_count = local_data.get_column_count(); + + std::int64_t global_row_count = local_row_count; + + std::int64_t max_local_block_size = local_row_count; + { + ONEDAL_PROFILER_TASK(allreduce_rows_count_global, queue); + comm.allreduce(max_local_block_size, spmd::reduce_op::max).wait(); + } + + auto send_recv_replace_local_size = array::zeros(1); + send_recv_replace_local_size.get_mutable_data()[0] = local_row_count; + + auto global_rank_offsets = array::zeros(rank_count); + global_rank_offsets.get_mutable_data()[current_rank] = local_row_count; + { + ONEDAL_PROFILER_TASK(allreduce_recv_counts, queue); + comm.allreduce(global_rank_offsets, spmd::reduce_op::sum).wait(); + } + { + ONEDAL_PROFILER_TASK(allreduce_rows_count_global, queue); + comm.allreduce(global_row_count, spmd::reduce_op::sum).wait(); + } + + std::int64_t local_offset = 0; + + for (std::int64_t i = 0; i < current_rank; i++) { + ONEDAL_ASSERT(global_rank_offsets.get_data()[i] >= 0); + local_offset += global_rank_offsets.get_data()[i]; + } + + const auto data_nd = pr::table2ndarray(queue, local_data, sycl::usm::alloc::device); + + pr::ndarray data_nd_replace; + if (rank_count > 1) { + auto data_copy_count = column_count * local_row_count; + data_nd_replace = pr::ndarray::empty(queue, + { max_local_block_size, column_count }, + sycl::usm::alloc::device); + bk::copy(queue, data_nd_replace.get_mutable_data(), data_nd.get_data(), data_copy_count, {}) + .wait_and_throw(); + } + + bool use_weights = false; + if (local_weights.get_row_count() == data_nd.get_dimension(0)) { + use_weights = true; + } + + pr::ndarray weights_nd; + if (use_weights) { + weights_nd = pr::table2ndarray(queue, local_weights, sycl::usm::alloc::device); + } + + pr::ndarray weights_nd_replace; + if (rank_count > 1 && use_weights) { + auto weights_copy_count = 1 * local_row_count; + weights_nd_replace = pr::ndarray::empty(queue, + { max_local_block_size, 1 }, + sycl::usm::alloc::device); + bk::copy(queue, + weights_nd_replace.get_mutable_data(), + weights_nd.get_data(), + weights_copy_count, + {}) + .wait_and_throw(); + } + const Float epsilon = desc.get_epsilon() * desc.get_epsilon(); + const std::int64_t min_observations = desc.get_min_observations(); + + auto [arr_cores, cores_event] = + pr::ndarray::full(queue, local_row_count, 0, sycl::usm::alloc::device); + + auto [arr_neighbours, neighbours_event] = + pr::ndarray::full(queue, local_row_count, 0, sycl::usm::alloc::device); + + auto [arr_responses, responses_event] = + pr::ndarray::full(queue, local_row_count, -1, sycl::usm::alloc::device); + + auto [observation_indices, observation_indices_event] = + pr::ndarray::full(queue, local_row_count, false, sycl::usm::alloc::device); + + auto [total_points_queue_size_arr, total_points_queue_size_event] = + pr::ndarray::full(queue, 1, 0, sycl::usm::alloc::device); + + auto [local_points_queue_size_arr, local_points_queue_size_event] = + pr::ndarray::full(queue, 1, 0, sycl::usm::alloc::device); + sycl::event::wait({ cores_event, + neighbours_event, + responses_event, + observation_indices_event, + total_points_queue_size_event, + local_points_queue_size_event }); + + auto get_cores_event = kernels_fp::get_cores(queue, + data_nd, + weights_nd, + arr_cores, + arr_neighbours, + epsilon, + min_observations); + + for (std::int64_t j = 0; j < rank_count - 1; j++) { + comm.sendrecv_replace(queue, + data_nd_replace.get_mutable_data(), + max_local_block_size * column_count, + prev_node, + next_node) + .wait(); + if (use_weights) { + comm.sendrecv_replace(queue, + weights_nd_replace.get_mutable_data(), + max_local_block_size, + prev_node, + next_node) + .wait(); + } + comm.sendrecv_replace(send_recv_replace_local_size.get_mutable_data(), + 1, + prev_node, + next_node) + .wait(); + auto local_row_block_count = send_recv_replace_local_size.get_data()[0]; + auto actual_current_block = data_nd_replace.get_row_slice(0, local_row_block_count); + if (use_weights) { + auto actual_weights = weights_nd_replace.get_row_slice(0, local_row_block_count); + kernels_fp::get_cores_send_recv_replace(queue, + data_nd, + actual_current_block, + actual_weights, + arr_cores, + arr_neighbours, + epsilon, + min_observations, + { get_cores_event }) + .wait_and_throw(); + } + else { + kernels_fp::get_cores_send_recv_replace(queue, + data_nd, + actual_current_block, + weights_nd, + arr_cores, + arr_neighbours, + epsilon, + min_observations, + { get_cores_event }) + .wait_and_throw(); + } + } + + std::int64_t cluster_count = 0; + + std::int32_t cluster_index = + kernels_fp::start_next_cluster(queue, arr_cores, arr_responses, { get_cores_event }); + cluster_index = + cluster_index < local_row_count ? cluster_index + local_offset : global_row_count; + { + ONEDAL_PROFILER_TASK(allreduce_cluster_index, queue); + comm.allreduce(cluster_index, spmd::reduce_op::min).wait(); + } + + if (cluster_index < 0) { + return make_results(queue, desc, data_nd, arr_responses, arr_cores, 0, 0); + } + + while (cluster_index < de::integral_cast(global_row_count)) { + cluster_count++; + bool in_range = + cluster_index >= local_offset && cluster_index < local_offset + local_row_count; + + std::int32_t local_points_queue_size = 0; + + if (in_range) { + set_arr_value(queue, arr_responses, cluster_index - local_offset, cluster_count - 1) + .wait_and_throw(); + set_init_index(queue, observation_indices, cluster_index - local_offset, true) + .wait_and_throw(); + local_points_queue_size++; + } + + std::int32_t total_points_queue_size = local_points_queue_size; + + { + ONEDAL_PROFILER_TASK(allreduce_total_points_queue_size_outer, queue); + comm.allreduce(total_points_queue_size, spmd::reduce_op::sum).wait(); + } + + while (total_points_queue_size != 0) { + auto recv_counts = array::zeros(rank_count); + recv_counts.get_mutable_data()[current_rank] = local_points_queue_size; + { + ONEDAL_PROFILER_TASK(allreduce_recv_counts, queue); + comm.allreduce(recv_counts, spmd::reduce_op::sum).wait(); + } + + auto displs = array::zeros(rank_count); + auto displs_ptr = displs.get_mutable_data(); + std::int64_t total_count = 0; + for (std::int64_t i = 0; i < rank_count; i++) { + displs_ptr[i] = total_count; + total_count += recv_counts.get_data()[i]; + } + + auto [current_points_queue, current_points_queue_event] = + pr::ndarray::full(queue, + { total_points_queue_size, column_count }, + 0, + sycl::usm::alloc::device); + + set_arr_value(queue, total_points_queue_size_arr, 0, total_points_queue_size) + .wait_and_throw(); + + sycl::event fill_queue_event; + + if (local_points_queue_size != 0) { + fill_queue_event = + kernels_fp::fill_current_points_queue(queue, + data_nd, + observation_indices, + current_points_queue, + local_points_queue_size_arr, + displs_ptr[current_rank], + { current_points_queue_event }); + set_arr_value(queue, local_points_queue_size_arr, 0, 0, { fill_queue_event }) + .wait_and_throw(); + } + { + ONEDAL_PROFILER_TASK(allreduce_xtx, queue); + comm.allreduce(current_points_queue.flatten(queue, { fill_queue_event }), + spmd::reduce_op::sum) + .wait(); + } + + kernels_fp::update_points_queue(queue, + data_nd, + arr_cores, + current_points_queue, + arr_responses, + total_points_queue_size_arr, + observation_indices, + epsilon, + cluster_count - 1, + { fill_queue_event }) + .wait_and_throw(); + + local_points_queue_size = + kernels_fp::get_points_queue_size(queue, total_points_queue_size_arr); + + total_points_queue_size = local_points_queue_size; + + { + ONEDAL_PROFILER_TASK(allreduce_total_points_queue_size_inner, queue); + comm.allreduce(total_points_queue_size, spmd::reduce_op::sum).wait(); + } + } + + cluster_index = kernels_fp::start_next_cluster(queue, arr_cores, arr_responses); + cluster_index = + cluster_index < local_row_count ? cluster_index + local_offset : global_row_count; + { + ONEDAL_PROFILER_TASK(cluster_index, queue); + comm.allreduce(cluster_index, spmd::reduce_op::min).wait(); + } + } + + return make_results(queue, desc, data_nd, arr_responses, arr_cores, cluster_count); +} + +template +static result_t compute(const context_gpu& ctx, const descriptor_t& desc, const input_t& input) { + return compute_kernel_kd_tree_impl(ctx, desc, input.get_data(), input.get_weights()); +} + +template +struct compute_kernel_gpu { + result_t operator()(const context_gpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } +}; + +template struct compute_kernel_gpu; +template struct compute_kernel_gpu; + +} // namespace oneapi::dal::dbscan::backend diff --git a/cpp/oneapi/dal/algo/dbscan/common.hpp b/cpp/oneapi/dal/algo/dbscan/common.hpp index 0c4dffc6687..743137f6e33 100644 --- a/cpp/oneapi/dal/algo/dbscan/common.hpp +++ b/cpp/oneapi/dal/algo/dbscan/common.hpp @@ -40,11 +40,15 @@ using v1::by_default; namespace method { namespace v1 { struct brute_force {}; +struct kd_tree {}; +struct ball_tree {}; using by_default = brute_force; } // namespace v1 using v1::brute_force; +using v1::kd_tree; +using v1::ball_tree; using v1::by_default; } // namespace method @@ -88,7 +92,8 @@ template constexpr bool is_valid_float_v = dal::detail::is_one_of_v; template -constexpr bool is_valid_method_v = dal::detail::is_one_of_v; +constexpr bool is_valid_method_v = + dal::detail::is_one_of_v; template constexpr bool is_valid_task_v = dal::detail::is_one_of_v; @@ -137,7 +142,8 @@ namespace v1 { /// intermediate computations. Can be :expr:`float` or /// :expr:`double`. /// @tparam Method Tag-type that specifies an implementation of algorithm. Can -/// be :expr:`method::brute_force`. +/// be :expr:`method::brute_force`, :expr:`method::kd_tree`, +/// or :expr:`method::ball_tree`. /// @tparam Task Tag-type that specifies the type of the problem to solve. Can /// be :expr:`task::clustering`. template { INSTANTIATE(float, method::brute_force, task::clustering) INSTANTIATE(double, method::brute_force, task::clustering) +INSTANTIATE(float, method::kd_tree, task::clustering) +INSTANTIATE(double, method::kd_tree, task::clustering) +INSTANTIATE(float, method::ball_tree, task::clustering) +INSTANTIATE(double, method::ball_tree, task::clustering) } // namespace v1 } // namespace oneapi::dal::dbscan::detail diff --git a/cpp/oneapi/dal/algo/dbscan/detail/compute_ops_dpc.cpp b/cpp/oneapi/dal/algo/dbscan/detail/compute_ops_dpc.cpp index ca1c7ac14b5..e0efb0881af 100644 --- a/cpp/oneapi/dal/algo/dbscan/detail/compute_ops_dpc.cpp +++ b/cpp/oneapi/dal/algo/dbscan/detail/compute_ops_dpc.cpp @@ -42,6 +42,10 @@ struct compute_ops_dispatcher { INSTANTIATE(float, method::brute_force, task::clustering) INSTANTIATE(double, method::brute_force, task::clustering) +INSTANTIATE(float, method::kd_tree, task::clustering) +INSTANTIATE(double, method::kd_tree, task::clustering) +INSTANTIATE(float, method::ball_tree, task::clustering) +INSTANTIATE(double, method::ball_tree, task::clustering) } // namespace v1 } // namespace oneapi::dal::dbscan::detail diff --git a/cpp/oneapi/dal/algo/dbscan/test/badarg.cpp b/cpp/oneapi/dal/algo/dbscan/test/badarg.cpp index 51788c327cb..54ef8afaa1d 100644 --- a/cpp/oneapi/dal/algo/dbscan/test/badarg.cpp +++ b/cpp/oneapi/dal/algo/dbscan/test/badarg.cpp @@ -66,7 +66,10 @@ class dbscan_badarg_test : public te::algo_fixture { static constexpr std::array bad_weights_ = { 1.0, 1.0 }; }; -using dbscan_types = COMBINE_TYPES((float, double), (dbscan::method::brute_force)); +using dbscan_types = COMBINE_TYPES((float, double), + (dbscan::method::brute_force, + dbscan::method::kd_tree, + dbscan::method::ball_tree)); #define DBSCAN_BADARG_TEST(name) \ TEMPLATE_LIST_TEST_M(dbscan_badarg_test, name, "[dbscan][badarg]", dbscan_types) diff --git a/cpp/oneapi/dal/algo/dbscan/test/batch.cpp b/cpp/oneapi/dal/algo/dbscan/test/batch.cpp index 99137994296..2f03aa855ae 100644 --- a/cpp/oneapi/dal/algo/dbscan/test/batch.cpp +++ b/cpp/oneapi/dal/algo/dbscan/test/batch.cpp @@ -21,7 +21,10 @@ namespace oneapi::dal::dbscan::test { template class dbscan_batch_test : public dbscan_test> {}; -using dbscan_types = COMBINE_TYPES((float, double), (dbscan::method::brute_force)); +using dbscan_types = COMBINE_TYPES((float, double), + (dbscan::method::brute_force, + dbscan::method::kd_tree, + dbscan::method::ball_tree)); TEMPLATE_LIST_TEST_M(dbscan_batch_test, "dbscan compute mode check", @@ -310,4 +313,56 @@ TEMPLATE_LIST_TEST_M(dbscan_batch_test, this->dbi_determenistic_checks(x, epsilon, min_observations, ref_dbi, 1.0e-1); } +using dbscan_cross_method_types = COMBINE_TYPES((float, double), (dbscan::method::brute_force)); + +TEMPLATE_LIST_TEST_M(dbscan_batch_test, + "dbscan cross-method comparison", + "[dbscan][batch]", + dbscan_cross_method_types) { + SKIP_IF(this->not_float64_friendly()); + using float_t = std::tuple_element_t<0, TestType>; + + constexpr float_t data[] = { 0.0, 0.1, 0.2, 1.0, 1.1, 1.2, 5.0, 5.1, 5.2, + 0.0, 0.1, 0.2, 1.0, 1.1, 1.2, 5.0, 5.1, 5.2 }; + const auto x = homogen_table::wrap(data, 9, 2); + + constexpr double epsilon = 0.5; + constexpr std::int64_t min_observations = 2; + + const auto desc_bf = + dbscan::descriptor(epsilon, min_observations) + .set_result_options(result_options::responses); + const auto desc_kd = + dbscan::descriptor(epsilon, min_observations) + .set_result_options(result_options::responses); + const auto desc_bt = + dbscan::descriptor(epsilon, min_observations) + .set_result_options(result_options::responses); + + const auto result_bf = te::compute(this->get_policy(), desc_bf, x, table{}); + const auto result_kd = te::compute(this->get_policy(), desc_kd, x, table{}); + const auto result_bt = te::compute(this->get_policy(), desc_bt, x, table{}); + + REQUIRE(result_bf.get_cluster_count() == result_kd.get_cluster_count()); + REQUIRE(result_bf.get_cluster_count() == result_bt.get_cluster_count()); + + const auto resp_bf = result_bf.get_responses(); + const auto resp_kd = result_kd.get_responses(); + const auto resp_bt = result_bt.get_responses(); + + row_accessor acc_bf(resp_bf); + row_accessor acc_kd(resp_kd); + row_accessor acc_bt(resp_bt); + + const auto arr_bf = acc_bf.pull({ 0, -1 }); + const auto arr_kd = acc_kd.pull({ 0, -1 }); + const auto arr_bt = acc_bt.pull({ 0, -1 }); + + for (std::int64_t i = 0; i < x.get_row_count(); ++i) { + CAPTURE(i, arr_bf[i], arr_kd[i], arr_bt[i]); + REQUIRE(arr_bf[i] == arr_kd[i]); + REQUIRE(arr_bf[i] == arr_bt[i]); + } +} + } // namespace oneapi::dal::dbscan::test diff --git a/cpp/oneapi/dal/algo/dbscan/test/spmd.cpp b/cpp/oneapi/dal/algo/dbscan/test/spmd.cpp index 2a530d0b0b5..1201e366f10 100644 --- a/cpp/oneapi/dal/algo/dbscan/test/spmd.cpp +++ b/cpp/oneapi/dal/algo/dbscan/test/spmd.cpp @@ -165,7 +165,10 @@ class dbscan_spmd_test : public dbscan_test std::int64_t rank_count_ = 1; }; -using dbscan_types = COMBINE_TYPES((float, double), (dbscan::method::brute_force)); +using dbscan_types = COMBINE_TYPES((float, double), + (dbscan::method::brute_force, + dbscan::method::kd_tree, + dbscan::method::ball_tree)); TEMPLATE_LIST_TEST_M(dbscan_spmd_test, "dbscan degenerated test", "[dbscan][spmd]", dbscan_types) { SKIP_IF(this->not_float64_friendly()); diff --git a/examples/oneapi/cpp/source/dbscan/dbscan_ball_tree_batch.cpp b/examples/oneapi/cpp/source/dbscan/dbscan_ball_tree_batch.cpp new file mode 100644 index 00000000000..34e10106ee0 --- /dev/null +++ b/examples/oneapi/cpp/source/dbscan/dbscan_ball_tree_batch.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/dbscan.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +int main(int argc, char const* argv[]) { + const auto data_file_name = get_data_path("data/dbscan_dense.csv"); + const auto x_data = dal::read(dal::csv::data_source{ data_file_name }); + + double epsilon = 0.04; + std::int64_t min_observations = 45; + + auto dbscan_desc = + dal::dbscan::descriptor(epsilon, min_observations); + dbscan_desc.set_result_options(dal::dbscan::result_options::responses | + dal::dbscan::result_options::core_observation_indices); + + const auto result = dal::compute(dbscan_desc, x_data); + + std::cout << "Cluster count: " << result.get_cluster_count() << std::endl; + std::cout << "Core observation count: " << result.get_core_observation_indices().get_row_count() + << std::endl; + std::cout << "Responses:\n" << result.get_responses() << std::endl; + return 0; +} diff --git a/examples/oneapi/cpp/source/dbscan/dbscan_kd_tree_batch.cpp b/examples/oneapi/cpp/source/dbscan/dbscan_kd_tree_batch.cpp new file mode 100644 index 00000000000..c53a0b6ee08 --- /dev/null +++ b/examples/oneapi/cpp/source/dbscan/dbscan_kd_tree_batch.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/dbscan.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +int main(int argc, char const* argv[]) { + const auto data_file_name = get_data_path("data/dbscan_dense.csv"); + const auto x_data = dal::read(dal::csv::data_source{ data_file_name }); + + double epsilon = 0.04; + std::int64_t min_observations = 45; + + auto dbscan_desc = + dal::dbscan::descriptor(epsilon, min_observations); + dbscan_desc.set_result_options(dal::dbscan::result_options::responses | + dal::dbscan::result_options::core_observation_indices); + + const auto result = dal::compute(dbscan_desc, x_data); + + std::cout << "Cluster count: " << result.get_cluster_count() << std::endl; + std::cout << "Core observation count: " << result.get_core_observation_indices().get_row_count() + << std::endl; + std::cout << "Responses:\n" << result.get_responses() << std::endl; + return 0; +} diff --git a/examples/oneapi/dpc/source/dbscan/dbscan_ball_tree_batch.cpp b/examples/oneapi/dpc/source/dbscan/dbscan_ball_tree_batch.cpp new file mode 100644 index 00000000000..88797f080a6 --- /dev/null +++ b/examples/oneapi/dpc/source/dbscan/dbscan_ball_tree_batch.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include +#include + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/dbscan.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +void run(sycl::queue& q) { + const auto data_file_name = get_data_path("data/dbscan_dense.csv"); + const auto x_data = dal::read(q, dal::csv::data_source{ data_file_name }); + + double epsilon = 0.04; + std::int64_t min_observations = 45; + + auto dbscan_desc = + dal::dbscan::descriptor(epsilon, min_observations); + dbscan_desc.set_result_options(dal::dbscan::result_options::responses); + + const auto result = dal::compute(q, dbscan_desc, x_data); + + std::cout << "Cluster count: " << result.get_cluster_count() << std::endl; + std::cout << "Responses:\n" << result.get_responses() << std::endl; +} + +int main(int argc, char const* argv[]) { + for (auto d : list_devices()) { + std::cout << "Running on " << d.get_platform().get_info() + << ", " << d.get_info() << "\n" + << std::endl; + auto q = sycl::queue{ d }; + run(q); + } + return 0; +} diff --git a/examples/oneapi/dpc/source/dbscan/dbscan_kd_tree_batch.cpp b/examples/oneapi/dpc/source/dbscan/dbscan_kd_tree_batch.cpp new file mode 100644 index 00000000000..689eebe5d98 --- /dev/null +++ b/examples/oneapi/dpc/source/dbscan/dbscan_kd_tree_batch.cpp @@ -0,0 +1,57 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include +#include + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/dbscan.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +void run(sycl::queue& q) { + const auto data_file_name = get_data_path("data/dbscan_dense.csv"); + const auto x_data = dal::read(q, dal::csv::data_source{ data_file_name }); + + double epsilon = 0.04; + std::int64_t min_observations = 45; + + auto dbscan_desc = + dal::dbscan::descriptor(epsilon, min_observations); + dbscan_desc.set_result_options(dal::dbscan::result_options::responses); + + const auto result = dal::compute(q, dbscan_desc, x_data); + + std::cout << "Cluster count: " << result.get_cluster_count() << std::endl; + std::cout << "Responses:\n" << result.get_responses() << std::endl; +} + +int main(int argc, char const* argv[]) { + for (auto d : list_devices()) { + std::cout << "Running on " << d.get_platform().get_info() + << ", " << d.get_info() << "\n" + << std::endl; + auto q = sycl::queue{ d }; + run(q); + } + return 0; +}