Skip to content

Commit 134c91f

Browse files
author
Vemula Anvesh
committed
fix(argocd): resolve repo_url at extraction time for reliable cicd_deployment_commits
Previously, cicd_deployment_commits.repo_url was populated by reading application.RepoURL in convertSyncOperations. For multi-source apps, _tool_argocd_applications.repo_url is empty when extractApplications is skipped (DevLake's collector state cache omits it when no new application raw data has been collected), causing the repo_url field to fall back to the deployment-name placeholder and breaking DevLake's commits_diff-to-PR linkage for DORA Lead Time metrics.

Fix: resolve the git repo URL during extractSyncOperations, which always runs regardless of collector state. The resolved URL is stored in a new ArgocdSyncOperation.RepoURL field and used as the primary source in convertSyncOperations, with application.RepoURL as fallback.

Changes:
- models/sync_operation.go: add RepoURL varchar(500) field
- models/migrationscripts/: add migration 20260331 to ADD COLUMN
- tasks/sync_operation_extractor.go:
  - add resolveGitRepoURL(singleSourceURL, sources): prefers known git-hosting URLs from sources[], falls back to the first non-chart URL
  - populate syncOp.RepoURL after revision resolution
- tasks/sync_operation_convertor.go: use syncOp.RepoURL first, then application.RepoURL, then the deployment name as a last resort
- tasks/sync_operation_extractor_test.go: 6 new tests for resolveGitRepoURL covering single-source, multi-source, fallback, all-chart, and empty inputs
1 parent 881c650 commit 134c91f

6 files changed

Lines changed: 168 additions & 1 deletion

File tree

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package migrationscripts
19+
20+
import (
21+
"github.com/apache/incubator-devlake/core/context"
22+
"github.com/apache/incubator-devlake/core/errors"
23+
"github.com/apache/incubator-devlake/core/plugin"
24+
)
25+
26+
var _ plugin.MigrationScript = (*addRepoURLToSyncOperations)(nil)
27+
28+
// addRepoURLToSyncOperations is the migration script registered under the name
// "argocd add repo_url to sync operations". It carries no state; its methods
// implement plugin.MigrationScript.
type addRepoURLToSyncOperations struct{}
29+
30+
// addRepoURLSyncOpArchived freezes the subset of ArgocdSyncOperation fields
// that this migration works with, so that later changes to the live model
// cannot alter what the migration does.
type addRepoURLSyncOpArchived struct {
	// Primary-key fields identifying one sync operation row.
	ConnectionId    uint64 `gorm:"primaryKey"`
	ApplicationName string `gorm:"primaryKey;type:varchar(255)"`
	DeploymentId    int64  `gorm:"primaryKey"`

	// RepoURL is the field introduced by this migration.
	RepoURL string `gorm:"type:varchar(500)"`
}
38+
39+
func (addRepoURLSyncOpArchived) TableName() string {
40+
return "_tool_argocd_sync_operations"
41+
}
42+
43+
func (m *addRepoURLToSyncOperations) Up(basicRes context.BasicRes) errors.Error {
44+
db := basicRes.GetDal()
45+
return db.AutoMigrate(&addRepoURLSyncOpArchived{})
46+
}
47+
48+
func (*addRepoURLToSyncOperations) Version() uint64 {
49+
return 20260331000000
50+
}
51+
52+
func (*addRepoURLToSyncOperations) Name() string {
53+
return "argocd add repo_url to sync operations"
54+
}

backend/plugins/argocd/models/migrationscripts/register.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ func All() []plugin.MigrationScript {
2525
return []plugin.MigrationScript{
2626
new(addInitTables),
2727
new(addImageSupportArtifacts),
28+
new(addRepoURLToSyncOperations),
2829
}
2930
}

backend/plugins/argocd/models/sync_operation.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ type ArgocdSyncOperation struct {
2828
ApplicationName string `gorm:"primaryKey;type:varchar(255)"`
2929
DeploymentId int64 `gorm:"primaryKey"` // History ID from ArgoCD
3030
Revision string `gorm:"type:varchar(255)"` // Git SHA
31+
RepoURL string `gorm:"type:varchar(500)"` // Git repo URL resolved from source/sources at extraction time
3132
Kind string `gorm:"type:varchar(100)"` // Kubernetes resource kind: Deployment, ReplicaSet, Rollout, StatefulSet, DaemonSet, etc.
3233
StartedAt *time.Time
3334
FinishedAt *time.Time

backend/plugins/argocd/tasks/sync_operation_convertor.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,14 @@ func ConvertSyncOperations(taskCtx plugin.SubTaskContext) errors.Error {
137137
results = append(results, deployment)
138138

139139
if syncOp.Revision != "" {
140+
// Priority: repo_url resolved at extraction time (always present for
141+
// multi-source apps) → application-level repo_url → deployment name
142+
// as a last-resort non-empty placeholder.
140143
repoUrl := deployment.Name
141-
if application != nil && application.RepoURL != "" {
144+
switch {
145+
case syncOp.RepoURL != "":
146+
repoUrl = syncOp.RepoURL
147+
case application != nil && application.RepoURL != "":
142148
repoUrl = application.RepoURL
143149
}
144150

backend/plugins/argocd/tasks/sync_operation_extractor.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,12 @@ func ExtractSyncOperations(taskCtx plugin.SubTaskContext) errors.Error {
197197
return nil, nil
198198
}
199199

200+
// Resolve the git repo URL at extraction time so the convertor can set
201+
// cicd_deployment_commits.repo_url correctly even when
202+
// _tool_argocd_applications.repo_url is empty (e.g. for multi-source apps
203+
// whose collectApplications subtask was skipped due to state caching).
204+
syncOp.RepoURL = resolveGitRepoURL(apiOp.Source.RepoURL, apiOp.Sources)
205+
200206
if isOperationState {
201207
start := normalize(apiOp.StartedAt)
202208
if start != nil {
@@ -469,6 +475,52 @@ func isGitHostedURL(repoURL string) bool {
469475
return strings.HasSuffix(strings.TrimSpace(repoURL), ".git")
470476
}
471477

478+
// resolveGitRepoURL returns the best git repository URL from a sync operation's
479+
// source metadata. For single-source apps the source.repoURL is used directly.
480+
// For multi-source apps (sources[]) the first URL that matches a known git
481+
// hosting service is preferred; if none match the heuristic, the first non-chart
482+
// HTTPS/SSH URL is used as a fallback so that cicd_deployment_commits.repo_url
483+
// is never left as the deployment-name placeholder.
484+
//
485+
// This is called during extractSyncOperations which always runs, providing
486+
// reliable repo_url population even when extractApplications is skipped due
487+
// to the collector state cache.
488+
func resolveGitRepoURL(singleSourceURL string, sources []ArgocdApiSyncSource) string {
489+
// Single-source app: use the URL directly.
490+
if singleSourceURL != "" {
491+
return singleSourceURL
492+
}
493+
494+
// Multi-source app: pass 1 — prefer a known git host.
495+
for _, src := range sources {
496+
if isGitHostedURL(src.RepoURL) {
497+
return src.RepoURL
498+
}
499+
}
500+
501+
// Pass 2 — fall back to the first non-chart URL (covers self-hosted instances
502+
// not in the known-host list, e.g. on-prem GitLab with a custom domain).
503+
chartPrefixes := []string{"gs://", "oci://", "s3://"}
504+
for _, src := range sources {
505+
if src.RepoURL == "" {
506+
continue
507+
}
508+
lower := strings.ToLower(src.RepoURL)
509+
isChart := false
510+
for _, pfx := range chartPrefixes {
511+
if strings.HasPrefix(lower, pfx) {
512+
isChart = true
513+
break
514+
}
515+
}
516+
if !isChart {
517+
return src.RepoURL
518+
}
519+
}
520+
521+
return ""
522+
}
523+
472524
// isCommitSHA returns true for a 40-character lowercase hexadecimal string,
473525
// which is the standard representation of a Git commit SHA-1.
474526
func isCommitSHA(s string) bool {

backend/plugins/argocd/tasks/sync_operation_extractor_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,59 @@ func TestIsCommitSHA(t *testing.T) {
147147
assert.False(t, isCommitSHA("5dd95b4efd7e9b668c361bbddb8d7f1e56c32ac12")) // 41 chars
148148
}
149149

150+
// ── resolveGitRepoURL ─────────────────────────────────────────────────────────
151+
152+
func TestResolveGitRepoURL_SingleSource(t *testing.T) {
153+
// Single-source app: singleSourceURL is used directly, sources ignored.
154+
got := resolveGitRepoURL("https://github.com/example/my-app", nil)
155+
assert.Equal(t, "https://github.com/example/my-app", got)
156+
}
157+
158+
func TestResolveGitRepoURL_MultiSourceGitHubWins(t *testing.T) {
159+
// Multi-source pattern: GCS chart + GitHub values ref.
160+
sources := []ArgocdApiSyncSource{
161+
{RepoURL: "gs://charts-example-net/infra/stable", Chart: "generic-service"},
162+
{RepoURL: "https://github.com/example/my-app"},
163+
}
164+
got := resolveGitRepoURL("", sources)
165+
assert.Equal(t, "https://github.com/example/my-app", got)
166+
}
167+
168+
func TestResolveGitRepoURL_MultiSourceOCIChart(t *testing.T) {
169+
// OCI chart + GitLab values repo.
170+
sources := []ArgocdApiSyncSource{
171+
{RepoURL: "oci://registry.example.com/charts", Chart: "app"},
172+
{RepoURL: "https://gitlab.com/org/config"},
173+
}
174+
got := resolveGitRepoURL("", sources)
175+
assert.Equal(t, "https://gitlab.com/org/config", got)
176+
}
177+
178+
func TestResolveGitRepoURL_FallbackNonChartURL(t *testing.T) {
179+
// No known git host but a non-chart HTTPS URL is still better than nothing.
180+
sources := []ArgocdApiSyncSource{
181+
{RepoURL: "gs://bucket/charts"},
182+
{RepoURL: "https://git.acme-corp.internal/team/config"},
183+
}
184+
got := resolveGitRepoURL("", sources)
185+
assert.Equal(t, "https://git.acme-corp.internal/team/config", got)
186+
}
187+
188+
func TestResolveGitRepoURL_AllChartSources(t *testing.T) {
189+
// All sources are chart registries — returns empty string.
190+
sources := []ArgocdApiSyncSource{
191+
{RepoURL: "gs://charts-example-net/infra/stable"},
192+
{RepoURL: "oci://registry.example.com/charts"},
193+
}
194+
got := resolveGitRepoURL("", sources)
195+
assert.Equal(t, "", got)
196+
}
197+
198+
func TestResolveGitRepoURL_EmptySources(t *testing.T) {
199+
assert.Equal(t, "", resolveGitRepoURL("", nil))
200+
assert.Equal(t, "", resolveGitRepoURL("", []ArgocdApiSyncSource{}))
201+
}
202+
150203
// ── isGitHostedURL ────────────────────────────────────────────────────────────
151204

152205
func TestIsGitHostedURL(t *testing.T) {

0 commit comments

Comments
 (0)