diff --git a/VERSION b/VERSION index 4e32c7b1..269fb5df 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.10.1 +5.10.2 diff --git a/docs/superpowers/plans/2026-06-18-ENGKNOW-3577-create-write-link-cache.md b/docs/superpowers/plans/2026-06-18-ENGKNOW-3577-create-write-link-cache.md new file mode 100644 index 00000000..4bc6ca39 --- /dev/null +++ b/docs/superpowers/plans/2026-06-18-ENGKNOW-3577-create-write-link-cache.md @@ -0,0 +1,155 @@ +# ENGKNOW-3577: create with `write -link` cache name fix — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make a `create ... | write -link X` store the actual written data-file name in the create cache so the create can be referenced afterwards. + +**Architecture:** The data-file name for a `-link` write is derived by the non-idempotent `LinkFileUtil.inferDataFileNameFromLinkFile` (random suffix). For a plain create it is derived twice — once at plan time (`BaseScriptExecutionEngine.resolveCache`, cached) and again at exec time (`ForkWrite`, actual write) — producing two different names. Fix: in `resolveCache`, inject the resolved name into the executed `write` command so `ForkWrite` reuses it instead of re-deriving. One derivation, cache and disk agree. + +**Tech Stack:** Java 17 + Scala 2.13, Gradle multi-module, JUnit 4, `TestUtils.runGorPipe`. + +**Spec:** `docs/superpowers/specs/2026-06-18-ENGKNOW-3577-create-write-link-cache-design.md` + +--- + +### Task 1: Regression test + fix for create/write-link cache name + +**Files:** +- Test: `gortools/src/test/java/gorsat/UTestGorWrite.java` (add one `@Test` method) +- Modify: `gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java:67-83` (`resolveCache`) + +Background facts (already verified, do not re-investigate): +- `UTestGorWrite` already imports `LinkFile`, `FileSource`, `Files`, `Path`, `GorDriverConfig`, `Assert`, and has `@Rule TemporaryFolder workDir` exposed via `workDirPath`, plus `@Rule EnvironmentVariables environmentVariables`. The `@Before setupTest()` sets `workDirPath`. +- `BaseScriptExecutionEngine.java` already imports `CommandParseUtilities` (used at line 325) and `Strings`, `DataType`, `PathUtils`, `Write`, `Tuple`, `Optional`. +- `resolveCache` already computes positional args via `write.validateArguments(args)` (passed into `parseBaseOptions`). + +--- + +- [ ] **Step 1: Write the failing regression test** + +Add this method to `gortools/src/test/java/gorsat/UTestGorWrite.java` (place it right after `testWriteLinkFileWithInferFileName`, ends at line 260): + +```java + @Test + public void testCreateWriteLinkCacheName() throws IOException { + // ENGKNOW-3577: a create whose query ends in `write -link` must store the actual + // written data file name in the cache, so referencing the create afterwards works. + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, + workDirPath.resolve("managed_data").toString()); + + String query = "create #test# = gorrow chr1,1,100 | write -link test.gor; nor [#test#]"; + String result = TestUtils.runGorPipe(query, "-gorroot", workDirPath.toString()); + + // The reference resolves and the original row survives the create -> link -> read round trip. + Assert.assertTrue("unexpected result: " + result, result.contains("chr1\t1\t100")); + + // The link points at a data file that actually exists on disk. + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("test.gor.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + } +``` + +- [ ] **Step 2: Run the test to verify it FAILS** + +Run: +```bash +./gradlew :gortools:test --tests "gorsat.UTestGorWrite.testCreateWriteLinkCacheName" +``` +Expected: FAIL. `nor [#test#]` resolves to the plan-time derived data-file name (random suffix A), which differs from the exec-time written name (random suffix B), so the file does not exist — the run throws (e.g. a `GorResourceException`/file-not-found) and the test fails before reaching the assertions. + +- [ ] **Step 3: Implement the fix in `resolveCache`** + +In `gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java`, replace the current `resolveCache` body (lines 67-83): + +```java + private Optional> resolveCache(GorContext context, String lastCommand, ExecutionBlock queryBlock) { + var write = new Write(); + var args = lastCommand.substring("write ".length()).split(" "); + var options = write.parseBaseOptions(context, write.validateArguments(args), args, false); + var outFile = options._1(); + if (Strings.isNullOrEmpty(outFile)) { + if (queryBlock.signature() != null) { + var writeFilePath = context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix); + writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath); + queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath); + outFile = writeFilePath; + } else { + outFile = null; + } + } + return !Strings.isNullOrEmpty(outFile) ? resolveForkPathParent(outFile) : Optional.empty(); + } +``` + +with: + +```java + private Optional> resolveCache(GorContext context, String lastCommand, ExecutionBlock queryBlock) { + var write = new Write(); + var args = lastCommand.substring("write ".length()).split(" "); + var iargs = write.validateArguments(args); + var options = write.parseBaseOptions(context, iargs, args, false); + var outFile = options._1(); + if (Strings.isNullOrEmpty(outFile)) { + if (queryBlock.signature() != null) { + var writeFilePath = context.getSession().getProjectContext().getFileCache().tempLocation(queryBlock.signature(), DataType.GORD.suffix); + writeFilePath = PathUtils.relativize(context.getSession().getProjectContext().getProjectRoot(), writeFilePath); + queryBlock.query_$eq(queryBlock.query() + " " + writeFilePath); + outFile = writeFilePath; + } else { + outFile = null; + } + } else if (iargs.length == 0 && CommandParseUtilities.hasOption(args, "-link")) { + // ENGKNOW-3577: with `write -link X` (no explicit data file), the data file name is + // inferred from the link and is non-idempotent (random suffix). Inject the resolved + // name into the executed write so ForkWrite reuses it instead of re-deriving a + // different name, keeping the create cache entry and the written file in sync. + queryBlock.query_$eq(queryBlock.query() + " " + outFile); + } + return !Strings.isNullOrEmpty(outFile) ? resolveForkPathParent(outFile) : Optional.empty(); + } +``` + +Changes: capture `iargs` once and reuse it in `parseBaseOptions`; add the `else if` branch that injects the resolved `outFile` into the query for the link-inferred case. + +- [ ] **Step 4: Run the regression test to verify it PASSES** + +Run: +```bash +./gradlew :gortools:test --tests "gorsat.UTestGorWrite.testCreateWriteLinkCacheName" +``` +Expected: PASS. + +- [ ] **Step 5: Run the full write test class to check for regressions** + +Run: +```bash +./gradlew :gortools:test --tests "gorsat.UTestGorWrite" +``` +Expected: PASS (all existing `write -link` / infer / fork-write / gord-folder tests still green — the new branch only triggers when there is no positional file argument AND `-link` is present). + +- [ ] **Step 6: Commit** + +```bash +git add gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java gortools/src/test/java/gorsat/UTestGorWrite.java +git commit -m "fix(ENGKNOW-3577): store actual link data name in create cache + +A create ending in `write -link X` derived the data file name twice +(plan time in resolveCache, exec time in ForkWrite); the name is +non-idempotent (random suffix), so the cached name did not match the +written file and referencing the create failed. resolveCache now injects +the resolved name into the executed write so ForkWrite reuses it. + +Co-Authored-By: Claude Opus 4.8 (1M context) " +``` + +--- + +## Self-review notes + +- **Spec coverage:** root cause (resolveCache double-derive) → Step 3 fix; regression test → Steps 1-2; no-LinkFileUtil-change preserved (only `BaseScriptExecutionEngine` + test touched); follow-up (`MacroUtilities.getCachePath`) intentionally not in this plan. +- **Detection criterion** matches `Write.scala:52` (`fileName.isEmpty && linkOpt.nonEmpty`) — here `iargs.length == 0` ⇒ empty `fileName` (parseBaseOptions builds `fileName` from `iargs.mkString(" ")`), and `hasOption(args, "-link")`. +- **Idempotency:** after injection the executed `write` carries a positional filename, so any re-parse skips inference (no second random name). +- **No placeholders.** All code shown in full. +``` diff --git a/docs/superpowers/specs/2026-06-18-ENGKNOW-3577-create-write-link-cache-design.md b/docs/superpowers/specs/2026-06-18-ENGKNOW-3577-create-write-link-cache-design.md new file mode 100644 index 00000000..869fb6f0 --- /dev/null +++ b/docs/superpowers/specs/2026-06-18-ENGKNOW-3577-create-write-link-cache-design.md @@ -0,0 +1,134 @@ +# ENGKNOW-3577 — `create` with `write -link` stores wrong name in cache + +**Type:** Bug fix +**Ticket:** [ENGKNOW-3577](https://genedx.atlassian.net/browse/ENGKNOW-3577) (Bug, Highest) +**Branch:** `add-test-server` (current) +**Date:** 2026-06-18 + +## Problem + +A `create` whose query ends in `write -link ` (no explicit data filename) stores +an incorrect data-file name in the create cache. Referencing the create afterwards fails +because the cached name points at a file that was never written. + +Reproduction: + +``` +create #test# = gorrows -p chr1:1-100 + | write -link test.gorz.link +; + +nor [#test#] +``` + +`nor [#test#]` fails — the cache holds a data-file name that does not exist on disk. + +## Root cause + +`LinkFileUtil.inferDataFileNameFromLinkFile` (`model/.../linkfile/LinkFileUtil.java:50`) +derives the data-file path for a `-link` write. Its own doc comment (lines 23–31) states the +result **must be idempotent** ("we can not use random or time in the path"), but line 89 +injects a random 3-character suffix: + +```java +var uniqueIdPart = (link.getSerial() + 1) + "-" + RandomStringUtils.insecure().nextAlphanumeric(3); +``` + +The random suffix was added in ENGKNOW-3362 (versioned-link integrity). The serial portion is +stable across calls within one create (link file not yet written → serial = 1 both times); +only the random portion differs. + +For a plain `create ... | write -link X`, the data-file name is derived **twice**: + +1. **Plan time** — `BaseScriptExecutionEngine.resolveCache` + (`gortools/.../Script/BaseScriptExecutionEngine.java:67`) calls + `Write.parseBaseOptions`, which (filename empty + `-link` present) calls + `inferDataFileNameFromLinkFile` → **name #1** (random A). Returned as the create cache result. +2. **Exec time** — the executed query still carries the bare `write -link X` (the query is + only mutated in the *empty-outFile* branch, lines 72–81; the link case leaves it untouched + because `outFile` is non-empty). `ForkWrite` re-parses the write → `inferDataFileNameFromLinkFile` + → **name #2** (random B). Data + link file written here. + +name #1 ≠ name #2. Cache → #1, data → #2. Reference of `[#test#]` resolves to #1, which does +not exist. + +`pgor`/`partgor`/`parallel` creates use a parallel path +(`MacroUtilities.getCachePath`, `gortools/.../Utilities/MacroUtilities.scala:452`) with the same +double-derivation shape, but those write gord folders with different naming. **Out of scope** +for this ticket (see Follow-ups). + +## Fix — derive once, reuse the name + +In `resolveCache`, when the write filename was inferred from `-link` (no positional file +argument **and** `-link` present), inject the already-resolved `outFile` into the executed query +as the explicit write target — the same mutation pattern already used for the empty-outFile +branch. + +Effect: + +- `ForkWrite` parses an explicit filename → **does not re-derive** → no second random suffix. +- Data written to #1, link file points to #1, cache stores #1. All three agree. +- The mutation is also self-stabilising: a subsequent parse sees the positional filename and + skips inference entirely (idempotent). + +No change to `LinkFileUtil` — the ENGKNOW-3362 uniqueness/integrity behaviour is preserved. + +### Detection of the link-inferred case + +Inside `resolveCache`: + +- `iargs = write.validateArguments(args)` is empty (no positional data filename), **and** +- the `-link` option is present in `args`. + +When both hold and `outFile` is non-empty (i.e. it came from link inference), append `outFile` +to `queryBlock.query()` so the executed `write` becomes +`... | write -link `. + +### Touch points + +| File | Change | +|------|--------| +| `gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java` | `resolveCache`: inject resolved data-file name into the query for the link-inferred case | +| `gortools/src/test/java/gorsat/UTestGorWrite.java` | New regression test | + +## Regression test + +Model on `testWriteLinkFileWithInferFileName` (`UTestGorWrite.java:246`). + +``` +create #test# = gorrows -p chr1:1-100 | write -link test.gorz.link ; nor [#test#] +``` + +Assertions: + +1. The query runs without error (currently throws — file not found). +2. `nor [#test#]` returns the expected 99 rows. +3. The data file referenced by `test.gorz.link` exists, and the cached create result resolves + to that same file (cache name == actual written name). + +Test must **fail before** the fix and **pass after**. + +## Out of scope / Follow-ups + +- `MacroUtilities.getCachePath` (pgor/partgor/parallel link writes) — **verified** via added + tests (`testCreateWriteLinkInferFileName{Pgor,Parallel,Partgor}` in `UTestGorWrite.java`): the + resolved `cachePath` is derived once and reused, so the ENGKNOW-3577 cache-name mismatch does + **not** occur for any of the three. +- **Separate defect discovered and fixed in this PR:** `partgor` silently dropped the `-link` + option — `PartGor.scala` built the dictionary command but never appended `-link`/`-linkmeta`, + unlike `PGor.scala`/`Parallel.scala`, so `partgor ... | write -link X` wrote no `.link` file. + Fixed by appending the link options (extracted from `create.query`) to the partgor dictionary + command, mirroring `Parallel.scala`. Covered by `testCreateWriteLinkInferFileNamePartgor`. +- Making `inferDataFileNameFromLinkFile` idempotent again (removing the random suffix) was + rejected: it risks regressing the ENGKNOW-3362 concurrent-write uniqueness guarantee. +- The doc comment on `inferDataFileNameFromLinkFile` (`LinkFileUtil.java:23-31`) promises an + idempotent result, which the random suffix (line 89) violates. This fix works *around* the + non-idempotency rather than removing it, so the comment is now misleading — update the comment + (or the implementation) in a follow-up. + +## Verification + +```bash +./gradlew :gortools:test --tests "gorsat.UTestGorWrite.testCreateWriteLinkCacheName" +./gradlew :gortools:test --tests "gorsat.UTestGorWrite" +``` diff --git a/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java b/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java index eea68259..b2fd9705 100644 --- a/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java +++ b/gortools/src/main/java/gorsat/Script/BaseScriptExecutionEngine.java @@ -67,7 +67,8 @@ private Optional> resolveForkPathParent(String res) { private Optional> resolveCache(GorContext context, String lastCommand, ExecutionBlock queryBlock) { var write = new Write(); var args = lastCommand.substring("write ".length()).split(" "); - var options = write.parseBaseOptions(context, write.validateArguments(args), args, false); + var iargs = write.validateArguments(args); + var options = write.parseBaseOptions(context, iargs, args, false); var outFile = options._1(); if (Strings.isNullOrEmpty(outFile)) { if (queryBlock.signature() != null) { @@ -78,6 +79,12 @@ private Optional> resolveCache(GorContext context, String } else { outFile = null; } + } else if (iargs.length == 0 && CommandParseUtilities.hasOption(args, "-link")) { + // With `write -link X` (no explicit data file), the data file name is + // inferred from the link and is non-idempotent (random suffix). Inject the resolved + // name into the executed write so ForkWrite reuses it instead of re-deriving a + // different name, keeping the create cache entry and the written file in sync. + queryBlock.query_$eq(queryBlock.query() + " " + outFile); } return !Strings.isNullOrEmpty(outFile) ? resolveForkPathParent(outFile) : Optional.empty(); } @@ -279,7 +286,6 @@ public Tuple> processBlocks(GorContext context, boolean sugg for (Map.Entry newExecutionBlockEntry : activeExecutionBlocks.entrySet()) { // Get the command to finally execute var newExecutionBlock = newExecutionBlockEntry.getValue(); - var commandToExecute = newExecutionBlock.query(); var cacheFile = newExecutionBlock.cachePath(); var hasForkWrite = newExecutionBlock.hasForkWrite(); @@ -300,6 +306,10 @@ public Tuple> processBlocks(GorContext context, boolean sugg hasFork = hasForkWrite; } + // Read the query AFTER getExplicitWrite, which may inject the resolved write + // target into the block's query, so the executed command stays in sync with the resolved cache path. + var commandToExecute = newExecutionBlock.query(); + // Extract used files from the final gor command var usedFiles = getUsedFiles(commandToExecute, session); diff --git a/gortools/src/main/scala/gorsat/Commands/Write.scala b/gortools/src/main/scala/gorsat/Commands/Write.scala index 861aca51..38e6bfc0 100644 --- a/gortools/src/main/scala/gorsat/Commands/Write.scala +++ b/gortools/src/main/scala/gorsat/Commands/Write.scala @@ -54,7 +54,7 @@ class Write extends CommandInfo("WRITE", val linkSourceRef = new SourceReference(DataUtil.toLink(linkOpt), context.getSession.getProjectContext.getFileReader.getSecurityContext, context.getSession.getProjectContext.getFileReader.getCommonRoot, null, null, true); - // Infer the full file name from the link (and defautl locations) + // Infer the full file name from the link (and default locations) LinkFileUtil.inferDataFileNameFromLinkFile( context.getSession.getProjectContext.getFileReader.resolveDataSource(linkSourceRef).asInstanceOf[StreamSource], linkMetaInfo.linkFileMeta, diff --git a/gortools/src/main/scala/gorsat/Macros/PGor.scala b/gortools/src/main/scala/gorsat/Macros/PGor.scala index be9e8dd1..b56e490a 100644 --- a/gortools/src/main/scala/gorsat/Macros/PGor.scala +++ b/gortools/src/main/scala/gorsat/Macros/PGor.scala @@ -25,7 +25,7 @@ package gorsat.Macros import gorsat.Commands.{CommandArguments, CommandParseUtilities} import gorsat.Script import gorsat.Script._ -import gorsat.Utilities.MacroUtilities.getCachePath +import gorsat.Utilities.MacroUtilities import org.gorpipe.gor.driver.linkfile.LinkFileUtil import org.gorpipe.gor.session.GorContext import org.gorpipe.gor.table.util.PathUtils @@ -56,7 +56,7 @@ class PGor extends MacroInfo("PGOR", CommandArguments("-nowithin", "-gordfolder" if (!doHeader) { val noWithin = CommandParseUtilities.hasOption(options, "-nowithin") - val (hasDictFolderWrite, cacheFileExists, hasForkWrite, theCachePath, theQueryAppend) = getCachePath(create, context, skipCache) + val (hasDictFolderWrite, cacheFileExists, hasForkWrite, theCachePath, theQueryAppend) = MacroUtilities.getCachePath(create, context, skipCache) val useGordFolders = CommandParseUtilities.hasOption(options, "-gordfolder") || hasDictFolderWrite var cachePath: String = null diff --git a/gortools/src/main/scala/gorsat/Macros/PartGor.scala b/gortools/src/main/scala/gorsat/Macros/PartGor.scala index 98e4e885..da18b337 100644 --- a/gortools/src/main/scala/gorsat/Macros/PartGor.scala +++ b/gortools/src/main/scala/gorsat/Macros/PartGor.scala @@ -28,8 +28,10 @@ import gorsat.Utilities.{AnalysisUtilities, MacroUtilities} import gorsat.gorsatGorIterator.MapAndListUtilities import gorsat.process.GorJavaUtilities import org.gorpipe.exceptions.{GorDataException, GorParsingException} +import org.gorpipe.gor.driver.linkfile.LinkFileUtil import org.gorpipe.gor.model.FileReader import org.gorpipe.gor.session.{GorContext, GorSession} +import org.gorpipe.util.Strings import java.util import scala.collection.mutable @@ -100,7 +102,18 @@ class PartGor extends MacroInfo("PARTGOR", CommandArguments("-gordfolder", "-s - }) val gordict = if (useGordFolders) CommandParseUtilities.GOR_DICTIONARY_FOLDER_PART else CommandParseUtilities.GOR_DICTIONARY_PART - val theCommand = partitions.keys.foldLeft(gordict) ((x, y) => x + " [" + theKey + "_" + y + "] " + y) + var theCommand = partitions.keys.foldLeft(gordict) ((x, y) => x + " [" + theKey + "_" + y + "] " + y) + + val linkOptions = LinkFileUtil.extractLinkOptionData(create.query) + if (!Strings.isNullOrEmpty(linkOptions)) { + theCommand += " -link " + linkOptions + } + + val linkMetaOptions = LinkFileUtil.extractLinkMetaOptionData(create.query) + if (!Strings.isNullOrEmpty(linkMetaOptions)) { + theCommand += String.format(" -linkMeta '%s'", linkMetaOptions) + } + parGorCommands.put(createKey, ExecutionBlock(create.groupName, theCommand, create.signature, theDependencies.toArray, create.batchGroupName, cachePath, isDictionary = true)) MacroParsingResult(parGorCommands, null) diff --git a/gortools/src/test/java/gorsat/UTestGorWrite.java b/gortools/src/test/java/gorsat/UTestGorWrite.java index 9e9a6d33..cc661504 100644 --- a/gortools/src/test/java/gorsat/UTestGorWrite.java +++ b/gortools/src/test/java/gorsat/UTestGorWrite.java @@ -35,6 +35,7 @@ import org.gorpipe.gor.model.BaseMeta; import org.gorpipe.gor.table.util.PathUtils; import org.gorpipe.gor.util.DataUtil; +import org.gorpipe.test.utils.FileTestUtils; import org.junit.*; import org.junit.contrib.java.lang.system.EnvironmentVariables; import org.junit.contrib.java.lang.system.RestoreSystemProperties; @@ -259,6 +260,25 @@ public void testWriteLinkFileWithInferFileName() throws IOException { } + @Test + public void testCreateWriteLinkCacheName() throws IOException { + // ENGKNOW-3577: a create whose query ends in `write -link` must store the actual + // written data file name in the cache, so referencing the create afterwards works. + environmentVariables.set(GorDriverConfig.GOR_DRIVER_LINK_MANAGED_DATA_ROOT_URL, + workDirPath.resolve("managed_data").toString()); + + String query = "create #test# = gorrow chr1,1,100 | write -link test.gor; nor [#test#]"; + String result = TestUtils.runGorPipe(query, "-gorroot", workDirPath.toString()); + + // The reference resolves and the original row survives the create -> link -> read round trip. + Assert.assertTrue("unexpected result: " + result, result.contains("chr1\t1\t100")); + + // The link points at a data file that actually exists on disk. + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("test.gor.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + } + @Test public void testWriteLinkFileWithInferFileNameForExistingLink() throws IOException { @@ -341,6 +361,68 @@ public void testWriteLinkFileForGordFolderInferFilenameParallel() throws IOExcep Assert.assertEquals(linkresult1, linkresult3); } + @Test + public void testCreateWriteLinkInferFileNamePgor() throws IOException { + // ENGKNOW-3577 follow-up: a pgor create whose query ends in `write -link` with an inferred + // (non-idempotent) data name must store a create-cache name matching the written gord + // folder, so referencing the create with [#test#] resolves to the same data. getCachePath + // derives the name via parseBaseOptions -> inferDataFileNameFromLinkFile; this guards + // against the name being re-derived to a different value. + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + + String viaCreate = TestUtils.runGorPipe( + "create #test# = pgor dbsnp.gor | write -link dbsnp3.gord; gor [#test#] | top 1000", + "-gorroot", workDirPath.toString()); + String viaLink = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); + + Assert.assertEquals(viaLink, viaCreate); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + } + + @Test + public void testCreateWriteLinkInferFileNameParallel() throws IOException { + // ENGKNOW-3577 follow-up: same check for a parallel create with inferred link data name. + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + + String viaCreate = TestUtils.runGorPipe( + "create #test# = parallel -parts <(nor dbsnp.gor | select chrom | distinct) <(gor -p #{col:Chrom} dbsnp.gor) | write -link dbsnp3.gord; gor [#test#] | top 1000", + "-gorroot", workDirPath.toString()); + String viaLink = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); + + Assert.assertEquals(viaLink, viaCreate); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + } + + @Test + public void testCreateWriteLinkInferFileNamePartgor() throws IOException { + // ENGKNOW-3577 follow-up: a partgor create whose query ends in `write -link` with an + // inferred (non-idempotent) data name must store a create-cache name matching the written + // data, so referencing the create with [#test#] resolves to the same data, and partgor + // must propagate -link so the link file is written (parity with pgor/parallel). + Path p = Paths.get("../tests/data/gor/dbsnp_test.gor"); + Files.copy(p, workDirPath.resolve("dbsnp.gor")); + File dictFile = FileTestUtils.createTempFile(workDir.getRoot(), "parts.gord", "dbsnp.gor\tA\n"); + + String viaCreate = TestUtils.runGorPipe( + "create #test# = partgor -dict " + dictFile.getAbsolutePath() + " <(gor dbsnp.gor | calc t '#{tags}') | write -link dbsnp3.gord; gor [#test#] | top 1000", + "-gorroot", workDirPath.toString()); + String viaLink = TestUtils.runGorPipe("gor dbsnp3.gord | top 1000", "-gorroot", workDirPath.toString()); + + Assert.assertEquals(viaLink, viaCreate); + + var linkFile = LinkFile.load(new FileSource(workDirPath.resolve("dbsnp3.gord.link").toString())); + Assert.assertEquals(1, linkFile.getEntriesCount()); + Assert.assertTrue(Files.exists(Path.of(linkFile.getLatestEntry().url()))); + } + @Test public void testWriteLinkFileForForkWrite() throws IOException { Path p = Paths.get("../tests/data/gor/dbsnp_test.gor");