From b934772150f0733aaddddbd270a22d2ca4447c83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tiberiu=20Sab=C4=83u?= <T.Sabau@student.tudelft.nl>
Date: Mon, 2 Feb 2026 15:52:50 +0100
Subject: [PATCH] fix(utils): accept single column csv files

---
 docs/features/csv-support.md           |  26 ++++++
 src/lib/utils/fileDetector.ts          |  24 ++++--
 test/fixtures/single-column-cities.csv |   8 ++
 test/fixtures/single-column-emails.csv |   3 +
 test/fixtures/single-column-ids.csv    |   5 ++
 test/fixtures/single-column-names.csv  |   8 ++
 test/unit/utils/csvProcessor.test.ts   | 110 +++++++++++++++++++++++++
 test/unit/utils/fileDetector.test.ts   | 109 ++++++++++++++++++++++++
 8 files changed, 287 insertions(+), 6 deletions(-)
 create mode 100644 test/fixtures/single-column-cities.csv
 create mode 100644 test/fixtures/single-column-emails.csv
 create mode 100644 test/fixtures/single-column-ids.csv
 create mode 100644 test/fixtures/single-column-names.csv

diff --git a/docs/features/csv-support.md b/docs/features/csv-support.md
index 2a9182da7..990af8f5a 100644
--- a/docs/features/csv-support.md
+++ b/docs/features/csv-support.md
@@ -12,6 +12,30 @@ CSV support in NeuroLink works just like image support - it's a multimodal input
 4. **Injects** formatted CSV data into your prompt text
 5. **Works** with ALL AI providers (not limited to vision models)
 
+### Single-Column CSV Support (CSV-009)
+
+NeuroLink fully supports **single-column CSV files**, which are commonly used for:
+
+- **Lists of IDs**: `ID123`, `ID456`, `ID789`
+- **Names**: `Alice Johnson`, `Bob Smith`, `Charlie Brown`
+- **Email addresses**: `alice@example.com`, `bob@company.org`
+- **Cities/Locations**: `New York`, `Los Angeles`, `Chicago`
+- **Product codes**, **account numbers**, **reference numbers**, etc.
+
+Single-column CSVs are automatically detected and processed like multi-column CSVs:
+
+```typescript
+// Example: Analyze a list of customer IDs
+const result = await neurolink.generate({
+  input: {
+    text: "Analyze these customer IDs and identify any patterns",
+    csvFiles: ["customer-ids.csv"], // Single column: ID123, ID456, ID789...
+  },
+});
+```
+
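+Detection is heuristic: single-column content is accepted as CSV when its lines have reasonable lengths, it contains no binary (NUL) characters, the coefficient of variation of its line lengths is below 1.0, and at least 50% of its lines are non-empty. These checks help distinguish single-column CSVs from prose or other text formats.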
+
 ## Quick Start
 
 ### SDK Usage
@@ -231,6 +255,8 @@ NeuroLink uses a **multi-strategy detection system** with confidence scores:
 4. **Content Heuristics** (75% confidence)
    - Analyzes file content patterns
    - Detects CSV by checking consistent comma-separated columns
+   - **CSV-009**: Supports single-column CSVs (e.g., lists of IDs, names, emails)
+   - Single-column detection checks line-length consistency (coefficient of variation < 1.0) and requires at least 50% non-empty lines
 
 The system stops at the **first strategy with 80%+ confidence**.
 
diff --git a/src/lib/utils/fileDetector.ts b/src/lib/utils/fileDetector.ts
index 7a1fd02e2..7d8dd95ab 100644
--- a/src/lib/utils/fileDetector.ts
+++ b/src/lib/utils/fileDetector.ts
@@ -1088,19 +1088,31 @@ class ContentHeuristicStrategy implements DetectionStrategy {
       );
       const noBinaryChars = !text.includes("\0");
 
-      // Single-column CSVs should have VERY uniform line lengths
-      // (data values like IDs, codes, numbers - not varied content)
+      // CSV-009: Accept single-column CSVs with varied data
+      // Single-column CSVs can contain varied data (names, cities, emails, IDs, etc.)
+      // We check for data-like characteristics rather than strict uniformity
       const lengths = lines.map((l) => l.length);
       const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
       const variance =
         lengths.reduce((sum, len) => sum + Math.pow(len - avgLength, 2), 0) /
         lengths.length;
       const stdDev = Math.sqrt(variance);
-      // Single-column CSVs can contain varied data (names, cities, emails, etc.)
-      // but should still show some consistency compared to random text
-      const hasUniformLengths = stdDev / avgLength < 0.75;
 
-      return hasReasonableLengths && noBinaryChars && hasUniformLengths;
+      // Relaxed coefficient of variation check (1.0 instead of 0.75)
+      // This allows for more variation while still filtering out random text/prose
+      const coefficientOfVariation = stdDev / avgLength;
+      const hasReasonableVariation = coefficientOfVariation < 1.0;
+
+      // Additional check: At least 50% of lines should be non-empty after trimming
+      const nonEmptyLines = lines.filter((l) => l.trim().length > 0).length;
+      const hasEnoughContent = nonEmptyLines / lines.length >= 0.5;
+
+      return (
+        hasReasonableLengths &&
+        noBinaryChars &&
+        hasReasonableVariation &&
+        hasEnoughContent
+      );
     }
 
     // Count delimiters per line and check consistency
diff --git a/test/fixtures/single-column-cities.csv b/test/fixtures/single-column-cities.csv
new file mode 100644
index 000000000..82ebdb452
--- /dev/null
+++ b/test/fixtures/single-column-cities.csv
@@ -0,0 +1,9 @@
+New York
+Los Angeles
+Chicago
+Houston
+Phoenix
+Philadelphia
+San Antonio
+San Diego
+Dallas
diff --git a/test/fixtures/single-column-emails.csv b/test/fixtures/single-column-emails.csv
new file mode 100644
index 000000000..7a2e91bf2
--- /dev/null
+++ b/test/fixtures/single-column-emails.csv
@@ -0,0 +1,3 @@
+alice@example.com
+bob.smith@company.org
+charlie.brown@mail.com
\ No newline at end of file
diff --git a/test/fixtures/single-column-ids.csv b/test/fixtures/single-column-ids.csv
new file mode 100644
index 000000000..613034fe4
--- /dev/null
+++ b/test/fixtures/single-column-ids.csv
@@ -0,0 +1,5 @@
+ID123
+ID456
+ID789
+ID101
+ID202
\ No newline at end of file
diff --git a/test/fixtures/single-column-names.csv b/test/fixtures/single-column-names.csv
new file mode 100644
index 000000000..d3456a46a
--- /dev/null
+++ b/test/fixtures/single-column-names.csv
@@ -0,0 +1,8 @@
+Alice Johnson
+Bob Smith
+Charlie Brown
+Diana Martinez
+Edward Thompson
+Frank Wilson
+Grace Lee
+Hannah Davis
\ No newline at end of file
diff --git a/test/unit/utils/csvProcessor.test.ts b/test/unit/utils/csvProcessor.test.ts
index d57ef8058..42f3fe39b 100644
--- a/test/unit/utils/csvProcessor.test.ts
+++ b/test/unit/utils/csvProcessor.test.ts
@@ -546,4 +546,114 @@ Charlie,35,Chicago`;
       expect(rawResult.metadata.totalLines).toBe(5); // header + 2 data + 2 whitespace
     });
   });
+
+  describe("CSV-009: Single-column CSV support", () => {
+    it("should process single-column CSV with IDs", async () => {
+      const csvData = Buffer.from("ID123\nID456\nID789\nID101");
+
+      const rawResult = await CSVProcessor.process(csvData, {
+        formatStyle: "raw",
+      });
+      const jsonResult = await CSVProcessor.process(csvData, {
+        formatStyle: "json",
+      });
+
+      expect(rawResult.type).toBe("csv");
+      expect(rawResult.metadata.columnCount).toBe(1);
+      expect(rawResult.metadata.rowCount).toBeGreaterThanOrEqual(3);
+      expect(rawResult.content).toContain("ID123");
+
+      expect(jsonResult.type).toBe("csv");
+      expect(jsonResult.metadata.columnCount).toBe(1);
+    });
+
+    it("should process single-column CSV with names", async () => {
+      const csvData = Buffer.from(
+        "Alice Johnson\nBob Smith\nCharlie Brown\nDiana Martinez",
+      );
+
+      const rawResult = await CSVProcessor.process(csvData, {
+        formatStyle: "raw",
+      });
+
+      expect(rawResult.type).toBe("csv");
+      expect(rawResult.metadata.columnCount).toBe(1);
+      expect(rawResult.metadata.rowCount).toBeGreaterThanOrEqual(3);
+      expect(rawResult.content).toContain("Alice Johnson");
+      expect(rawResult.content).toContain("Bob Smith");
+    });
+
+    it("should process single-column CSV with emails", async () => {
+      const csvData = Buffer.from(
+        "alice@example.com\nbob.smith@company.org\ncharlie.brown@mail.com",
+      );
+
+      const jsonResult = await CSVProcessor.process(csvData, {
+        formatStyle: "json",
+      });
+
+      expect(jsonResult.type).toBe("csv");
+      expect(jsonResult.metadata.columnCount).toBe(1);
+      // First row becomes header, so 2 data rows
+      expect(jsonResult.metadata.rowCount).toBe(2);
+      expect(jsonResult.content).toContain("alice@example.com");
+    });
+
+    it("should process single-column CSV with cities (varied lengths)", async () => {
+      const csvData = Buffer.from(
+        "New York\nLos Angeles\nChicago\nHouston\nPhoenix\nPhiladelphia",
+      );
+
+      const rawResult = await CSVProcessor.process(csvData, {
+        formatStyle: "raw",
+      });
+
+      expect(rawResult.type).toBe("csv");
+      expect(rawResult.metadata.columnCount).toBe(1);
+      // First row becomes header, so 5 data rows
+      expect(rawResult.metadata.rowCount).toBe(5);
+      expect(rawResult.content).toContain("New York");
+      expect(rawResult.content).toContain("Philadelphia");
+    });
+
+    it("should respect maxRows for single-column CSVs", async () => {
+      const csvData = Buffer.from("Item1\nItem2\nItem3\nItem4\nItem5\nItem6");
+
+      const result = await CSVProcessor.process(csvData, {
+        formatStyle: "raw",
+        maxRows: 3,
+      });
+
+      expect(result.metadata.rowCount).toBe(3);
+      expect(result.content).toContain("Item1");
+      expect(result.content).toContain("Item3");
+      expect(result.content).not.toContain("Item6");
+    });
+
+    it("should handle single-column CSV in markdown format", async () => {
+      const csvData = Buffer.from("Product\nLaptop\nMouse\nKeyboard");
+
+      const result = await CSVProcessor.process(csvData, {
+        formatStyle: "markdown",
+      });
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.columnCount).toBe(1);
+      expect(result.content).toContain("Product");
+      expect(result.content).toContain("Laptop");
+    });
+
+    it("should log success for single-column CSV processing", async () => {
+      const csvData = Buffer.from("Value1\nValue2\nValue3");
+      await CSVProcessor.process(csvData, { formatStyle: "raw" });
+
+      expect(logger.info).toHaveBeenCalledWith(
+        "[CSVProcessor] ✅ Processed CSV file",
+        expect.objectContaining({
+          formatStyle: "raw",
+          columnCount: 1,
+        }),
+      );
+    });
+  });
 });
diff --git a/test/unit/utils/fileDetector.test.ts b/test/unit/utils/fileDetector.test.ts
index 2a7d99aae..b1e535090 100644
--- a/test/unit/utils/fileDetector.test.ts
+++ b/test/unit/utils/fileDetector.test.ts
@@ -494,4 +494,113 @@ describe("FileDetector", () => {
       expect(result.type).toBe("pdf");
     });
   });
+
+  describe("CSV-009: Single-column CSV detection", () => {
+    it("should detect and process single-column CSV with IDs", async () => {
+      const csvPath = join(fixturesPath, "single-column-ids.csv");
+      const result = await FileDetector.detectAndProcess(csvPath);
+
+      expect(result.type).toBe("csv");
+      expect(result.mimeType).toBe("text/csv");
+      expect(result.metadata.columnCount).toBe(1);
+      expect(result.metadata.rowCount).toBeGreaterThanOrEqual(3);
+      expect(result.content).toContain("ID123");
+    });
+
+    it("should detect and process single-column CSV with names", async () => {
+      const csvPath = join(fixturesPath, "single-column-names.csv");
+      const result = await FileDetector.detectAndProcess(csvPath);
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.columnCount).toBe(1);
+      expect(result.metadata.rowCount).toBeGreaterThanOrEqual(5);
+      expect(result.content).toContain("Alice Johnson");
+      expect(result.content).toContain("Bob Smith");
+    });
+
+    it("should detect and process single-column CSV with cities (varied lengths)", async () => {
+      const csvPath = join(fixturesPath, "single-column-cities.csv");
+      const result = await FileDetector.detectAndProcess(csvPath);
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.columnCount).toBe(1);
+      expect(result.metadata.rowCount).toBeGreaterThanOrEqual(5);
+      expect(result.content).toContain("New York");
+      expect(result.content).toContain("Los Angeles");
+      expect(result.content).toContain("Philadelphia");
+    });
+
+    it("should detect and process single-column CSV with emails", async () => {
+      const csvPath = join(fixturesPath, "single-column-emails.csv");
+      const result = await FileDetector.detectAndProcess(csvPath);
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.columnCount).toBe(1);
+      // First row becomes header, so at least 2 data rows
+      expect(result.metadata.rowCount).toBeGreaterThanOrEqual(2);
+      expect(result.content).toContain("alice@example.com");
+      expect(result.content).toContain("bob.smith@company.org");
+    });
+
+    it("should detect single-column CSV from Buffer", async () => {
+      const csvBuffer = Buffer.from("Item1\nItem2\nItem3\nItem4");
+      const result = await FileDetector.detectAndProcess(csvBuffer);
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.columnCount).toBe(1);
+      // First row becomes header, so 3 data rows
+      expect(result.metadata.rowCount).toBe(3);
+    });
+
+    it("should respect allowedTypes for single-column CSVs", async () => {
+      const csvPath = join(fixturesPath, "single-column-ids.csv");
+      const result = await FileDetector.detectAndProcess(csvPath, {
+        allowedTypes: ["csv"],
+      });
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.columnCount).toBe(1);
+    });
+
+    it("should apply CSV options to single-column CSVs", async () => {
+      const csvPath = join(fixturesPath, "single-column-names.csv");
+      const result = await FileDetector.detectAndProcess(csvPath, {
+        csvOptions: {
+          maxRows: 3,
+          formatStyle: "json",
+        },
+      });
+
+      expect(result.type).toBe("csv");
+      expect(result.metadata.rowCount).toBe(3);
+      expect(result.metadata.columnCount).toBe(1);
+    });
+
+    it("should not detect prose/sentences as single-column CSV", async () => {
+      const proseBuffer = Buffer.from(
+        "This is a normal sentence with many words.\nHere is another sentence with even more words in it.\nAnd a third sentence to complete the paragraph.",
+      );
+      const result = await FileDetector.detectAndProcess(proseBuffer);
+
+      // Should be detected as text, not CSV (prose has >4 words per line)
+      expect(result.type).toBe("text");
+    });
+
+    it("should not detect JSON as single-column CSV", async () => {
+      const jsonBuffer = Buffer.from('[{"name": "Alice"}, {"name": "Bob"}]');
+      const result = await FileDetector.detectAndProcess(jsonBuffer);
+
+      // Should be detected as text (JSON), not CSV
+      expect(result.type).toBe("text");
+      expect(result.mimeType).toContain("json");
+    });
+
+    it("should not detect YAML as single-column CSV", async () => {
+      const yamlBuffer = Buffer.from("---\nkey1: value1\nkey2: value2\n");
+      const result = await FileDetector.detectAndProcess(yamlBuffer);
+
+      // Should be detected as text (YAML), not CSV
+      expect(result.type).toBe("text");
+    });
+  });
 });