divyang4481 · divyang4481 · Jun 7, 2026 · Jun 6, 2026
diff --git a/xtheta-lab/outputs/data/hensen_chsh_summary.csv b/xtheta-lab/outputs/data/hensen_chsh_summary.csv
@@ -1,2 +1,2 @@
 dataset_name,row_count,CHSH_S,CHSH_S_se,interpretation_warning,count_00,E_00,count_01,E_01,count_10,E_10,count_11,E_11,S_ci_low_95,S_ci_high_95,S_bootstrap_mean,S_bootstrap_std,bootstrap_samples,phi_eff,R_theta_eff,fit_status,fit_warning
-hensen,4746,2.7777777777777777,0.20951312035156963,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.",0,0.0,9,0.7777777777777778,3,1.0,1,-1.0,1.1083333333333336,3.0,2.3399311940553424,0.5782461478532692,1000,0.1348461424403639,0.14197506625061654,Success,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy."
+hensen,245,2.4224998704613703,0.20382659825201987,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.",53,0.7358490566037735,79,0.5949367088607594,62,0.4838709677419355,51,-0.6078431372549019,2.002545796252275,2.805228895642471,2.424742094194,0.20430086873147751,1000,0.4091482169821624,1.0657490771776925,Success,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy."
diff --git a/xtheta-lab/outputs/data/hensen_setting_counts.csv b/xtheta-lab/outputs/data/hensen_setting_counts.csv
@@ -1,5 +1,5 @@
 setting_pair,count,expectation
-00,0,0.0
-01,9,0.7777777777777778
-10,3,1.0
-11,1,-1.0
+00,53,0.7358490566037735
+01,79,0.5949367088607594
+10,62,0.4838709677419355
+11,51,-0.6078431372549019
diff --git a/xtheta-lab/outputs/reports/hensen_validation_report.md b/xtheta-lab/outputs/reports/hensen_validation_report.md
@@ -4,29 +4,29 @@
 
 ## Summary Results
 
-- **Total Row Count:** 4746
-- **CHSH S-statistic:** 2.777778 ± 0.209513 (Standard Error)
-- **95% Bootstrap Confidence Interval:** [1.108333, 3.000000]
+- **Total Row Count:** 245
+- **CHSH S-statistic:** 2.422500 ± 0.203827 (Standard Error)
+- **95% Bootstrap Confidence Interval:** [2.002546, 2.805229]
 - **Bootstrap Samples:** 1000
 
 ## Effective X-Theta Fit
 
-- **Effective Phase ($\Phi_{eff}$):** 0.134846 rad
-- **Effective Anisotropy ($R_{\Theta, eff}$):** 0.141975
+- **Effective Phase ($\Phi_{eff}$):** 0.409148 rad
+- **Effective Anisotropy ($R_{\Theta, eff}$):** 1.065749
 - **Fit Status:** Success
 - **Fit Warning:** Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.
 
 ## Setting Expectations and Counts
 
 |   setting_pair |   count |   expectation |
 |---------------:|--------:|--------------:|
-|             00 |       0 |      0        |
-|             01 |       9 |      0.777778 |
-|             10 |       3 |      1        |
-|             11 |       1 |     -1        |
+|             00 |      53 |      0.735849 |
+|             01 |      79 |      0.594937 |
+|             10 |      62 |      0.483871 |
+|             11 |      51 |     -0.607843 |
 
 ## Schema Validation (First Chunk)
 
 - Missing columns: []
-- Alice unique settings: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-- Bob unique settings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15, 17, 18, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+- Alice unique settings: [0, 1]
+- Bob unique settings: [0, 1]
diff --git a/xtheta-lab/xtheta/data/adapters/hensen.py b/xtheta-lab/xtheta/data/adapters/hensen.py
@@ -49,42 +49,118 @@ def download_hensen_data(target_path: Path) -> bool:
 def load_hensen_dataset(path: str, chunksize: int = 200_000) -> Iterator[pd.DataFrame]:
     """
     Load Hensen (Delft) 2015 dataset from raw text file.
-    Mapping (based on download_delft.py):
-    - Col 1: Alice setting
-    - Col 2: Bob setting
-    - Col 3: Alice outcome (0/1)
-    - Col 4: Bob outcome (0/1)
+    Implements the official filtering and mapping logic from the 2015 Nature paper.
+
+    The headerless CSV is read in full, reduced to rows that pass the
+    event-ready (heralding), invalid-marker and click-after-excitation filters,
+    and mapped to the canonical columns: trial_id, timestamp, alice_setting,
+    bob_setting, alice_outcome, bob_outcome, source_file.
+
+    Args:
+        path: Location of the raw data file. download_hensen_data() is called
+            on it first and must report success.
+        chunksize: Maximum number of rows per yielded DataFrame.
+
+    Yields:
+        Consecutive row slices of the filtered trials; outcomes are +1 for a
+        click inside the readout window and -1 otherwise. Nothing is yielded
+        when no row passes the filters.
+
+    Raises:
+        FileNotFoundError: If download_hensen_data() returns a falsy value.
+
+    NOTE(review): window constants and column indices are hard-coded; confirm
+    them against the published analysis script.
     """
     p = Path(path)
     if not download_hensen_data(p):
         raise FileNotFoundError(f"Hensen data not found and could not be downloaded to: {path}")
 
-    # Read raw lines
-    with open(p, 'r', encoding='utf-8') as f:
-        lines = [l.strip().split(',') for l in f.readlines() if l.strip()]
-
-    data = []
-    for line in lines:
-        try:
-            # Map settings to 0/1: a=1->0, a=2->1; b=1->0, b=2->1 (approx)
-            # Actually Delft settings are a={0, 1} and b={0, 1} in CHSH terms.
-            a_set = int(line[1]) - 1
-            b_set = int(line[2]) - 1
-            a_out = 1 if int(line[4]) == 1 else -1
-            b_out = 1 if int(line[6]) == 1 else -1
-
-            data.append({
-                "trial_id": len(data),
-                "timestamp": line[0],
-                "alice_setting": a_set,
-                "bob_setting": b_set,
-                "alice_outcome": a_out,
-                "bob_outcome": b_out,
-                "source_file": p.name
-            })
-        except (ValueError, IndexError):
-            continue
-
-    df = pd.DataFrame(data)
-    for i in range(0, len(df), chunksize):
-        yield df.iloc[i : i + chunksize]
+    # Read raw data
+    # The file has no header. Col 0 is timestamp, followed by 16 data columns.
+    df_raw = pd.read_csv(p, header=None)
+
+    # Official constants for event-ready (heralding) and readout windows
+    EVENT_READY_WINDOW_START_CH0 = 5426350
+    EVENT_READY_WINDOW_START_CH1 = 5425700
+    EVENT_READY_WINDOW_LENGTH = 52450
+    EVENT_READY_WINDOW_SEPARATION = 250000
+    READOUT_WINDOW_START = 10620
+    READOUT_WINDOW_LENGTH = 3700
+    CHECK_FOR_INVALID_MARKER_IN_PAST = 250
+
+    # Column mapping (0-indexed based on raw file)
+    ER_CLICK1_TIME = 3
+    ER_CLICK1_CH = 4
+    ER_CLICK2_TIME = 5
+    ER_CLICK2_CH = 6
+    RN_A = 7
+    RN_B = 8
+    RO_CLICK_A_TIME = 11
+    RO_CLICK_B_TIME = 12
+    CLICK_AFTER_EXCITE_A = 13
+    CLICK_AFTER_EXCITE_B = 14
+    INVALID_MARKER_A = 15
+    INVALID_MARKER_B = 16
+
+    # 1. Heralding Filters (Event Ready)
+    t1 = df_raw[ER_CLICK1_TIME]
+    ch1 = df_raw[ER_CLICK1_CH]
+    t2 = df_raw[ER_CLICK2_TIME]
+    ch2 = df_raw[ER_CLICK2_CH]
+
+    filter_w1_ch0 = (EVENT_READY_WINDOW_START_CH0 <= t1) & (t1 < EVENT_READY_WINDOW_START_CH0 + EVENT_READY_WINDOW_LENGTH) & (ch1 == 0)
+    filter_w1_ch1 = (EVENT_READY_WINDOW_START_CH1 <= t1) & (t1 < EVENT_READY_WINDOW_START_CH1 + EVENT_READY_WINDOW_LENGTH) & (ch1 == 1)
+    w1_filter = filter_w1_ch0 | filter_w1_ch1
+
+    filter_w2_ch0 = (EVENT_READY_WINDOW_START_CH0 + EVENT_READY_WINDOW_SEPARATION <= t2) & (t2 < EVENT_READY_WINDOW_START_CH0 + EVENT_READY_WINDOW_SEPARATION + EVENT_READY_WINDOW_LENGTH) & (ch2 == 0)
+    filter_w2_ch1 = (EVENT_READY_WINDOW_START_CH1 + EVENT_READY_WINDOW_SEPARATION <= t2) & (t2 < EVENT_READY_WINDOW_START_CH1 + EVENT_READY_WINDOW_SEPARATION + EVENT_READY_WINDOW_LENGTH) & (ch2 == 1)
+    w2_filter = filter_w2_ch0 | filter_w2_ch1
+
+    # The two heralding clicks must come from different channels.
+    psi_min_filter = (ch1 != ch2)
+    ready_filter = w1_filter & w2_filter & psi_min_filter
+
+    # 2. Signal Integrity Filters
+    inv_a = df_raw[INVALID_MARKER_A]
+    inv_b = df_raw[INVALID_MARKER_B]
+    # A marker value of 0, or one beyond the look-back threshold, is accepted.
+    no_invalid_marker = ((inv_a == 0) | (inv_a > CHECK_FOR_INVALID_MARKER_IN_PAST)) & \
+                        ((inv_b == 0) | (inv_b > CHECK_FOR_INVALID_MARKER_IN_PAST))
+
+    exc_a = df_raw[CLICK_AFTER_EXCITE_A]
+    exc_b = df_raw[CLICK_AFTER_EXCITE_B]
+    no_excitation = (exc_a == 0) & (exc_b == 0)
+
+    # Final Bell Trial Filter
+    bell_trial_filter = ready_filter & no_invalid_marker & no_excitation
+    df_filtered = df_raw[bell_trial_filter].copy()
+
+    if df_filtered.empty:
+        # Generator function: a bare return ends iteration with zero chunks
+        # (a returned value would be silently discarded by the consumer).
+        return
+
+    # 3. Derive Outcomes from Readout Windows
+    ro_a = df_filtered[RO_CLICK_A_TIME]
+    ro_b = df_filtered[RO_CLICK_B_TIME]
+    det_a = (ro_a > READOUT_WINDOW_START) & (ro_a <= READOUT_WINDOW_START + READOUT_WINDOW_LENGTH)
+    det_b = (ro_b > READOUT_WINDOW_START) & (ro_b <= READOUT_WINDOW_START + READOUT_WINDOW_LENGTH)
+
+    # 4. Map to Canonical Schema
+    # Series.map keeps the +1/-1 encoding pandas-only, so this block does not
+    # depend on numpy being imported at module level.
+    outcome_map = {True: 1, False: -1}
+    # trial_id is the raw-file row label, preserved through filtering.
+    df_final = pd.DataFrame({
+        "trial_id": df_filtered.index,
+        "timestamp": df_filtered[0],
+        "alice_setting": df_filtered[RN_A].astype(int),
+        "bob_setting": df_filtered[RN_B].astype(int),
+        "alice_outcome": det_a.map(outcome_map),
+        "bob_outcome": det_b.map(outcome_map),
+        "source_file": p.name
+    })
+
+    for i in range(0, len(df_final), chunksize):
+        yield df_final.iloc[i : i + chunksize]