diff --git a/xtheta-lab/outputs/data/hensen_chsh_summary.csv b/xtheta-lab/outputs/data/hensen_chsh_summary.csv index 96d009f..a066287 100644 --- a/xtheta-lab/outputs/data/hensen_chsh_summary.csv +++ b/xtheta-lab/outputs/data/hensen_chsh_summary.csv @@ -1,2 +1,2 @@ dataset_name,row_count,CHSH_S,CHSH_S_se,interpretation_warning,count_00,E_00,count_01,E_01,count_10,E_10,count_11,E_11,S_ci_low_95,S_ci_high_95,S_bootstrap_mean,S_bootstrap_std,bootstrap_samples,phi_eff,R_theta_eff,fit_status,fit_warning -hensen,4746,2.7777777777777777,0.20951312035156963,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.",0,0.0,9,0.7777777777777778,3,1.0,1,-1.0,1.1083333333333336,3.0,2.3399311940553424,0.5782461478532692,1000,0.1348461424403639,0.14197506625061654,Success,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy." +hensen,245,2.4224998704613703,0.20382659825201987,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.",53,0.7358490566037735,79,0.5949367088607594,62,0.4838709677419355,51,-0.6078431372549019,2.002545796252275,2.805228895642471,2.424742094194,0.20430086873147751,1000,0.4091482169821624,1.0657490771776925,Success,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy." 
diff --git a/xtheta-lab/outputs/data/hensen_setting_counts.csv b/xtheta-lab/outputs/data/hensen_setting_counts.csv index befb8e9..0848c6c 100644 --- a/xtheta-lab/outputs/data/hensen_setting_counts.csv +++ b/xtheta-lab/outputs/data/hensen_setting_counts.csv @@ -1,5 +1,5 @@ setting_pair,count,expectation -00,0,0.0 -01,9,0.7777777777777778 -10,3,1.0 -11,1,-1.0 +00,53,0.7358490566037735 +01,79,0.5949367088607594 +10,62,0.4838709677419355 +11,51,-0.6078431372549019 diff --git a/xtheta-lab/outputs/reports/hensen_validation_report.md b/xtheta-lab/outputs/reports/hensen_validation_report.md index 7860fd3..0fb747b 100644 --- a/xtheta-lab/outputs/reports/hensen_validation_report.md +++ b/xtheta-lab/outputs/reports/hensen_validation_report.md @@ -4,15 +4,15 @@ ## Summary Results -- **Total Row Count:** 4746 -- **CHSH S-statistic:** 2.777778 ± 0.209513 (Standard Error) -- **95% Bootstrap Confidence Interval:** [1.108333, 3.000000] +- **Total Row Count:** 245 +- **CHSH S-statistic:** 2.422500 ± 0.203827 (Standard Error) +- **95% Bootstrap Confidence Interval:** [2.002546, 2.805229] - **Bootstrap Samples:** 1000 ## Effective X-Theta Fit -- **Effective Phase ($\Phi_{eff}$):** 0.134846 rad -- **Effective Anisotropy ($R_{\Theta, eff}$):** 0.141975 +- **Effective Phase ($\Phi_{eff}$):** 0.409148 rad +- **Effective Anisotropy ($R_{\Theta, eff}$):** 1.065749 - **Fit Status:** Success - **Fit Warning:** Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy. 
@@ -20,13 +20,13 @@
 
 | setting_pair | count | expectation |
 |---------------:|--------:|--------------:|
-| 00 | 0 | 0 |
-| 01 | 9 | 0.777778 |
-| 10 | 3 | 1 |
-| 11 | 1 | -1 |
+| 00 | 53 | 0.735849 |
+| 01 | 79 | 0.594937 |
+| 10 | 62 | 0.483871 |
+| 11 | 51 | -0.607843 |
 
 ## Schema Validation (First Chunk)
 
 - Missing columns: []
-- Alice unique settings: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-- Bob unique settings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15, 17, 18, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+- Alice unique settings: [0, 1]
+- Bob unique settings: [0, 1]
diff --git a/xtheta-lab/xtheta/data/adapters/hensen.py b/xtheta-lab/xtheta/data/adapters/hensen.py
index 80bf6a1..84e53cd 100644
--- a/xtheta-lab/xtheta/data/adapters/hensen.py
+++ b/xtheta-lab/xtheta/data/adapters/hensen.py
@@ -49,42 +49,134 @@ def download_hensen_data(target_path: Path) -> bool:
 def load_hensen_dataset(path: str, chunksize: int = 200_000) -> Iterator[pd.DataFrame]:
     """
     Load Hensen (Delft) 2015 dataset from raw text file.
-    Mapping (based on download_delft.py):
-    - Col 1: Alice setting
-    - Col 2: Bob setting
-    - Col 3: Alice outcome (0/1)
-    - Col 4: Bob outcome (0/1)
+    Implements the official filtering and mapping logic from the 2015 Nature paper.
+
+    The raw file is read into memory in one go, reduced to the rows that pass
+    the event-ready (heralding) and signal-integrity filters, mapped to the
+    canonical trial schema, and then handed out in slices of ``chunksize`` rows.
+
+    Args:
+        path: Raw data file; it is handed to ``download_hensen_data`` before reading.
+        chunksize: Maximum number of filtered trials per yielded DataFrame.
+
+    Yields:
+        DataFrames with the columns ``trial_id`` (row label in the raw file),
+        ``timestamp``, ``alice_setting``, ``bob_setting``, ``alice_outcome``,
+        ``bob_outcome`` (+1 if the readout click time lies in the readout
+        window, else -1) and ``source_file``. Nothing is yielded when no row
+        passes the filters.
+
+    Raises:
+        FileNotFoundError: If ``download_hensen_data`` reports failure. This is
+            a generator, so the error surfaces on the first iteration.
     """
     p = Path(path)
     if not download_hensen_data(p):
         raise FileNotFoundError(f"Hensen data not found and could not be downloaded to: {path}")
 
-    # Read raw lines
-    with open(p, 'r', encoding='utf-8') as f:
-        lines = [l.strip().split(',') for l in f.readlines() if l.strip()]
-
-    data = []
-    for line in lines:
-        try:
-            # Map settings to 0/1: a=1->0, a=2->1; b=1->0, b=2->1 (approx)
-            # Actually Delft settings are a={0, 1} and b={0, 1} in CHSH terms.
-            a_set = int(line[1]) - 1
-            b_set = int(line[2]) - 1
-            a_out = 1 if int(line[4]) == 1 else -1
-            b_out = 1 if int(line[6]) == 1 else -1
-
-            data.append({
-                "trial_id": len(data),
-                "timestamp": line[0],
-                "alice_setting": a_set,
-                "bob_setting": b_set,
-                "alice_outcome": a_out,
-                "bob_outcome": b_out,
-                "source_file": p.name
-            })
-        except (ValueError, IndexError):
-            continue
-
-    df = pd.DataFrame(data)
-    for i in range(0, len(df), chunksize):
-        yield df.iloc[i : i + chunksize]
+    # Read raw data
+    # The file has no header. Col 0 is timestamp, followed by 16 data columns.
+    df_raw = pd.read_csv(p, header=None)
+
+    # Official constants for event-ready (heralding) and readout windows
+    EVENT_READY_WINDOW_START_CH0 = 5426350
+    EVENT_READY_WINDOW_START_CH1 = 5425700
+    EVENT_READY_WINDOW_LENGTH = 52450
+    EVENT_READY_WINDOW_SEPARATION = 250000
+    READOUT_WINDOW_START = 10620
+    READOUT_WINDOW_LENGTH = 3700
+    CHECK_FOR_INVALID_MARKER_IN_PAST = 250
+
+    # Column mapping (0-indexed based on raw file)
+    ER_CLICK1_TIME = 3
+    ER_CLICK1_CH = 4
+    ER_CLICK2_TIME = 5
+    ER_CLICK2_CH = 6
+    RN_A = 7
+    RN_B = 8
+    RO_CLICK_A_TIME = 11
+    RO_CLICK_B_TIME = 12
+    CLICK_AFTER_EXCITE_A = 13
+    CLICK_AFTER_EXCITE_B = 14
+    INVALID_MARKER_A = 15
+    INVALID_MARKER_B = 16
+
+    # 1. Heralding Filters (Event Ready)
+    t1 = df_raw[ER_CLICK1_TIME]
+    ch1 = df_raw[ER_CLICK1_CH]
+    t2 = df_raw[ER_CLICK2_TIME]
+    ch2 = df_raw[ER_CLICK2_CH]
+
+    # First click: inside the window [start, start + length) of its own channel.
+    filter_w1_ch0 = (
+        (EVENT_READY_WINDOW_START_CH0 <= t1)
+        & (t1 < EVENT_READY_WINDOW_START_CH0 + EVENT_READY_WINDOW_LENGTH)
+        & (ch1 == 0)
+    )
+    filter_w1_ch1 = (
+        (EVENT_READY_WINDOW_START_CH1 <= t1)
+        & (t1 < EVENT_READY_WINDOW_START_CH1 + EVENT_READY_WINDOW_LENGTH)
+        & (ch1 == 1)
+    )
+    w1_filter = filter_w1_ch0 | filter_w1_ch1
+
+    # Second click: same windows, shifted by the window separation.
+    w2_start_ch0 = EVENT_READY_WINDOW_START_CH0 + EVENT_READY_WINDOW_SEPARATION
+    w2_start_ch1 = EVENT_READY_WINDOW_START_CH1 + EVENT_READY_WINDOW_SEPARATION
+    filter_w2_ch0 = (
+        (w2_start_ch0 <= t2)
+        & (t2 < w2_start_ch0 + EVENT_READY_WINDOW_LENGTH)
+        & (ch2 == 0)
+    )
+    filter_w2_ch1 = (
+        (w2_start_ch1 <= t2)
+        & (t2 < w2_start_ch1 + EVENT_READY_WINDOW_LENGTH)
+        & (ch2 == 1)
+    )
+    w2_filter = filter_w2_ch0 | filter_w2_ch1
+
+    # The two heralding clicks must come from different channels.
+    psi_min_filter = (ch1 != ch2)
+    ready_filter = w1_filter & w2_filter & psi_min_filter
+
+    # 2. Signal Integrity Filters
+    inv_a = df_raw[INVALID_MARKER_A]
+    inv_b = df_raw[INVALID_MARKER_B]
+    no_invalid_marker = (
+        ((inv_a == 0) | (inv_a > CHECK_FOR_INVALID_MARKER_IN_PAST))
+        & ((inv_b == 0) | (inv_b > CHECK_FOR_INVALID_MARKER_IN_PAST))
+    )
+
+    exc_a = df_raw[CLICK_AFTER_EXCITE_A]
+    exc_b = df_raw[CLICK_AFTER_EXCITE_B]
+    no_excitation = (exc_a == 0) & (exc_b == 0)
+
+    # Final Bell Trial Filter
+    bell_trial_filter = ready_filter & no_invalid_marker & no_excitation
+    df_filtered = df_raw[bell_trial_filter].copy()
+
+    if df_filtered.empty:
+        # A bare return ends the generator; a returned value would never reach
+        # code that iterates over it.
+        return
+
+    # 3. Derive Outcomes from Readout Windows
+    ro_a = df_filtered[RO_CLICK_A_TIME]
+    ro_b = df_filtered[RO_CLICK_B_TIME]
+    readout_end = READOUT_WINDOW_START + READOUT_WINDOW_LENGTH
+    det_a = (ro_a > READOUT_WINDOW_START) & (ro_a <= readout_end)
+    det_b = (ro_b > READOUT_WINDOW_START) & (ro_b <= readout_end)
+
+    # 4. Map to Canonical Schema
+    df_final = pd.DataFrame({
+        "trial_id": df_filtered.index,
+        "timestamp": df_filtered[0],
+        "alice_setting": df_filtered[RN_A].astype(int),
+        "bob_setting": df_filtered[RN_B].astype(int),
+        "alice_outcome": np.where(det_a, 1, -1),
+        "bob_outcome": np.where(det_b, 1, -1),
+        "source_file": p.name
+    })
+
+    for i in range(0, len(df_final), chunksize):
+        yield df_final.iloc[i : i + chunksize]