Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xtheta-lab/outputs/data/hensen_chsh_summary.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
dataset_name,row_count,CHSH_S,CHSH_S_se,interpretation_warning,count_00,E_00,count_01,E_01,count_10,E_10,count_11,E_11,S_ci_low_95,S_ci_high_95,S_bootstrap_mean,S_bootstrap_std,bootstrap_samples,phi_eff,R_theta_eff,fit_status,fit_warning
hensen,4746,2.7777777777777777,0.20951312035156963,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.",0,0.0,9,0.7777777777777778,3,1.0,1,-1.0,1.1083333333333336,3.0,2.3399311940553424,0.5782461478532692,1000,0.1348461424403639,0.14197506625061654,Success,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy."
hensen,245,2.4224998704613703,0.20382659825201987,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.",53,0.7358490566037735,79,0.5949367088607594,62,0.4838709677419355,51,-0.6078431372549019,2.002545796252275,2.805228895642471,2.424742094194,0.20430086873147751,1000,0.4091482169821624,1.0657490771776925,Success,"Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy."
8 changes: 4 additions & 4 deletions xtheta-lab/outputs/data/hensen_setting_counts.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
setting_pair,count,expectation
00,0,0.0
01,9,0.7777777777777778
10,3,1.0
11,1,-1.0
00,53,0.7358490566037735
01,79,0.5949367088607594
10,62,0.4838709677419355
11,51,-0.6078431372549019
22 changes: 11 additions & 11 deletions xtheta-lab/outputs/reports/hensen_validation_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,29 @@

## Summary Results

- **Total Row Count:** 4746
- **CHSH S-statistic:** 2.777778 ± 0.209513 (Standard Error)
- **95% Bootstrap Confidence Interval:** [1.108333, 3.000000]
- **Total Row Count:** 245
- **CHSH S-statistic:** 2.422500 ± 0.203827 (Standard Error)
- **95% Bootstrap Confidence Interval:** [2.002546, 2.805229]
- **Bootstrap Samples:** 1000

## Effective X-Theta Fit

- **Effective Phase ($\Phi_{eff}$):** 0.134846 rad
- **Effective Anisotropy ($R_{\Theta, eff}$):** 0.141975
- **Effective Phase ($\Phi_{eff}$):** 0.409148 rad
- **Effective Anisotropy ($R_{\Theta, eff}$):** 1.065749
- **Fit Status:** Success
- **Fit Warning:** Phi_eff is an effective phenomenological parameter only. Without gravitational path, altitude, curvature, or spacetime-baseline metadata, this is not evidence of spacetime-induced X-Theta holonomy.

## Setting Expectations and Counts

| setting_pair | count | expectation |
|---------------:|--------:|--------------:|
| 00 | 0 | 0 |
| 01 | 9 | 0.777778 |
| 10 | 3 | 1 |
| 11 | 1 | -1 |
| 00 | 53 | 0.735849 |
| 01 | 79 | 0.594937 |
| 10 | 62 | 0.483871 |
| 11 | 51 | -0.607843 |

## Schema Validation (First Chunk)

- Missing columns: []
- Alice unique settings: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
- Bob unique settings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 14, 15, 17, 18, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
- Alice unique settings: [0, 1]
- Bob unique settings: [0, 1]
115 changes: 81 additions & 34 deletions xtheta-lab/xtheta/data/adapters/hensen.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,42 +49,89 @@ def download_hensen_data(target_path: Path) -> bool:
def load_hensen_dataset(path: str, chunksize: int = 200_000) -> Iterator[pd.DataFrame]:
    """
    Load Hensen (Delft) 2015 dataset from raw text file.

    Implements the official filtering and mapping logic from the 2015 Nature paper:
    rows are kept only when they pass the event-ready (heralding) window checks,
    carry no recent invalid marker and no click after excitation; outcomes are
    then derived from the readout-window click times.

    Args:
        path: Location of the raw data file. ``download_hensen_data`` is called
            with this path first and must report success.
        chunksize: Maximum number of rows per yielded DataFrame.

    Yields:
        DataFrames with the columns ``trial_id``, ``timestamp``,
        ``alice_setting``, ``bob_setting``, ``alice_outcome``, ``bob_outcome``
        and ``source_file``. Outcomes are encoded as +1 / -1. ``trial_id`` is
        the row position of the trial in the raw file, not a dense counter.
        Nothing is yielded when no row survives the filters.

    Raises:
        FileNotFoundError: If ``download_hensen_data`` reports failure. Because
            this function is a generator, the error surfaces on first iteration.
    """
    p = Path(path)
    if not download_hensen_data(p):
        raise FileNotFoundError(f"Hensen data not found and could not be downloaded to: {path}")

    # Read raw data.
    # The file has no header. Col 0 is timestamp, followed by 16 data columns.
    df_raw = pd.read_csv(p, header=None)

    # Official constants for event-ready (heralding) and readout windows
    EVENT_READY_WINDOW_START_CH0 = 5426350
    EVENT_READY_WINDOW_START_CH1 = 5425700
    EVENT_READY_WINDOW_LENGTH = 52450
    EVENT_READY_WINDOW_SEPARATION = 250000
    READOUT_WINDOW_START = 10620
    READOUT_WINDOW_LENGTH = 3700
    CHECK_FOR_INVALID_MARKER_IN_PAST = 250

    # Column mapping (0-indexed based on raw file)
    ER_CLICK1_TIME = 3
    ER_CLICK1_CH = 4
    ER_CLICK2_TIME = 5
    ER_CLICK2_CH = 6
    RN_A = 7
    RN_B = 8
    RO_CLICK_A_TIME = 11
    RO_CLICK_B_TIME = 12
    CLICK_AFTER_EXCITE_A = 13
    CLICK_AFTER_EXCITE_B = 14
    INVALID_MARKER_A = 15
    INVALID_MARKER_B = 16

    def in_ready_window(times, channels, offset):
        """Mask of clicks inside their channel's half-open window, shifted by *offset*."""
        ch0_start = EVENT_READY_WINDOW_START_CH0 + offset
        ch1_start = EVENT_READY_WINDOW_START_CH1 + offset
        on_ch0 = (
            (times >= ch0_start)
            & (times < ch0_start + EVENT_READY_WINDOW_LENGTH)
            & (channels == 0)
        )
        on_ch1 = (
            (times >= ch1_start)
            & (times < ch1_start + EVENT_READY_WINDOW_LENGTH)
            & (channels == 1)
        )
        return on_ch0 | on_ch1

    # 1. Heralding Filters (Event Ready)
    ch1 = df_raw[ER_CLICK1_CH]
    ch2 = df_raw[ER_CLICK2_CH]
    w1_filter = in_ready_window(df_raw[ER_CLICK1_TIME], ch1, 0)
    # The second click is checked against the same windows, shifted by the separation.
    w2_filter = in_ready_window(df_raw[ER_CLICK2_TIME], ch2, EVENT_READY_WINDOW_SEPARATION)
    # The two heralding clicks must come from different channels.
    psi_min_filter = ch1 != ch2
    ready_filter = w1_filter & w2_filter & psi_min_filter

    # 2. Signal Integrity Filters
    inv_a = df_raw[INVALID_MARKER_A]
    inv_b = df_raw[INVALID_MARKER_B]
    # A marker value of 0, or one above the look-back threshold, is accepted.
    no_invalid_marker = (
        ((inv_a == 0) | (inv_a > CHECK_FOR_INVALID_MARKER_IN_PAST))
        & ((inv_b == 0) | (inv_b > CHECK_FOR_INVALID_MARKER_IN_PAST))
    )
    no_excitation = (df_raw[CLICK_AFTER_EXCITE_A] == 0) & (df_raw[CLICK_AFTER_EXCITE_B] == 0)

    # Final Bell Trial Filter
    bell_trial_filter = ready_filter & no_invalid_marker & no_excitation
    df_filtered = df_raw[bell_trial_filter].copy()

    if df_filtered.empty:
        # Inside a generator a bare return is the way to yield nothing; a
        # returned value would only end up in StopIteration.value.
        return

    # 3. Derive Outcomes from Readout Windows
    readout_end = READOUT_WINDOW_START + READOUT_WINDOW_LENGTH
    ro_a = df_filtered[RO_CLICK_A_TIME]
    ro_b = df_filtered[RO_CLICK_B_TIME]
    det_a = (ro_a > READOUT_WINDOW_START) & (ro_a <= readout_end)
    det_b = (ro_b > READOUT_WINDOW_START) & (ro_b <= readout_end)

    # 4. Map to Canonical Schema
    df_final = pd.DataFrame({
        "trial_id": df_filtered.index,
        "timestamp": df_filtered[0],
        "alice_setting": df_filtered[RN_A].astype(int),
        "bob_setting": df_filtered[RN_B].astype(int),
        "alice_outcome": np.where(det_a, 1, -1),
        "bob_outcome": np.where(det_b, 1, -1),
        "source_file": p.name,
    })

    for i in range(0, len(df_final), chunksize):
        yield df_final.iloc[i : i + chunksize]
Loading