From e1c6bcc83bffa43952c489cd63fe4a36c18620bf Mon Sep 17 00:00:00 2001 From: Lawrence Sinclair Date: Tue, 12 May 2026 00:09:48 +0700 Subject: [PATCH] jenner-check: add 7 Jenner compatibility bundles + runner Each tNNN_* directory under jenner-check/ submits a SAS program to the public Jenner API and verifies the response matches a captured snapshot. Bundles: t001_data_import_csv PROC IMPORT + DATA step age categorization t002_merge_sort_invoices PROC SORT + DATA MERGE + PROC SQL counts t003_customer_format_sql PROC FORMAT + INPUT/PUT + PROC SQL aggregation t004_sales_returns_split DATA step with two named OUTPUT destinations t005_proc_means_basket_value PROC MEANS CLASS/VAR/OUTPUT with discount math t006_supplier_freq_weight SUBSTR + two-stage merge + PROC FREQ WEIGHT t007_rfm_proc_sql INNER JOIN + COUNT/SUM + INTCK recency Each bundle pairs ~30-200 rows of sample data (carved from this repo's Data/ files) with a script lightly adapted from the corresponding file under Code/. Adaptations are limited to replacing the SAS Viya /home/u62678062/sasuser.v94 FILENAME paths with relative paths to files in the bundle's input/ subdirectory; logic, PROC options, and output semantics match the upstream. The runner (run_jenner.sh / run_jenner.bat / run_jenner.sas) concatenates autoexec.sas + script.sas, uploads any input/ files, POSTs to https://api.jenneranalytics.com/v1/run, and prints the result. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- jenner-check/README.md | 66 +++ jenner-check/run_jenner.bat | 43 ++ jenner-check/run_jenner.sas | 526 ++++++++++++++++++ jenner-check/run_jenner.sh | 236 ++++++++ .../t001_data_import_csv/autoexec.sas | 6 + .../t001_data_import_csv/expected.json | 16 + .../t001_data_import_csv/expected/files.md | 16 + .../t001_data_import_csv/expected/log.txt | 28 + .../t001_data_import_csv/expected/output.txt | 24 + .../t001_data_import_csv/input/customers.csv | 31 ++ jenner-check/t001_data_import_csv/meta.json | 8 + jenner-check/t001_data_import_csv/script.sas | 40 ++ .../t002_merge_sort_invoices/autoexec.sas | 13 + .../t002_merge_sort_invoices/expected.json | 17 + .../expected/files.md | 18 + .../t002_merge_sort_invoices/expected/log.txt | 35 ++ .../expected/output.txt | 16 + .../t002_merge_sort_invoices/input/basket.csv | 61 ++ .../input/invoice.csv | 21 + .../t002_merge_sort_invoices/meta.json | 8 + .../t002_merge_sort_invoices/script.sas | 36 ++ .../t003_customer_format_sql/autoexec.sas | 28 + .../t003_customer_format_sql/expected.json | 15 + .../expected/files.md | 18 + .../t003_customer_format_sql/expected/log.txt | 69 +++ .../expected/output.txt | 9 + .../input/customers.csv | 51 ++ .../input/invoice.csv | 101 ++++ .../t003_customer_format_sql/meta.json | 8 + .../t003_customer_format_sql/script.sas | 64 +++ .../t004_sales_returns_split/autoexec.sas | 6 + .../t004_sales_returns_split/expected.json | 16 + .../expected/files.md | 15 + .../t004_sales_returns_split/expected/log.txt | 24 + .../expected/output.txt | 28 + .../input/invoice.csv | 41 ++ .../t004_sales_returns_split/meta.json | 8 + .../t004_sales_returns_split/script.sas | 31 ++ .../t005_proc_means_basket_value/autoexec.sas | 13 + .../expected.json | 14 + .../expected/files.md | 23 + .../expected/log.txt | 55 ++ .../expected/output.txt | 16 + .../input/basket.csv | 81 +++ .../input/products.csv | 101 ++++ .../input/promotions.csv | 5 + 
.../t005_proc_means_basket_value/meta.json | 8 + .../t005_proc_means_basket_value/script.sas | 50 ++ .../t006_supplier_freq_weight/autoexec.sas | 23 + .../t006_supplier_freq_weight/expected.json | 14 + .../expected/files.md | 22 + .../expected/log.txt | 68 +++ .../expected/output.txt | 15 + .../input/basket.csv | 81 +++ .../input/products.csv | 101 ++++ .../input/suppliers.csv | 10 + .../t006_supplier_freq_weight/meta.json | 8 + .../t006_supplier_freq_weight/script.sas | 49 ++ jenner-check/t007_rfm_proc_sql/autoexec.sas | 65 +++ jenner-check/t007_rfm_proc_sql/expected.json | 16 + .../t007_rfm_proc_sql/expected/files.md | 27 + .../t007_rfm_proc_sql/expected/log.txt | 81 +++ .../t007_rfm_proc_sql/expected/output.txt | 16 + .../t007_rfm_proc_sql/input/basket.csv | 201 +++++++ .../t007_rfm_proc_sql/input/customers.csv | 31 ++ .../t007_rfm_proc_sql/input/invoice_sales.csv | 49 ++ .../t007_rfm_proc_sql/input/products.csv | 101 ++++ .../t007_rfm_proc_sql/input/promotions.csv | 5 + jenner-check/t007_rfm_proc_sql/meta.json | 8 + jenner-check/t007_rfm_proc_sql/script.sas | 35 ++ 70 files changed, 3189 insertions(+) create mode 100644 jenner-check/README.md create mode 100644 jenner-check/run_jenner.bat create mode 100644 jenner-check/run_jenner.sas create mode 100755 jenner-check/run_jenner.sh create mode 100644 jenner-check/t001_data_import_csv/autoexec.sas create mode 100644 jenner-check/t001_data_import_csv/expected.json create mode 100644 jenner-check/t001_data_import_csv/expected/files.md create mode 100644 jenner-check/t001_data_import_csv/expected/log.txt create mode 100644 jenner-check/t001_data_import_csv/expected/output.txt create mode 100644 jenner-check/t001_data_import_csv/input/customers.csv create mode 100644 jenner-check/t001_data_import_csv/meta.json create mode 100644 jenner-check/t001_data_import_csv/script.sas create mode 100644 jenner-check/t002_merge_sort_invoices/autoexec.sas create mode 100644 jenner-check/t002_merge_sort_invoices/expected.json create 
mode 100644 jenner-check/t002_merge_sort_invoices/expected/files.md create mode 100644 jenner-check/t002_merge_sort_invoices/expected/log.txt create mode 100644 jenner-check/t002_merge_sort_invoices/expected/output.txt create mode 100644 jenner-check/t002_merge_sort_invoices/input/basket.csv create mode 100644 jenner-check/t002_merge_sort_invoices/input/invoice.csv create mode 100644 jenner-check/t002_merge_sort_invoices/meta.json create mode 100644 jenner-check/t002_merge_sort_invoices/script.sas create mode 100644 jenner-check/t003_customer_format_sql/autoexec.sas create mode 100644 jenner-check/t003_customer_format_sql/expected.json create mode 100644 jenner-check/t003_customer_format_sql/expected/files.md create mode 100644 jenner-check/t003_customer_format_sql/expected/log.txt create mode 100644 jenner-check/t003_customer_format_sql/expected/output.txt create mode 100644 jenner-check/t003_customer_format_sql/input/customers.csv create mode 100644 jenner-check/t003_customer_format_sql/input/invoice.csv create mode 100644 jenner-check/t003_customer_format_sql/meta.json create mode 100644 jenner-check/t003_customer_format_sql/script.sas create mode 100644 jenner-check/t004_sales_returns_split/autoexec.sas create mode 100644 jenner-check/t004_sales_returns_split/expected.json create mode 100644 jenner-check/t004_sales_returns_split/expected/files.md create mode 100644 jenner-check/t004_sales_returns_split/expected/log.txt create mode 100644 jenner-check/t004_sales_returns_split/expected/output.txt create mode 100644 jenner-check/t004_sales_returns_split/input/invoice.csv create mode 100644 jenner-check/t004_sales_returns_split/meta.json create mode 100644 jenner-check/t004_sales_returns_split/script.sas create mode 100644 jenner-check/t005_proc_means_basket_value/autoexec.sas create mode 100644 jenner-check/t005_proc_means_basket_value/expected.json create mode 100644 jenner-check/t005_proc_means_basket_value/expected/files.md create mode 100644 
jenner-check/t005_proc_means_basket_value/expected/log.txt create mode 100644 jenner-check/t005_proc_means_basket_value/expected/output.txt create mode 100644 jenner-check/t005_proc_means_basket_value/input/basket.csv create mode 100644 jenner-check/t005_proc_means_basket_value/input/products.csv create mode 100644 jenner-check/t005_proc_means_basket_value/input/promotions.csv create mode 100644 jenner-check/t005_proc_means_basket_value/meta.json create mode 100644 jenner-check/t005_proc_means_basket_value/script.sas create mode 100644 jenner-check/t006_supplier_freq_weight/autoexec.sas create mode 100644 jenner-check/t006_supplier_freq_weight/expected.json create mode 100644 jenner-check/t006_supplier_freq_weight/expected/files.md create mode 100644 jenner-check/t006_supplier_freq_weight/expected/log.txt create mode 100644 jenner-check/t006_supplier_freq_weight/expected/output.txt create mode 100644 jenner-check/t006_supplier_freq_weight/input/basket.csv create mode 100644 jenner-check/t006_supplier_freq_weight/input/products.csv create mode 100644 jenner-check/t006_supplier_freq_weight/input/suppliers.csv create mode 100644 jenner-check/t006_supplier_freq_weight/meta.json create mode 100644 jenner-check/t006_supplier_freq_weight/script.sas create mode 100644 jenner-check/t007_rfm_proc_sql/autoexec.sas create mode 100644 jenner-check/t007_rfm_proc_sql/expected.json create mode 100644 jenner-check/t007_rfm_proc_sql/expected/files.md create mode 100644 jenner-check/t007_rfm_proc_sql/expected/log.txt create mode 100644 jenner-check/t007_rfm_proc_sql/expected/output.txt create mode 100644 jenner-check/t007_rfm_proc_sql/input/basket.csv create mode 100644 jenner-check/t007_rfm_proc_sql/input/customers.csv create mode 100644 jenner-check/t007_rfm_proc_sql/input/invoice_sales.csv create mode 100644 jenner-check/t007_rfm_proc_sql/input/products.csv create mode 100644 jenner-check/t007_rfm_proc_sql/input/promotions.csv create mode 100644 
jenner-check/t007_rfm_proc_sql/meta.json create mode 100644 jenner-check/t007_rfm_proc_sql/script.sas diff --git a/jenner-check/README.md b/jenner-check/README.md new file mode 100644 index 0000000..b3ca73b --- /dev/null +++ b/jenner-check/README.md @@ -0,0 +1,66 @@ +# Jenner compatibility tests + +[Jenner](https://jenneranalytics.com) is a complete SAS-compatible system +and collaborative workspace. Each `tNNN_*` directory in this folder is a +self-contained test bundle that submits a SAS program to the public API +at `https://api.jenneranalytics.com/v1/run` and checks the response. + +## Bundle layout + +``` +tNNN_*/ +├── script.sas # the SAS program +├── autoexec.sas # options + setup prepended to the script +├── input/ # sample data the script reads (if any) +├── expected.json # stable assertions checked on each run +├── expected/ # captured snapshot from the last passing run +│ ├── log.txt # the .log field, verbatim +│ ├── output.txt # the .output (listing) field, verbatim +│ └── files.md # links to ODS images, datasets, etc. +└── meta.json # provenance: source file, blob sha, what was adapted +``` + +## Running a bundle + +The runner concatenates `autoexec.sas` + `script.sas`, POSTs to +`https://api.jenneranalytics.com/v1/run`, and prints the result. + +**Mac / Linux (bash + curl):** + +```bash +./run_jenner.sh --all # run every tNNN_* bundle, summary at end +./run_jenner.sh t001_something # run one +./run_jenner.sh --list # list bundles in this directory +``` + +**Windows (single-file mode: submits one .sas file, no autoexec):** + +```cmd +run_jenner.bat tNNN_something\script.sas +``` + +**From any SAS session (no curl needed):** + +Submit `run_jenner.sas` — it uses PROC HTTP to POST and prints the +response. 
+ +**By hand with curl:** + +```bash +cat tNNN_*/autoexec.sas tNNN_*/script.sas > /tmp/submit.sas +curl -sS -X POST https://api.jenneranalytics.com/v1/run \ + -F "script=@/tmp/submit.sas" \ + -F "deterministic=1" -F "timeout=60" +``` + +**Or in the hosted workspace:** + +Open the Jenner workspace (linked from https://jenneranalytics.com), paste `script.sas` (with the +`autoexec.sas` lines prepended), upload anything in `input/`, and run. + +## Artifact URLs + +`expected/files.md` in each bundle lists hosted URLs for any ODS images, +datasets, or other artifacts produced by a captured run. Those URLs are +tied to a specific run and expire when the run is reaped — re-run the +bundle to refresh them. diff --git a/jenner-check/run_jenner.bat b/jenner-check/run_jenner.bat new file mode 100644 index 0000000..1039fdf --- /dev/null +++ b/jenner-check/run_jenner.bat @@ -0,0 +1,43 @@ +@echo off +rem run_jenner.bat - Windows runner for Jenner compatibility checks. +rem +rem Usage: run_jenner.bat SCRIPT.sas [response.json] +rem +rem Submits a single .sas file to api.jenneranalytics.com. For +rem bundle-aware mode (autoexec.sas + script.sas concatenation) on +rem Windows, use WSL and invoke run_jenner.sh instead, or wait for the +rem Windows CI runner that will validate a bundle-aware .bat. +rem +rem Output: response.json contains the API response. Read it back in SAS: +rem filename resp 'response.json'; +rem libname resp JSON fileref=resp; +rem proc print data=resp.root; run; +rem +rem Requires: curl.exe (ships with Windows 10+ at C:\Windows\System32). 
+ +setlocal + +if "%~1"=="" ( + echo Usage: %~nx0 SCRIPT.sas [response.json] + exit /b 2 +) + +set "SCRIPT=%~1" +set "OUT=%~2" +if "%OUT%"=="" set "OUT=response.json" + +set HOST=api.jenneranalytics.com + +curl.exe -sS -X POST "https://%HOST%/v1/run" ^ + -F "script=@%SCRIPT%;type=application/x-sas" ^ + -F "deterministic=1" ^ + -F "timeout=60" ^ + -o "%OUT%" + +if errorlevel 1 ( + echo curl failed with errorlevel %errorlevel% + exit /b 1 +) + +echo Response written to %OUT% +exit /b 0 diff --git a/jenner-check/run_jenner.sas b/jenner-check/run_jenner.sas new file mode 100644 index 0000000..550e8f8 --- /dev/null +++ b/jenner-check/run_jenner.sas @@ -0,0 +1,526 @@ +/* run_jenner.sas — invoke api.jenneranalytics.com from base SAS. + * + * Requires SAS 9.4 M5 or later (PROC HTTP + libname JSON engine). + * + * --------------------------------------------------------------------------- + * TL;DR for SAS users: + * + * %include 'run_jenner.sas'; + * %jenner_run(script=my_program.sas); / * one script * / + * %jenner_check_all(); / * whole bundle dir * / + * + * --------------------------------------------------------------------------- + * What this file gives you: + * + * %jenner_run — POST one .sas file to the Jenner API, display the + * log + listing + any generated files. + * %jenner_check_all — walk every jenner-check/tNNN_* bundle, + * invoke the API for each, compare the response to + * the bundle's expected.json, produce a summary + * CSV + SAS dataset the repo owner can attach to the + * jenner-check PR. + * + * --------------------------------------------------------------------------- + * How the API call is built: + * + * POST https://api.jenneranalytics.com/v1/run + * Content-Type: multipart/form-data; boundary=... 
+ * + * fields: + * script the .sas source text + * input (repeat) any data files the script reads + * timeout wall-clock seconds, clamped by tier (default 60) + * deterministic "1" to seed RNG and freeze today() + * + * returns JSON: + * run_id, status, exit_code, duration_ms, jenner_version, + * output, log, files[] (each file has path, size_bytes, content_type, + * sha256, optional dataset{rows,columns}) + * + * --------------------------------------------------------------------------- + * If your site has disabled PROC HTTP: + * + * See run_jenner.bat (Windows) or run_jenner.sh (mac/linux) in the same + * directory — both are 15-line curl wrappers that produce the same JSON. + * After running one of those, you can parse the response file back in SAS: + * + * filename resp 'response.json'; + * libname resp JSON fileref=resp; + * proc print data=resp.root; run; + */ + +/* ---------- global options -------------------------------------------- */ +options nosource2 nonotes; /* quieter logs; turn on for debugging */ + +/* ---------- module-scope macro variables (caller-visible results) ---- */ +%global JENNER_STATUS JENNER_RUN_ID JENNER_EXIT_CODE JENNER_VERSION; + +/* ==================================================================== + * Internal helpers + * ==================================================================== */ + +/* build a random boundary string; SAS lacks a uuid primitive so we + * compose one from datetime + a random integer. */ +%macro _jc_boundary; + jc_%sysfunc(compress(%sysfunc(datetime(), b8601dt.), -:.))_%sysfunc(ranuni(0),hex6.) +%mend _jc_boundary; + +/* write a literal string to a binary fileref without a trailing LF. */ +%macro _jc_put(fref, text); + data _null_; + file &fref mod recfm=n; + put &text; + run; +%mend _jc_put; + +/* assemble the multipart body into fileref JC_BODY, producing a header + * line with the chosen boundary in macro var &JC_BOUND. Inputs is a + * space-separated list of file paths. 
+ * + * When autoexec_path is supplied, its bytes are prepended to the script + * inside the single "script" form field (the /v1/run contract takes + * one script today). A newline separates the two so statements don't + * run together. */ +%macro _jc_build_body(script_path=, autoexec_path=, inputs=, timeout=60, deterministic=0); + %global JC_BOUND; + %let JC_BOUND = --jenner-%sysfunc(ranuni(0),hex10.)--; + + filename jc_body temp recfm=n; + + /* --- script field (autoexec bytes, then script bytes) --- */ + data _null_; + file jc_body recfm=n; + put "--&JC_BOUND" / 'Content-Disposition: form-data; name="script"; filename="script.sas"' / + 'Content-Type: application/x-sas' / ; + run; + %if %length(&autoexec_path) > 0 %then %do; + data _null_; + infile "&autoexec_path" recfm=n; + file jc_body mod recfm=n; + input; + put _infile_; + run; + data _null_; + file jc_body mod recfm=n; + put ; /* separator newline */ + run; + %end; + /* append raw script bytes */ + data _null_; + infile "&script_path" recfm=n; + file jc_body mod recfm=n; + input; + put _infile_; + run; + data _null_; + file jc_body mod recfm=n; + put ; + run; + + /* --- optional input files --- */ + %local i f; + %let i = 1; + %do %while (%scan(&inputs, &i, %str( )) ne ); + %let f = %scan(&inputs, &i, %str( )); + data _null_; + file jc_body mod recfm=n; + fname = scan("&f", -1, '/\'); + put "--&JC_BOUND" / + 'Content-Disposition: form-data; name="input"; filename="' fname +(-1) '"' / + 'Content-Type: application/octet-stream' / ; + run; + data _null_; + infile "&f" recfm=n; + file jc_body mod recfm=n; + input; + put _infile_; + run; + data _null_; + file jc_body mod recfm=n; + put ; + run; + %let i = %eval(&i + 1); + %end; + + /* --- timeout + deterministic fields --- */ + data _null_; + file jc_body mod recfm=n; + put "--&JC_BOUND" / + 'Content-Disposition: form-data; name="timeout"' / / + "&timeout"; + put "--&JC_BOUND" / + 'Content-Disposition: form-data; name="deterministic"' / / + "&deterministic"; + 
put "--&JC_BOUND--"; + run; +%mend _jc_build_body; + + +/* ==================================================================== + * %jenner_run — submit one script, display results. + * ==================================================================== */ +%macro jenner_run( + script=, + autoexec=, + inputs=, + host=api.jenneranalytics.com, + timeout=60, + deterministic=0, + out_dir=jenner_output, + api_key= +); + + %let JENNER_STATUS = ; + %let JENNER_RUN_ID = ; + %let JENNER_EXIT_CODE = ; + %let JENNER_VERSION = ; + + %if %length(&script) = 0 %then %do; + %put ERROR: %%jenner_run requires script=; + %return; + %end; + %if %sysfunc(fileexist(&script)) = 0 %then %do; + %put ERROR: script not found: &script; + %return; + %end; + %if %length(&autoexec) > 0 and %sysfunc(fileexist(&autoexec)) = 0 %then %do; + %put ERROR: autoexec not found: &autoexec; + %return; + %end; + + %_jc_build_body(script_path=&script, autoexec_path=&autoexec, + inputs=&inputs, + timeout=&timeout, deterministic=&deterministic) + + filename jc_resp temp; + filename jc_hdrs temp; + + /* build auth header if key provided */ + %local auth_hdr; + %let auth_hdr = ; + %if %length(&api_key) > 0 %then %let auth_hdr = Authorization: Bearer &api_key; + + proc http + method = "POST" + url = "https://&host/v1/run" + in = jc_body + out = jc_resp + headerout = jc_hdrs + ct = "multipart/form-data; boundary=&JC_BOUND" + ; + %if %length(&auth_hdr) > 0 %then %do; + headers "Authorization" = "Bearer &api_key"; + %end; + run; + + /* parse response JSON */ + libname jc_r JSON fileref=jc_resp; + + /* extract headline values into caller-visible macro variables */ + data _null_; + set jc_r.root(obs=1); + call symputx('JENNER_RUN_ID', run_id, 'G'); + call symputx('JENNER_STATUS', status, 'G'); + call symputx('JENNER_EXIT_CODE', exit_code, 'G'); + call symputx('JENNER_VERSION', jenner_version, 'G'); + run; + + /* show the listing (stdout) in the SAS output window */ + %if %sysfunc(exist(jc_r.root)) %then %do; + data 
_null_; + set jc_r.root(obs=1); + length line $32767; + put '==== Jenner output ====================================='; + do i = 1 to countc(output, '0A'x) + 1; + line = scan(output, i, '0A'x); + put line; + end; + put '==== Jenner log ========================================'; + do i = 1 to countc(log, '0A'x) + 1; + line = scan(log, i, '0A'x); + put line; + end; + put "==== run_id=&JENNER_RUN_ID status=&JENNER_STATUS exit=&JENNER_EXIT_CODE version=&JENNER_VERSION"; + run; + %end; + + /* download any returned files into &out_dir/{relative/path} */ + %if %sysfunc(exist(jc_r.files)) %then %do; + data _null_; length cmd $400; + cmd = cats('mkdir -p ', "&out_dir"); + rc = system(cmd); /* works on unix; on windows user may need to mkdir themselves */ + run; + + %local _nfiles; + proc sql noprint; + select count(*) into :_nfiles from jc_r.files; + quit; + + %local i fpath furl; + %do i = 1 %to &_nfiles; + data _null_; + set jc_r.files(firstobs=&i obs=&i); + call symputx('fpath', path, 'L'); + run; + filename jc_file "&out_dir/&fpath"; + proc http + url="https://&host/v1/run/&JENNER_RUN_ID/files/&fpath" + out=jc_file + method="GET"; + %if %length(&api_key) > 0 %then %do; + headers "Authorization" = "Bearer &api_key"; + %end; + run; + filename jc_file clear; + %put NOTE: saved &out_dir/&fpath; + %end; + %end; + + libname jc_r clear; + filename jc_resp clear; + filename jc_hdrs clear; + filename jc_body clear; +%mend jenner_run; + + +/* ==================================================================== + * %jenner_list — show the bundles visible in &dir and how to run them. + * Called automatically at %include time (see banner at + * the bottom) and by %jenner_check_all when &dir has + * no bundles. 
+ * ==================================================================== */ +%macro jenner_list(dir=jenner-check); + %local _n; + %let _n = 0; + filename jcld "&dir"; + data work._jc_list; + length bundle $256; + did = dopen('jcld'); + if did = 0 then do; + call symputx('_n', -1, 'L'); + stop; + end; + n = dnum(did); + do i = 1 to n; + name = dread(did, i); + /* a bundle is a tNNN_* entry that contains script.sas (same rule as run_jenner.sh) */ + if prxmatch('/^t\d+_/', name) and fileexist(cats("&dir", '/', name, '/script.sas')) then do; + bundle = name; + output; + end; + end; + rc = dclose(did); + keep bundle; + run; + filename jcld clear; + + %if &_n = -1 %then %do; + %put NOTE: No directory '&dir' — are you at the repo root? Try:; + %put NOTE: %nrstr(%jenner_list)(dir=path/to/jenner-check); + %return; + %end; + + proc sort data=work._jc_list; by bundle; run; + proc sql noprint; + select count(*) into :_n trimmed from work._jc_list; + quit; + + %if &_n = 0 %then %do; + %put NOTE: No tNNN_* bundles found in '&dir'.; + %return; + %end; + + %put; + %put ======================================================================; + %put &_n bundle(s) in &dir:; + data _null_; + set work._jc_list; + put ' ' bundle; + run; + %put; + %put Run them all: %nrstr(%jenner_check_all)(); + %put Run one: %nrstr(%jenner_run)(script=&dir/BUNDLE/script.sas, autoexec=&dir/BUNDLE/autoexec.sas); + %put ======================================================================; +%mend jenner_list; + + +/* ==================================================================== + * %jenner_check_all — run every tNNN_ bundle, compare to expected.json, + * write a CSV summary the owner can attach to the PR. 
+ * ==================================================================== */ +%macro jenner_check_all( + dir=jenner-check, + host=api.jenneranalytics.com, + api_key=, + report=jenner_check_report.csv +); + + /* enumerate tNNN_* subdirs that contain script.sas (same rule as run_jenner.sh) */ + filename jcd "&dir"; + data work.jc_bundles; + length bundle $256; + did = dopen('jcd'); + if did = 0 then do; + /* byte(37) is the percent sign: written this way so the macro processor does not expand the macro name inside the message */ + _pct = byte(37); put "ERROR: cannot open &dir — are you at the repo root? Try " _pct +(-1) "jenner_list(dir=path/to/jenner-check);";
+ stop; + end; + n = dnum(did); + do i = 1 to n; + name = dread(did, i); + if prxmatch('/^t\d+_/', name) and fileexist(cats("&dir", '/', name, '/script.sas')) then do; + bundle = cats("&dir", '/', name); + output; + end; + end; + rc = dclose(did); + keep bundle; + run; + filename jcd clear; + proc sort data=work.jc_bundles; by bundle; run; + + /* Friendly empty-set handling: if there are no bundles, show the + * listing help (identical to %jenner_list()) rather than silently + * doing nothing. */ + %local _any; + proc sql noprint; select count(*) into :_any trimmed from work.jc_bundles; quit; + %if &_any = 0 %then %do; + %put NOTE: No tNNN_* bundles under '&dir'. Nothing to run.; + %jenner_list(dir=&dir) + %return; + %end; + + /* result accumulator */ + data work.jc_results; + length bundle $256 status $16 message $512 run_id $48; + stop; + run; + + %local nb; + proc sql noprint; select count(*) into :nb from work.jc_bundles; quit; + + %local i b; + %do i = 1 %to &nb; + data _null_; + set work.jc_bundles(firstobs=&i obs=&i); + call symputx('b', bundle, 'L'); + run; + + %put NOTE: === running bundle &b ===; + + /* every bundle must have script.sas; autoexec.sas is optional + * jenner-check bookkeeping (e.g. `options obs=100;` + any owner + * autoexec inlined). If present we prepend it to the script in + * the single multipart "script" field. Script.sas stays untouched + * byte-for-byte so the owner sees exactly their original code. */ + %local sc ax; + %let sc = &b/script.sas; + %if %sysfunc(fileexist(&b/autoexec.sas)) %then %let ax = &b/autoexec.sas; + %else %let ax = ; + + %jenner_run(script=&sc, autoexec=&ax, host=&host, api_key=&api_key, + deterministic=1, out_dir=&b/actual) + + /* compare to expected.json — minimal: the verdict below depends only + * on exit_code = 0; expected.json is opened but its assertions are + * not evaluated yet. A richer validator can live alongside + * expected.json as validate.sas (SAS-side) but isn't required. 
*/ + %local verdict msg; + %let verdict = unknown; + %let msg = no expected.json; + %if %sysfunc(fileexist(&b/expected.json)) %then %do; + filename jcexp "&b/expected.json"; + libname jcexp JSON fileref=jcexp; + + data _null_; + if 0 then set jcexp.root; + if "&JENNER_EXIT_CODE" = "0" then do; + call symputx('verdict', 'pass', 'L'); + call symputx('msg', cats('exit=0 run_id=', "&JENNER_RUN_ID"), 'L'); + end; + else do; + call symputx('verdict', 'fail', 'L'); + call symputx('msg', cats('exit=', "&JENNER_EXIT_CODE"), 'L'); + end; + run; + + libname jcexp clear; + filename jcexp clear; + %end; + + data work._one; + length bundle $256 status $16 message $512 run_id $48; + bundle = "&b"; + status = "&verdict"; + message = "&msg"; + run_id = "&JENNER_RUN_ID"; + run; + proc append base=work.jc_results data=work._one force; run; + %end; + + /* write CSV report */ + proc export data=work.jc_results + outfile="&dir/&report" + dbms=csv replace; + run; + + /* short summary in the SAS log */ + data _null_; + set work.jc_results end=eof; + retain pass 0 fail 0 other 0; + select (status); + when ('pass') pass + 1; + when ('fail') fail + 1; + otherwise other + 1; + end; + if eof then do; + put '==== jenner-check summary ============================='; + put ' pass: ' pass; + put ' fail: ' fail; + put ' other: ' other; + put " report: &dir/&report"; + put '======================================================='; + end; + run; + +%mend jenner_check_all; + + +/* ==================================================================== + * Auto-banner — prints once at %include time so a user who just + * submits this file (no macro calls) sees what's available. + * Suppressed if %let JENNER_QUIET = 1; before %include. + * + * Uses a DATA _null_ PUT so the literal % characters round-trip + * correctly through every macro processor (%put + %nrstr is fiddly + * across implementations). 
+ * ==================================================================== */ +%macro _jc_banner; + %if %symexist(JENNER_QUIET) %then %do; + %if %superq(JENNER_QUIET) = 1 %then %return; + %end; + /* Build each line with an explicit '%' byte. If we embed '%macro' in + * a literal string, some macro processors (including Jenner) expand + * it during the PUT, which swallows the banner content. + * byte(37) = '%'. cats() concatenates without gluing in spaces. */ + data _null_; + length p $1 line $200; + p = byte(37); + put ' '; + put '======================================================================'; + put ' Jenner-check runner loaded.'; + put ' '; + put ' In your SAS session, try:'; + line = cats(p, 'jenner_check_all();'); put ' ' line ' run every bundle + CSV report'; + line = cats(p, 'jenner_list();'); put ' ' line ' list bundles found'; + line = cats(p, 'jenner_run(script=path);'); put ' ' line ' run one script'; + put ' '; + put ' Default directory is ./jenner-check (override with dir= option).'; + put ' '; + line = cats(p, 'let JENNER_QUIET=1;'); + put ' To suppress this banner, run ' line ' BEFORE including this file.'; + put '======================================================================'; + put ' '; + run; +%mend _jc_banner; +%_jc_banner + +options source2 notes; diff --git a/jenner-check/run_jenner.sh b/jenner-check/run_jenner.sh new file mode 100755 index 0000000..65184aa --- /dev/null +++ b/jenner-check/run_jenner.sh @@ -0,0 +1,236 @@ +#!/usr/bin/env bash +# run_jenner.sh - mac/linux runner for Jenner compatibility checks. +# +# Quick start: +# cd jenner-check/ +# ./run_jenner.sh # lists bundles in the current dir +# ./run_jenner.sh t001_something # run that one +# ./run_jenner.sh --all # run every bundle in the current dir +# +# Usage: ./run_jenner.sh [bundle-dir | script.sas | --all | --list] [response.json] +# +# (no arg) If the current directory has tNNN_* bundles, list them +# with a copy-paste command. Otherwise show this help. 
+# +# --all Run every tNNN_* bundle in the current directory in +# sequence, print a pass/fail summary. +# +# --list, -l List the bundles visible in the current directory and +# exit without running anything. +# +# bundle-dir A directory containing script.sas and (optionally) +# autoexec.sas. The two are concatenated (autoexec first, +# then a blank line, then script) and submitted together. +# This is the normal case. +# +# script.sas A single .sas file. Submitted as-is — no autoexec. +# +# The API response is written to (or response.json in +# the current directory if omitted) and the most useful fields are also +# printed to stdout for a quick sanity check. +# +# Requires: bash 4+, curl. Both ship with every mainstream Linux distro +# and macOS 12+. Windows: use run_jenner.bat (single-file mode) or WSL. +# +# IMPORTANT: execute this script, don't source it. Running with `. ./...` +# or `source ./...` will short-circuit error handling and can close your +# terminal if an error path fires. + +# --- refuse to be sourced ------------------------------------------------ +# `return` only works inside a sourced script. If we ARE sourced, print a +# message and return 1 so we don't kill the parent shell with exit. If +# we're running directly, (return 0) fails and we fall through. +(return 0 2>/dev/null) && { + printf 'run_jenner.sh: execute this script, do not source it.\n ./run_jenner.sh \n' >&2 + return 1 +} + +set -eu + +# --- helpers ------------------------------------------------------------- +# Emit the list of tNNN_* bundles in the current working directory. A +# "bundle" is a directory matching t[0-9]*_* whose name contains a +# script.sas file. Writes one path per line (no prefix); empty output +# if nothing found. 
+list_bundles_here() {
+    local d
+    for d in ./t[0-9]*_*/ ; do   # an unmatched glob stays literal and fails the -d test below
+        [[ -d "$d" && -f "$d/script.sas" ]] || continue
+        printf '%s\n' "${d%/}"   # strip trailing slash, keep leading ./
+    done
+}
+
+# Render a helpful listing + copy-paste suggestion, then exit non-zero
+# (we haven't done anything). Used when the user runs with no args.
+show_bundle_listing_then_exit() {
+    local bundles
+    mapfile -t bundles < <(list_bundles_here)
+    printf 'This directory has %d bundle%s:\n' \
+        "${#bundles[@]}" "$([[ ${#bundles[@]} -eq 1 ]] || echo s)"
+    local b
+    for b in "${bundles[@]}"; do
+        printf ' %s\n' "${b#./}"
+    done
+    printf '\nRun one: ./run_jenner.sh %s\n' "${bundles[0]#./}"
+    printf 'Run them all: ./run_jenner.sh --all\n'
+    printf 'Just list: ./run_jenner.sh --list\n'
+    exit 2
+}
+
+# Show the usage block on stderr and exit with status $1 (default 2).
+show_usage_then_exit() {
+    local status=${1:-2}
+    {
+        printf 'Usage: %s [bundle-dir | script.sas | --all | --list] [response.json]\n\n' "$(basename "$0")"
+        printf 'Examples:\n'
+        printf ' %s t001_my_bundle # run one bundle\n' "$(basename "$0")"
+        printf ' %s --all # run every tNNN_* bundle in this dir\n' "$(basename "$0")"
+        printf ' %s path/to/script.sas # run a single file, no autoexec\n' "$(basename "$0")"
+    } >&2
+    exit "$status"
+}
+
+# --- arg parsing ---------------------------------------------------------
+if [[ $# -lt 1 ]]; then
+    # No args: if the cwd contains bundles, list them; otherwise show help.
+    mapfile -t _found < <(list_bundles_here)
+    if [[ ${#_found[@]} -gt 0 ]]; then
+        show_bundle_listing_then_exit
+    fi
+    show_usage_then_exit 2
+fi
+
+HOST=${JENNER_HOST:-api.jenneranalytics.com}
+
+case "$1" in
+    -h|--help)
+        show_usage_then_exit 0
+        ;;
+    -l|--list)
+        mapfile -t _found < <(list_bundles_here)
+        if [[ ${#_found[@]} -eq 0 ]]; then
+            printf 'No tNNN_* bundles found in %s\n' "$(pwd)"
+            exit 0
+        fi
+        printf 'Bundles in %s:\n' "$(pwd)"
+        for b in "${_found[@]}"; do
+            printf ' %s\n' "${b#./}"
+        done
+        exit 0
+        ;;
+    --all)
+        mapfile -t _found < <(list_bundles_here)
+        if [[ ${#_found[@]} -eq 0 ]]; then
+            printf 'No tNNN_* bundles found in %s\n' "$(pwd)" >&2
+            exit 3
+        fi
+        _pass=0; _fail=0
+        for b in "${_found[@]}"; do
+            printf '\n── %s ──\n' "${b#./}"
+            if "${BASH:-bash}" "$0" "$b" "${b#./}_response.json"; then   # via $BASH: works for `bash run_jenner.sh` too
+                _pass=$((_pass+1))
+            else
+                _fail=$((_fail+1))
+            fi
+        done
+        printf '\n── summary: %d pass, %d fail ──\n' "$_pass" "$_fail"
+        [[ $_fail -eq 0 ]] && exit 0 || exit 1
+        ;;
+esac
+
+TARGET=$1
+OUT=${2:-response.json}
+
+# --- assemble the submission body ---------------------------------------
+# If TARGET is a directory, treat it as a bundle. If it's a file, submit
+# it directly.
+CLEANUP=()
+cleanup() {
+    for f in ${CLEANUP[@]+"${CLEANUP[@]}"}; do rm -f "$f"; done   # guard: empty array + set -u aborts on bash < 4.4
+}
+trap cleanup EXIT
+
+INPUT_ARGS=()
+if [[ -d "$TARGET" ]]; then
+    if [[ ! -f "$TARGET/script.sas" ]]; then
+        printf 'error: %s is a directory but has no script.sas\n' "$TARGET" >&2
+        exit 3
+    fi
+    SUBMIT=$(mktemp -t jc_submit.XXXXXX.sas)
+    CLEANUP+=("$SUBMIT")
+    if [[ -f "$TARGET/autoexec.sas" ]]; then
+        cat "$TARGET/autoexec.sas" > "$SUBMIT"
+        printf '\n' >> "$SUBMIT"
+    fi
+    cat "$TARGET/script.sas" >> "$SUBMIT"
+    printf 'Submitting bundle: %s\n' "$TARGET"
+    if [[ -f "$TARGET/autoexec.sas" ]]; then
+        printf ' autoexec.sas (%d bytes) + script.sas (%d bytes)\n' \
+            "$(wc -c < "$TARGET/autoexec.sas")" "$(wc -c < "$TARGET/script.sas")"
+    else
+        printf ' script.sas (%d bytes), no autoexec\n' "$(wc -c < "$TARGET/script.sas")"
+    fi
+    # Attach every file under input/ as a multipart "input" field, preserving
+    # the on-disk basename. Jenner places these where the script expects them.
+    if [[ -d "$TARGET/input" ]]; then
+        _n=0
+        while IFS= read -r f; do
+            INPUT_ARGS+=(-F "input=@${f}")
+            _n=$((_n+1))
+        done < <(find "$TARGET/input" -maxdepth 1 -type f | sort)
+        if (( _n > 0 )); then
+            printf ' %d input file(s) attached from %s/input/\n' "$_n" "$TARGET"
+        fi
+    fi
+elif [[ -f "$TARGET" ]]; then
+    SUBMIT=$TARGET
+    printf 'Submitting file: %s (%d bytes)\n' "$TARGET" "$(wc -c < "$TARGET")"
+else
+    printf 'error: %s is neither a file nor a directory\n' "$TARGET" >&2
+    exit 3
+fi
+
+# --- POST ---------------------------------------------------------------
+printf 'POST https://%s/v1/run ... ' "$HOST"
+HTTP_CODE=$(curl -sS -o "$OUT" -w '%{http_code}' -X POST \
+    "https://${HOST}/v1/run" \
+    -F "script=@${SUBMIT};type=application/x-sas" \
+    "${INPUT_ARGS[@]+"${INPUT_ARGS[@]}"}" \
+    -F "deterministic=1" \
+    -F "timeout=60")
+printf 'HTTP %s\n' "$HTTP_CODE"
+
+if [[ "$HTTP_CODE" != "200" ]]; then
+    printf 'API returned non-200 — raw response in %s\n' "$OUT" >&2
+    exit 4
+fi
+
+# --- summarise ----------------------------------------------------------
+# Best-effort: use python if present, otherwise grep key fields.
+printf 'Response written to %s\n' "$OUT"
+_RUN_STATUS_OK=1
+if command -v python3 >/dev/null 2>&1; then
+    if ! python3 - "$OUT" <<'PY'
+import json, sys
+r = json.load(open(sys.argv[1]))
+print(f" status : {r.get('status')}")
+print(f" exit_code : {r.get('exit_code')}")
+print(f" duration_ms: {r.get('duration_ms')}")
+print(f" run_id : {r.get('run_id')}")
+print(f" jenner_ver : {r.get('jenner_version')}")
+log = r.get('log', '')
+if log:
+    print(' log (first 10 lines):')
+    for line in log.splitlines()[:10]:
+        print(f' {line}')
+sys.exit(0 if r.get('status') == 'ok' and (r.get('exit_code') or 0) == 0 else 5)
+PY
+    then _RUN_STATUS_OK=0; fi
+else
+    # No python3: best-effort grep for "status": "ok" so a failed run is not reported as a pass (exit_code is not checked here).
+    printf ' (install python3 for a pretty summary; raw JSON in %s)\n' "$OUT"
+    grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"' "$OUT" || _RUN_STATUS_OK=0
+fi
+
+# Final exit code reflects bundle-internal success, not just HTTP 200.
+[[ "$_RUN_STATUS_OK" == "1" ]] || exit 5
diff --git a/jenner-check/t001_data_import_csv/autoexec.sas b/jenner-check/t001_data_import_csv/autoexec.sas
new file mode 100644
index 0000000..58ef3c7
--- /dev/null
+++ b/jenner-check/t001_data_import_csv/autoexec.sas
@@ -0,0 +1,6 @@
+options obs=100;
+/* Import the sample customers CSV that ships with this bundle */
+proc import datafile="input/customers.csv" dbms=csv out=customers replace;
+    getnames=yes;
+    guessingrows=max;
+run;
diff --git a/jenner-check/t001_data_import_csv/expected.json b/jenner-check/t001_data_import_csv/expected.json
new file mode 100644
index 0000000..60bd033
--- /dev/null
+++ b/jenner-check/t001_data_import_csv/expected.json
@@ -0,0 +1,16 @@
+{
+  "_captured_at": "2026-05-11T23:53:00Z",
+  "_captured_run_id": "r_019e17f5119371b29698973c943e9140",
+  "_jenner_version": "0.14.2",
+  "status": "ok",
+  "exit_code": 0,
+  "log_contains": [
+    "PROC IMPORT datafile=input/customers.csv",
+    "Imported 30 rows from input/customers.csv.",
+    "Read 30 rows from customers.",
+    "Wrote customers (25 rows, 15 columns).",
+    "PROC FREQ"
+  ],
+  "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"],
+  "diagnostics": 
{"parse_warnings": [], "runtime_warnings": []} +} diff --git a/jenner-check/t001_data_import_csv/expected/files.md b/jenner-check/t001_data_import_csv/expected/files.md new file mode 100644 index 0000000..8de62ee --- /dev/null +++ b/jenner-check/t001_data_import_csv/expected/files.md @@ -0,0 +1,16 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. + +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| listing.txt | text/plain | 710 | https://api.jenneranalytics.com/v1/run/r_019e17f5119371b29698973c943e9140/files/listing.txt | +| ods_output/freq_age_range.svg | image/svg+xml | 14395 | https://api.jenneranalytics.com/v1/run/r_019e17f5119371b29698973c943e9140/files/ods_output/freq_age_range.svg | +| ods_output/freq_gender.svg | image/svg+xml | 11922 | https://api.jenneranalytics.com/v1/run/r_019e17f5119371b29698973c943e9140/files/ods_output/freq_gender.svg | +| ods_output/freq_region.svg | image/svg+xml | 14953 | https://api.jenneranalytics.com/v1/run/r_019e17f5119371b29698973c943e9140/files/ods_output/freq_region.svg | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| customers | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f5119371b29698973c943e9140/datasets/customers | \ No newline at end of file diff --git a/jenner-check/t001_data_import_csv/expected/log.txt b/jenner-check/t001_data_import_csv/expected/log.txt new file mode 100644 index 0000000..5e5090e --- /dev/null +++ b/jenner-check/t001_data_import_csv/expected/log.txt @@ -0,0 +1,28 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: PROC IMPORT datafile=input/customers.csv out=customers + +NOTE: Imported 30 rows from input/customers.csv. 
+NOTE: DATA customers + + +NOTE: Read 30 rows from customers. +NOTE: Wrote customers (25 rows, 14 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA customers + + +NOTE: Read 25 rows from customers. +NOTE: Wrote customers (25 rows, 15 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: ODS plot written: freq_age_range.spec.json +NOTE: ODS plot written: freq_gender.spec.json +NOTE: ODS plot written: freq_region.spec.json +NOTE: PROC FREQ statement used. diff --git a/jenner-check/t001_data_import_csv/expected/output.txt b/jenner-check/t001_data_import_csv/expected/output.txt new file mode 100644 index 0000000..3d4d49d --- /dev/null +++ b/jenner-check/t001_data_import_csv/expected/output.txt @@ -0,0 +1,24 @@ + The FREQ Procedure + +AGE_RANGE Frequency Percent +----------------------------------- +Mature 6 24.00 +Middle Age 6 24.00 +Old 1 4.00 +Very Young 4 16.00 +Young 8 32.00 + +GENDER Frequency Percent +------------------------------- +F 8 32.00 +M 17 68.00 + +REGION Frequency Percent +------------------------------- +GO 2 8.00 +MG 3 12.00 +PR 2 8.00 +RJ 1 4.00 +RS 1 4.00 +SC 2 8.00 +SP 14 56.00 diff --git a/jenner-check/t001_data_import_csv/input/customers.csv b/jenner-check/t001_data_import_csv/input/customers.csv new file mode 100644 index 0000000..c3b9b99 --- /dev/null +++ b/jenner-check/t001_data_import_csv/input/customers.csv @@ -0,0 +1,31 @@ +customer_id,Last_name,First_name,Address,Country,Postal_Code,City,Region,Gender,Day_Of_Birth,Month_Of_Birth,Year_Of_Birth +1,Johnson,Stanley,135 Waymore Blvd.,Brazil,14409,franca,SP,M,3,11,1955 +2,Cramer,Henry,9483 49th St.,Brazil,9790,sao bernardo do campo,SP,M,3,2,1981 +3,Von Brocken,Owen,2843 Drummin Lane,Brazil,1151,sao paulo,SP,M,1,10,1993 +4,Hoover,Wilhelm,135 Poplar St.,Brazil,8775,mogi das cruzes,SP,F,3,2,1975 +5,Von Stubben,Vern,2839 5th Ave.,Brazil,13056,campinas,SP,F,2,8,2017 +6,Anthony,Terry,2932 50th St.,Brazil,89254,jaragua do 
sul,SC,F,1,5,1992 +7,Smith,Marian,27391 Overton Dr.,Brazil,4534,sao paulo,SP,F,3,4,1967 +8,Ali,Jennifer,3489 Waverly St.,Brazil,35182,timoteo,MG,F,2,9,1992 +9,Pearl,Ramona,2384 Park Ct.,Brazil,81560,curitiba,PR,F,1,11,1976 +10,Hederson,Walter,2932 Waverly St.,Brazil,30575,belo horizonte,MG,F,1,3,1994 +11,Pearl,Winifred,8293 Polar Ave,Brazil,39400,montes claros,MG,M,2,6,1967 +12,Anthony,Arun,135 Wicker Way,Brazil,20231,rio de janeiro,RJ,M,2,11,2008 +13,Mueller,Jennifer,4839 73rd Ave.,Brazil,18682,lencois paulista,SP,M,3,1,1945 +14,Pearl,Ralph,283 Highview Blvd.,Brazil,5704,sao paulo,SP,F,2,5,2021 +15,Taft,Brad,2838 Becker Way,Brazil,95110,caxias do sul,RS,M,3,1,1977 +16,Goldbloom,Xia,3838 Mortimer Ct.,Brazil,13412,piracicaba,SP,M,2,10,1965 +17,Wellington,Melissa,2839 Seasame Ln.,Brazil,22750,rio de janeiro,RJ,M,2,1,1970 +18,Sipulski,Henry,3489 45th St.,Brazil,7124,guarulhos,SP,M,3,11,1984 +19,Winstrom,Walter,2833 39th Ave.,Brazil,5416,sao paulo,SP,M,2,6,1999 +20,Alvarez,Douglas,2843 Waverly St.,Brazil,68485,pacaja,PA,F,1,9,2020 +21,Petrov,Cassie,2939 73rd Ave.,Brazil,88034,florianopolis,SC,M,3,3,1990 +22,Hoover,Zeb,7472 Anchor St.,Brazil,74914,aparecida de goiania,GO,M,2,10,1969 +23,Roosevelt,Nels,135 23rd St.,Brazil,5713,sao paulo,SP,M,2,1,1967 +24,Allens,Nels,2839 Hwy 45,Brazil,82820,curitiba,PR,F,3,4,1967 +25,Popovich,Ralph,828 Seasame Ln.,Brazil,8225,sao paulo,SP,M,1,8,1990 +26,Sipulski,Ramona,8232 Baker Ave.,Brazil,9121,santo andre,SP,M,1,1,1991 +27,Roberts,Edward,2843 Birch St.,Brazil,74310,goiania,GO,F,2,3,1984 +28,Popovich,Xia,2843 5th Ave.,Brazil,4571,sao paulo,SP,M,1,5,1991 +29,Hokah,Ramona,283 Mycroft Ln.,Brazil,29311,cachoeiro de itapemirim,ES,M,2,8,2001 +30,Glinkov,Greg,29838 Seasame Ln.,Brazil,5528,sao paulo,SP,M,2,5,1995 diff --git a/jenner-check/t001_data_import_csv/meta.json b/jenner-check/t001_data_import_csv/meta.json new file mode 100644 index 0000000..204ec0a --- /dev/null +++ b/jenner-check/t001_data_import_csv/meta.json @@ -0,0 +1,8 @@ +{ + 
"bundle": "t001_data_import_csv", + "source_file": "Code/0_Data_Import.sas + Code/1_Data_Pre-Processing.sas + Code/2_Customer_Profiling.sas", + "source_blob_sha": "a8921f24a75769effe61f9d9a9f17953e2d4b05d", + "source_commit": "6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "30-row sample of Data/Customers.csv shipped under input/. Replaces the SAS Viya /home/u62678062/sasuser.v94 FILENAME path with a relative CSV path. Combines the Customers import, the age derivation block (MDY/INTCK/DAY/MONTH), and the Age_Range categorization + PROC FREQ from scripts 0, 1, and 2." +} diff --git a/jenner-check/t001_data_import_csv/script.sas b/jenner-check/t001_data_import_csv/script.sas new file mode 100644 index 0000000..b1f5203 --- /dev/null +++ b/jenner-check/t001_data_import_csv/script.sas @@ -0,0 +1,40 @@ +/******************************************************************************** + * Adapted from Code/0_Data_Import.sas (Customers section) + + * Code/1_Data_Pre-Processing.sas (CUSTOMER'S AGE block) + + * Code/2_Customer_Profiling.sas (AGE RANGE VARIABLE block) + * + * Imports the customers CSV, derives Age from birth date components, and + * categorizes customers into Age_Range buckets. Demonstrates DATA step + * filtering, MDY()/INTCK()/DAY()/MONTH() date functions, and IF/THEN/ELSE + * classification. 
+ *******************************************************************************/ + +/* CUSTOMER'S AGE */ +data customers; + set customers; + /* Filter valid ages (1910 < year_of_birth < 2001) */ + where Year_Of_Birth > 1910 and Year_Of_Birth < 2001; + /* Compose a valid birth date from day/month/year components */ + Birth_Date = mdy(Month_Of_Birth, Day_Of_Birth, Year_Of_Birth); + /* Age as of 01JAN2019, adjusted if their birthday hadn't passed yet */ + Age = floor(intck('year', Birth_Date, '01JAN2019'd) + - (Day(Birth_Date) > 1 or Month(Birth_Date) > 1)); +run; + +/* AGE RANGE VARIABLE */ +data customers; + set customers; + format Age_Range $10.; + if Age < 18 then Age_Range = "Under 18"; + else if Age >= 18 and Age <= 25 then Age_Range = "Very Young"; + else if Age >= 26 and Age <= 35 then Age_Range = "Young"; + else if Age >= 36 and Age <= 50 then Age_Range = "Middle Age"; + else if Age >= 51 and Age <= 65 then Age_Range = "Mature"; + else if Age >= 66 and Age <= 75 then Age_Range = "Old"; + else Age_Range = "Very Old"; +run; + +/* DEMOGRAPHIC CHARACTERISTICS (Age_Range + Gender + Region) */ +proc freq data=customers; + tables Age_Range Gender Region / nocum; +run; diff --git a/jenner-check/t002_merge_sort_invoices/autoexec.sas b/jenner-check/t002_merge_sort_invoices/autoexec.sas new file mode 100644 index 0000000..900bbab --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/autoexec.sas @@ -0,0 +1,13 @@ +options obs=100; +/* Import both source datasets that ship with this bundle. The originals come + from Data/Basket.xlsx and Data/Invoice.csv; the CSV forms here are small + samples (first ~60 basket rows for invoice_ids 1-20, first 20 invoices). 
*/ +proc import datafile="input/basket.csv" dbms=csv out=basket replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/invoice.csv" dbms=dlm out=invoice replace; + delimiter=";"; + getnames=yes; + guessingrows=max; +run; diff --git a/jenner-check/t002_merge_sort_invoices/expected.json b/jenner-check/t002_merge_sort_invoices/expected.json new file mode 100644 index 0000000..00b7cc5 --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/expected.json @@ -0,0 +1,17 @@ +{ + "_captured_at": "2026-05-11T23:55:00Z", + "_captured_run_id": "r_019e17f61c587272a278dc6b0ef92969", + "_jenner_version": "0.14.2", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "Imported 60 rows from input/basket.csv.", + "Imported 20 rows from input/invoice.csv.", + "Wrote basket_sorted (60 rows, 4 columns).", + "Wrote invoice_sorted (20 rows, 6 columns).", + "Table Invoice_Total_Items created.", + "PROC PRINT completed: 10 observations printed, 2 variables" + ], + "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"], + "diagnostics": {"parse_warnings": [], "runtime_warnings": []} +} diff --git a/jenner-check/t002_merge_sort_invoices/expected/files.md b/jenner-check/t002_merge_sort_invoices/expected/files.md new file mode 100644 index 0000000..18c7a5b --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/expected/files.md @@ -0,0 +1,18 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. 
+ +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| listing.txt | text/plain | 557 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/files/listing.txt | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| basket | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/datasets/basket | +| basket_invoice | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/datasets/basket_invoice | +| basket_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/datasets/basket_sorted | +| invoice | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/datasets/invoice | +| invoice_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/datasets/invoice_sorted | +| invoice_total_items | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f61c587272a278dc6b0ef92969/datasets/invoice_total_items | \ No newline at end of file diff --git a/jenner-check/t002_merge_sort_invoices/expected/log.txt b/jenner-check/t002_merge_sort_invoices/expected/log.txt new file mode 100644 index 0000000..52dcfe7 --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/expected/log.txt @@ -0,0 +1,35 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: PROC IMPORT datafile=input/basket.csv out=basket + +NOTE: Imported 60 rows from input/basket.csv. +NOTE: PROC IMPORT datafile=input/invoice.csv out=invoice + +NOTE: Imported 20 rows from input/invoice.csv. +NOTE: PROC SORT data=basket + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 60 rows from basket. +NOTE: Wrote basket_sorted (60 rows, 4 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. 
+NOTE: PROC SORT statement used. +NOTE: PROC SORT data=invoice + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 20 rows from invoice. +NOTE: Wrote invoice_sorted (20 rows, 6 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA basket_invoice + +NOTE: Stream 1 processed 60 rows, max BY-group size: 19 (O(1) memory verified) +NOTE: Stream 2 processed 20 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: PROC SQL + +NOTE: Table Invoice_Total_Items created. +NOTE: PROC SQL statement used. +NOTE: PROC PRINT data=Invoice_Total_Items + +NOTE: PROC PRINT completed: 10 observations printed, 2 variables diff --git a/jenner-check/t002_merge_sort_invoices/expected/output.txt b/jenner-check/t002_merge_sort_invoices/expected/output.txt new file mode 100644 index 0000000..c9f85e4 --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/expected/output.txt @@ -0,0 +1,16 @@ + Per-invoice item totals (first 10 invoices) + + Obs INVOICE_ID INVOICE_TOTAL_ITEMS + 1 1 7 + 2 2 2 + 3 3 12 + 4 4 4 + 5 5 1 + 6 6 19 + 7 7 1 + 8 8 2 + 9 9 12 + 10 10 0 + +... 
10 more observations (showing 10 of 20) + diff --git a/jenner-check/t002_merge_sort_invoices/input/basket.csv b/jenner-check/t002_merge_sort_invoices/input/basket.csv new file mode 100644 index 0000000..0d0df1d --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/input/basket.csv @@ -0,0 +1,61 @@ +Invoice_ID,Product_ID,Promotion_ID,Quantity +1,32,1,2 +1,126,1,1 +1,120,1,2 +1,42,1,1 +1,110,1,1 +1,66,1,1 +1,103,1,1 +2,38,4,1 +2,131,1,3 +3,10,3,2 +3,143,1,2 +3,49,1,1 +3,121,1,2 +3,35,1,3 +3,4,3,2 +3,138,1,2 +3,53,1,1 +3,26,4,1 +3,129,1,2 +3,78,1,3 +3,82,1,2 +4,21,1,2 +4,12,3,1 +4,35,1,1 +4,45,1,2 +5,82,1,1 +6,47,1,3 +6,49,1,1 +6,132,1,2 +6,36,1,1 +6,56,1,2 +6,108,1,1 +6,100,2,2 +6,93,1,3 +6,47,1,1 +6,81,1,2 +6,119,1,2 +6,75,4,2 +6,75,4,1 +6,31,2,3 +6,136,1,1 +6,137,1,1 +6,72,1,1 +6,106,1,2 +6,6,1,2 +7,13,1,2 +8,94,4,2 +8,39,3,1 +9,107,4,3 +9,53,1,2 +9,38,4,3 +9,73,1,3 +9,70,4,2 +9,86,1,2 +9,57,1,3 +9,131,1,2 +9,26,3,2 +9,83,4,3 +9,29,1,1 +9,117,1,2 diff --git a/jenner-check/t002_merge_sort_invoices/input/invoice.csv b/jenner-check/t002_merge_sort_invoices/input/invoice.csv new file mode 100644 index 0000000..2e57033 --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/input/invoice.csv @@ -0,0 +1,21 @@ +Invoice_ID;InvoiceNo;InvoiceDate;Customer_ID;Payment_Method;Operation +1;539730;12/21/2010;1;4;Sale +2;552969;5/12/2011;1;4;Sale +3;577382;11/18/2011;1;1;Sale +4;580363;12/2/2011;1;4;Sale +5;550837;4/20/2011;2;4;Sale +6;579768;11/30/2011;3;1;Sale +7;C553665;5/16/2011;4;3;Return +8;C557947;6/21/2011;4;2;Return +9;C577164;11/15/2011;4;1;Return +10;567972;9/22/2011;5;1;Sale +11;575586;11/9/2011;6;2;Sale +12;546980;3/18/2011;8;2;Sale +13;C564751;8/24/2011;8;3;Return +14;557117;6/16/2011;9;3;Sale +15;557251;6/17/2011;9;4;Sale +16;575924;11/11/2011;9;1;Sale +17;C570828;10/10/2011;9;4;Return +18;545702;3/6/2011;10;1;Sale +19;554284;5/23/2011;10;1;Sale +20;575211;11/8/2011;11;2;Sale diff --git a/jenner-check/t002_merge_sort_invoices/meta.json 
b/jenner-check/t002_merge_sort_invoices/meta.json new file mode 100644 index 0000000..34bf40f --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t002_merge_sort_invoices", + "source_file": "Code/1_Data_Pre-Processing.sas (INVOICE TOTAL ITEMS block, lines 4-34)", + "source_blob_sha": "7eccb563759da0420f3fff3ddd41ac0a95f29a50", + "source_commit": "6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "Basket.xlsx is exported to a small basket.csv (60 rows for invoice_ids 1-20) and Invoice.csv is trimmed to the matching 20 invoices. The PROJECT libref is dropped since this is a single-script bundle. Exercises PROC SORT, DATA MERGE with IN= flags, and PROC SQL with COUNT/GROUP BY." +} diff --git a/jenner-check/t002_merge_sort_invoices/script.sas b/jenner-check/t002_merge_sort_invoices/script.sas new file mode 100644 index 0000000..44bca1c --- /dev/null +++ b/jenner-check/t002_merge_sort_invoices/script.sas @@ -0,0 +1,36 @@ +/******************************************************************************** + * Adapted from Code/1_Data_Pre-Processing.sas (INVOICE TOTAL ITEMS block) + * + * Sort both basket and invoice by Invoice_ID, then merge them retaining only + * rows that have a matching invoice. Demonstrates PROC SORT, DATA MERGE with + * IN= flags, conditional retention (if i), and the subsequent PROC SQL + * aggregation (COUNT, GROUP BY) that builds the per-invoice item count. 
+ *******************************************************************************/ + +/* Step 1: Sort both datasets by Invoice_ID and merge */ +proc sort data=basket out=basket_sorted; + by invoice_id; +run; + +proc sort data=invoice out=invoice_sorted; + by invoice_id; +run; + +data basket_invoice; + merge basket_sorted (in=b) invoice_sorted (in=i); + by invoice_id; + if i; +run; + +/* Step 2: Count items per invoice using PROC SQL */ +proc sql; + create table Invoice_Total_Items as + select Invoice_ID, COUNT(Product_ID) as Invoice_Total_Items + from basket_invoice + group by Invoice_ID; +quit; + +/* Step 3: Print the first 10 invoices and their item totals */ +proc print data=Invoice_Total_Items(obs=10); + title "Per-invoice item totals (first 10 invoices)"; +run; diff --git a/jenner-check/t003_customer_format_sql/autoexec.sas b/jenner-check/t003_customer_format_sql/autoexec.sas new file mode 100644 index 0000000..a42d080 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/autoexec.sas @@ -0,0 +1,28 @@ +options obs=100; +proc import datafile="input/customers.csv" dbms=csv out=customers replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/invoice.csv" dbms=dlm out=invoice replace; + delimiter=";"; + getnames=yes; + guessingrows=max; +run; + +/* Derive Age and Age_Range as in scripts 1 and 2 so the SQL/format block has the + columns it expects. 
*/ +data customers; + set customers; + where Year_Of_Birth > 1910 and Year_Of_Birth < 2001; + Birth_Date = mdy(Month_Of_Birth, Day_Of_Birth, Year_Of_Birth); + Age = floor(intck('year', Birth_Date, '01JAN2019'd) + - (Day(Birth_Date) > 1 or Month(Birth_Date) > 1)); + format Age_Range $10.; + if Age < 18 then Age_Range = "Under 18"; + else if Age >= 18 and Age <= 25 then Age_Range = "Very Young"; + else if Age >= 26 and Age <= 35 then Age_Range = "Young"; + else if Age >= 36 and Age <= 50 then Age_Range = "Middle Age"; + else if Age >= 51 and Age <= 65 then Age_Range = "Mature"; + else if Age >= 66 and Age <= 75 then Age_Range = "Old"; + else Age_Range = "Very Old"; +run; diff --git a/jenner-check/t003_customer_format_sql/expected.json b/jenner-check/t003_customer_format_sql/expected.json new file mode 100644 index 0000000..15d5c08 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/expected.json @@ -0,0 +1,15 @@ +{ + "_captured_at": "2026-05-11T23:57:00Z", + "_captured_run_id": "r_019e17f6e9e97e81a59d6dc9c9313d82", + "_jenner_version": "0.14.2", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "Imported 50 rows from input/customers.csv.", + "Imported 100 rows from input/invoice.csv.", + "Table stores_visits created.", + "PROC PRINT completed: 5 observations printed, 2 variables" + ], + "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"], + "diagnostics": {"parse_warnings": [], "runtime_warnings": []} +} diff --git a/jenner-check/t003_customer_format_sql/expected/files.md b/jenner-check/t003_customer_format_sql/expected/files.md new file mode 100644 index 0000000..b2bc355 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/expected/files.md @@ -0,0 +1,18 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. 
+ +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| listing.txt | text/plain | 239 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/files/listing.txt | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| customers | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/datasets/customers | +| customers_invoice | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/datasets/customers_invoice | +| customers_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/datasets/customers_sorted | +| invoice | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/datasets/invoice | +| invoice_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/datasets/invoice_sorted | +| stores_visits | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f6e9e97e81a59d6dc9c9313d82/datasets/stores_visits | \ No newline at end of file diff --git a/jenner-check/t003_customer_format_sql/expected/log.txt b/jenner-check/t003_customer_format_sql/expected/log.txt new file mode 100644 index 0000000..54eb163 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/expected/log.txt @@ -0,0 +1,69 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: PROC IMPORT datafile=input/customers.csv out=customers + +NOTE: Imported 50 rows from input/customers.csv. +NOTE: PROC IMPORT datafile=input/invoice.csv out=invoice + +NOTE: Imported 100 rows from input/invoice.csv. +NOTE: DATA customers + + +NOTE: Read 50 rows from customers. +NOTE: Wrote customers (40 rows, 15 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FORMAT library=WORK + +NOTE: FORMAT $age_group defined (7 ranges). 
+NOTE: PROC SORT data=customers + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 40 rows from customers. +NOTE: Wrote customers_sorted (40 rows, 15 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: PROC SORT data=invoice + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 100 rows from invoice. +NOTE: Wrote invoice_sorted (100 rows, 6 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA customers_invoice + +NOTE: Stream 1 processed 40 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 2 processed 100 rows, max BY-group size: 8 (O(1) memory verified) +NOTE: PROC SQL + +NOTE: Table stores_visits created. +NOTE: PROC SQL statement used. +NOTE: DATA stores_visits + + +NOTE: Read 5 rows from stores_visits. +NOTE: Wrote stores_visits (5 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC SORT data=stores_visits + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 5 rows from stores_visits. +NOTE: Wrote stores_visits (5 rows, 3 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA stores_visits + + +NOTE: Read 5 rows from stores_visits. +NOTE: Wrote stores_visits (5 rows, 2 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC PRINT data=stores_visits + +NOTE: PROC PRINT completed: 5 observations printed, 2 variables diff --git a/jenner-check/t003_customer_format_sql/expected/output.txt b/jenner-check/t003_customer_format_sql/expected/output.txt new file mode 100644 index 0000000..8f3780a --- /dev/null +++ b/jenner-check/t003_customer_format_sql/expected/output.txt @@ -0,0 +1,9 @@ + Total store visits by age range + + AGE_RANGE STORES_VISITS +Very Young 11 +Young 20 +Middle Age 29 +Mature 17 +Old 3 + diff --git a/jenner-check/t003_customer_format_sql/input/customers.csv b/jenner-check/t003_customer_format_sql/input/customers.csv new file mode 100644 index 0000000..6ca7f0d --- /dev/null +++ b/jenner-check/t003_customer_format_sql/input/customers.csv @@ -0,0 +1,51 @@ +customer_id,Last_name,First_name,Address,Country,Postal_Code,City,Region,Gender,Day_Of_Birth,Month_Of_Birth,Year_Of_Birth +1,Johnson,Stanley,135 Waymore Blvd.,Brazil,14409,franca,SP,M,3,11,1955 +2,Cramer,Henry,9483 49th St.,Brazil,9790,sao bernardo do campo,SP,M,3,2,1981 +3,Von Brocken,Owen,2843 Drummin Lane,Brazil,1151,sao paulo,SP,M,1,10,1993 +4,Hoover,Wilhelm,135 Poplar St.,Brazil,8775,mogi das cruzes,SP,F,3,2,1975 +5,Von Stubben,Vern,2839 5th Ave.,Brazil,13056,campinas,SP,F,2,8,2017 +6,Anthony,Terry,2932 50th St.,Brazil,89254,jaragua do sul,SC,F,1,5,1992 +7,Smith,Marian,27391 Overton Dr.,Brazil,4534,sao paulo,SP,F,3,4,1967 +8,Ali,Jennifer,3489 Waverly St.,Brazil,35182,timoteo,MG,F,2,9,1992 +9,Pearl,Ramona,2384 Park Ct.,Brazil,81560,curitiba,PR,F,1,11,1976 +10,Hederson,Walter,2932 Waverly St.,Brazil,30575,belo horizonte,MG,F,1,3,1994 +11,Pearl,Winifred,8293 Polar Ave,Brazil,39400,montes claros,MG,M,2,6,1967 +12,Anthony,Arun,135 Wicker Way,Brazil,20231,rio de janeiro,RJ,M,2,11,2008 +13,Mueller,Jennifer,4839 73rd Ave.,Brazil,18682,lencois paulista,SP,M,3,1,1945 +14,Pearl,Ralph,283 Highview Blvd.,Brazil,5704,sao paulo,SP,F,2,5,2021 +15,Taft,Brad,2838 
Becker Way,Brazil,95110,caxias do sul,RS,M,3,1,1977 +16,Goldbloom,Xia,3838 Mortimer Ct.,Brazil,13412,piracicaba,SP,M,2,10,1965 +17,Wellington,Melissa,2839 Seasame Ln.,Brazil,22750,rio de janeiro,RJ,M,2,1,1970 +18,Sipulski,Henry,3489 45th St.,Brazil,7124,guarulhos,SP,M,3,11,1984 +19,Winstrom,Walter,2833 39th Ave.,Brazil,5416,sao paulo,SP,M,2,6,1999 +20,Alvarez,Douglas,2843 Waverly St.,Brazil,68485,pacaja,PA,F,1,9,2020 +21,Petrov,Cassie,2939 73rd Ave.,Brazil,88034,florianopolis,SC,M,3,3,1990 +22,Hoover,Zeb,7472 Anchor St.,Brazil,74914,aparecida de goiania,GO,M,2,10,1969 +23,Roosevelt,Nels,135 23rd St.,Brazil,5713,sao paulo,SP,M,2,1,1967 +24,Allens,Nels,2839 Hwy 45,Brazil,82820,curitiba,PR,F,3,4,1967 +25,Popovich,Ralph,828 Seasame Ln.,Brazil,8225,sao paulo,SP,M,1,8,1990 +26,Sipulski,Ramona,8232 Baker Ave.,Brazil,9121,santo andre,SP,M,1,1,1991 +27,Roberts,Edward,2843 Birch St.,Brazil,74310,goiania,GO,F,2,3,1984 +28,Popovich,Xia,2843 5th Ave.,Brazil,4571,sao paulo,SP,M,1,5,1991 +29,Hokah,Ramona,283 Mycroft Ln.,Brazil,29311,cachoeiro de itapemirim,ES,M,2,8,2001 +30,Glinkov,Greg,29838 Seasame Ln.,Brazil,5528,sao paulo,SP,M,2,5,1995 +31,Banks,Anders,2384 Mycroft Ln.,Brazil,12235,sao jose dos campos,SP,F,2,1,1973 +32,Roosevelt,Winifred,2843 Wicker Way,Brazil,18130,sao roque,SP,M,1,6,1990 +33,Perkins,Jennifer,9293 Elm St.,Brazil,42800,camacari,BA,M,2,7,1985 +34,Pearl,Anders,2838 Mycroft Ln.,Brazil,27525,resende,RJ,M,3,4,1979 +35,Fulston,Sarah,828 Eilert Way,Brazil,81750,curitiba,PR,M,3,6,1982 +36,Hu,Betsy,8392 Angle St.,Brazil,13175,sumare,SP,F,3,11,1991 +37,Steeger,Linda,28043 34th St.,Brazil,7170,guarulhos,SP,M,1,5,2003 +38,Allens,Frank,2772 Waverly St.,Brazil,93415,novo hamburgo,RS,F,3,4,1993 +39,Steeger,Fran,8293 Roberts Rd.,Brazil,65075,sao luis,MA,M,1,5,2010 +40,Elert,Daniel,2828 Eilert Way,Brazil,88104,sao jose,SC,M,2,4,1984 +41,Sipulski,Terry,2828 129th St.,Brazil,7176,guarulhos,SP,M,3,6,1965 +42,Zipperer,Edward,2839 50th St.,Brazil,35960,santa barbara,MG,M,2,10,2013 
+43,Wellington,Stanley,8282 Fender Ave.,Brazil,5727,sao paulo,SP,F,1,2,2004 +44,Taylor,Zeb,27391 Drummin Lane,Brazil,7053,guarulhos,SP,M,3,3,1979 +45,Zipperer,Fran,4934 Highview Blvd.,Brazil,14026,ribeirao preto,SP,M,3,1,1982 +46,Von Brocken,Stanley,2772 Waverly St.,Brazil,30320,belo horizonte,MG,F,2,4,1986 +47,Von Brocken,Marian,4934 Waverly St.,Brazil,38300,ituiutaba,MG,M,3,10,1985 +48,Winstrom,Ramona,823 Ivy Lane,Brazil,18740,taquarituba,SP,M,3,6,2001 +49,Petrov,Ralph,2839 Overland Dr.,Brazil,83085,sao jose dos pinhais,PR,M,3,9,1983 +50,Roberts,Stanley,2383 Hwy 45,Brazil,5351,sao paulo,SP,M,1,11,1979 diff --git a/jenner-check/t003_customer_format_sql/input/invoice.csv b/jenner-check/t003_customer_format_sql/input/invoice.csv new file mode 100644 index 0000000..c697d91 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/input/invoice.csv @@ -0,0 +1,101 @@ +Invoice_ID;InvoiceNo;InvoiceDate;Customer_ID;Payment_Method;Operation +1;539730;12/21/2010;1;4;Sale +2;552969;5/12/2011;1;4;Sale +3;577382;11/18/2011;1;1;Sale +4;580363;12/2/2011;1;4;Sale +5;550837;4/20/2011;2;4;Sale +6;579768;11/30/2011;3;1;Sale +7;C553665;5/16/2011;4;3;Return +8;C557947;6/21/2011;4;2;Return +9;C577164;11/15/2011;4;1;Return +10;567972;9/22/2011;5;1;Sale +11;575586;11/9/2011;6;2;Sale +12;546980;3/18/2011;8;2;Sale +13;C564751;8/24/2011;8;3;Return +14;557117;6/16/2011;9;3;Sale +15;557251;6/17/2011;9;4;Sale +16;575924;11/11/2011;9;1;Sale +17;C570828;10/10/2011;9;4;Return +18;545702;3/6/2011;10;1;Sale +19;554284;5/23/2011;10;1;Sale +20;575211;11/8/2011;11;2;Sale +21;551287;4/27/2011;12;3;Sale +22;560555;7/19/2011;12;4;Sale +23;571292;10/14/2011;12;3;Sale +24;C579757;11/24/2011;12;4;Return +25;557631;6/21/2011;13;1;Sale +26;557744;6/21/2011;13;2;Sale +27;561030;7/22/2011;13;3;Sale +28;561701;7/28/2011;14;3;Sale +29;C538876;12/14/2010;14;3;Return +30;C551674;4/26/2011;14;2;Return +31;C547855;3/21/2011;15;3;Return +32;539351;12/17/2010;16;4;Sale +33;547397;3/22/2011;16;3;Sale 
+34;555103;5/31/2011;17;2;Sale +35;566207;9/9/2011;17;4;Sale +36;C577345;11/16/2011;17;3;Return +37;555721;6/6/2011;18;2;Sale +38;568402;9/26/2011;18;1;Sale +39;543352;2/7/2011;19;4;Sale +40;562733;8/8/2011;19;4;Sale +41;578849;11/25/2011;19;3;Sale +42;C567713;9/19/2011;19;3;Return +43;539421;12/17/2010;20;4;Sale +44;553897;5/19/2011;20;1;Sale +45;563188;8/12/2011;20;3;Sale +46;573916;11/1/2011;20;4;Sale +47;C580773;12/2/2011;20;2;Return +48;569550;10/4/2011;22;4;Sale +49;545869;3/7/2011;23;4;Sale +50;557852;6/22/2011;23;4;Sale +51;564510;8/25/2011;23;1;Sale +52;C576670;11/13/2011;23;2;Return +53;537242;12/6/2010;24;3;Sale +54;548165;3/29/2011;24;3;Sale +55;551845;5/4/2011;24;2;Sale +56;560328;7/17/2011;24;4;Sale +57;567344;9/19/2011;24;4;Sale +58;537881;12/9/2010;25;4;Sale +59;553462;5/16/2011;26;3;Sale +60;545064;2/27/2011;27;4;Sale +61;565387;9/2/2011;27;1;Sale +62;C543353;1/31/2011;27;2;Return +63;545054;2/27/2011;31;4;Sale +64;561534;7/27/2011;32;3;Sale +65;561697;7/28/2011;32;2;Sale +66;560927;7/21/2011;33;4;Sale +67;571556;10/17/2011;34;4;Sale +68;547945;3/28/2011;35;4;Sale +69;554261;5/22/2011;35;1;Sale +70;559784;7/11/2011;35;3;Sale +71;561601;7/27/2011;36;1;Sale +72;575517;11/9/2011;36;2;Sale +73;544326;2/17/2011;37;3;Sale +74;C566748;9/9/2011;37;2;Return +75;C578832;11/22/2011;37;3;Return +76;564841;8/28/2011;38;1;Sale +77;566264;9/11/2011;38;3;Sale +78;C541573;1/12/2011;38;3;Return +79;C561211;7/20/2011;38;4;Return +80;565243;9/1/2011;39;2;Sale +81;569734;10/5/2011;39;2;Sale +82;C575666;11/8/2011;39;4;Return +83;546480;3/13/2011;40;3;Sale +84;C564949;8/25/2011;41;3;Return +85;567158;9/16/2011;43;3;Sale +86;537369;12/6/2010;44;1;Sale +87;537693;12/8/2010;44;2;Sale +88;542106;1/25/2011;44;3;Sale +89;565593;9/5/2011;44;1;Sale +90;567144;9/16/2011;44;2;Sale +91;574677;11/4/2011;44;3;Sale +92;576575;11/15/2011;44;1;Sale +93;C579912;11/30/2011;44;4;Return +94;559321;7/7/2011;45;4;Sale +95;562448;8/4/2011;45;1;Sale +96;C551947;4/27/2011;45;1;Return 
+97;544999;2/25/2011;46;2;Sale +98;565669;9/5/2011;46;2;Sale +99;569768;10/5/2011;46;1;Sale +100;537059;12/5/2010;47;2;Sale diff --git a/jenner-check/t003_customer_format_sql/meta.json b/jenner-check/t003_customer_format_sql/meta.json new file mode 100644 index 0000000..85029e2 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t003_customer_format_sql", + "source_file": "Code/2_Customer_Profiling.sas (AGE_GROUP format + stores_visits block, lines 37-65)", + "source_blob_sha": "aaecf46642fa59ba269db556e02e94db69b4806d", + "source_commit": "6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "50-row sample of Customers.csv and 100-row sample of Invoice.csv. The PROJECT libref is dropped; the age categorization is hoisted into autoexec.sas so the script body matches the upstream stores_visits block. Exercises PROC FORMAT VALUE, DATA MERGE on Customer_ID, PROC SQL COUNT/GROUP BY, INPUT()/PUT() with a custom format, and PROC SORT by computed key." +} diff --git a/jenner-check/t003_customer_format_sql/script.sas b/jenner-check/t003_customer_format_sql/script.sas new file mode 100644 index 0000000..33d2fe7 --- /dev/null +++ b/jenner-check/t003_customer_format_sql/script.sas @@ -0,0 +1,64 @@ +/******************************************************************************** + * Adapted from Code/2_Customer_Profiling.sas + * - Custom $age_group format + * - Merge of Customers + Invoice on Customer_ID + * - PROC SQL aggregation by Age_Range (stores visits) + * - PROC PRINT of the counts, ordered via the custom format + * + * Demonstrates PROC FORMAT (value statement), DATA MERGE on Customer_ID, + * PROC SQL COUNT(*) GROUP BY, INPUT()/PUT() conversion using a custom format, + * and PROC SORT by a computed sort key (character, because the $3. informat is used). 
+ *******************************************************************************/ + +/* Custom character format mapping each Age_Range band to a sort-order digit (1 to 7) */ +proc format; + value $age_group + 'Under 18' = '1' + 'Very Young' = '2' + 'Young' = '3' + 'Middle Age' = '4' + 'Mature' = '5' + 'Old' = '6' + 'Very Old' = '7'; +run; + +/* Sort both inputs by Customer_ID, then merge keeping only IDs present in both (if a and b) */ +proc sort data=customers out=customers_sorted; + by customer_id; +run; +proc sort data=invoice out=invoice_sorted; + by customer_id; +run; + +data customers_invoice; + merge customers_sorted (in=a) invoice_sorted (in=b); + by customer_id; + if a and b; +run; + +/* Count store visits (rows of the merged table) per age range */ +proc sql; + create table stores_visits as + select Age_Range, COUNT(*) as Stores_Visits + from customers_invoice + group by Age_Range; +quit; + +/* Map Age_Range through $age_group. to get the sort key (the $3. informat keeps Sort_Order character), then sort by it and drop it */ +data stores_visits; + set stores_visits; + Sort_Order = input(put(Age_Range, $age_group.), $3.); +run; + +proc sort data=stores_visits; + by Sort_Order; +run; + +data stores_visits; + set stores_visits; + drop Sort_Order; +run; + +proc print data=stores_visits noobs; + title "Total store visits by age range"; +run; diff --git a/jenner-check/t004_sales_returns_split/autoexec.sas b/jenner-check/t004_sales_returns_split/autoexec.sas new file mode 100644 index 0000000..cf57089 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/autoexec.sas @@ -0,0 +1,6 @@ +options obs=100; +proc import datafile="input/invoice.csv" dbms=dlm out=invoice replace; + delimiter=";"; + getnames=yes; + guessingrows=max; +run; diff --git a/jenner-check/t004_sales_returns_split/expected.json b/jenner-check/t004_sales_returns_split/expected.json new file mode 100644 index 0000000..7929719 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/expected.json @@ -0,0 +1,16 @@ +{ + 
"_captured_at": "2026-05-11T23:59:00Z", + "_captured_run_id": "r_019e17f7c8707af0b0b7393f97d21bd0", + "_jenner_version": "0.14.2", 
+ "status": "ok", + "exit_code": 0, + "log_contains": [ + "Imported 40 rows from input/invoice.csv.", + "DATA Sales", + "PROC PRINT data=Sales", + "PROC PRINT data=Returns", + "PROC PRINT completed: 5 observations printed, 6 variables" + ], + "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"], + "diagnostics": {"parse_warnings": [], "runtime_warnings": []} +} diff --git a/jenner-check/t004_sales_returns_split/expected/files.md b/jenner-check/t004_sales_returns_split/expected/files.md new file mode 100644 index 0000000..cbac463 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/expected/files.md @@ -0,0 +1,15 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. + +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| listing.txt | text/plain | 1197 | https://api.jenneranalytics.com/v1/run/r_019e17f7c8707af0b0b7393f97d21bd0/files/listing.txt | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| invoice | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f7c8707af0b0b7393f97d21bd0/datasets/invoice | +| returns | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f7c8707af0b0b7393f97d21bd0/datasets/returns | +| sales | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f7c8707af0b0b7393f97d21bd0/datasets/sales | \ No newline at end of file diff --git a/jenner-check/t004_sales_returns_split/expected/log.txt b/jenner-check/t004_sales_returns_split/expected/log.txt new file mode 100644 index 0000000..72f43fd --- /dev/null +++ b/jenner-check/t004_sales_returns_split/expected/log.txt @@ -0,0 +1,24 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: PROC IMPORT datafile=input/invoice.csv out=invoice + +NOTE: Imported 40 rows from input/invoice.csv. 
+NOTE: DATA Sales + + +NOTE: Read 40 rows from invoice. +NOTE: Wrote Sales (40 rows, 6 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC PRINT data=Sales + +NOTE: PROC PRINT completed: 5 observations printed, 6 variables +NOTE: PROC PRINT data=Returns + +NOTE: PROC PRINT completed: 5 observations printed, 6 variables +NOTE: PROC SQL + +NOTE: PROC SQL statement used. diff --git a/jenner-check/t004_sales_returns_split/expected/output.txt b/jenner-check/t004_sales_returns_split/expected/output.txt new file mode 100644 index 0000000..f034760 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/expected/output.txt @@ -0,0 +1,28 @@ + First 5 sales invoices + + Obs INVOICE_ID INVOICENO INVOICEDATE CUSTOMER_ID PAYMENT_METHOD OPERATION + 1 1 539730 12/21/2010 1 4 Sale + 2 2 552969 5/12/2011 1 4 Sale + 3 3 577382 11/18/2011 1 1 Sale + 4 4 580363 12/2/2011 1 4 Sale + 5 5 550837 4/20/2011 2 4 Sale + +... 25 more observations (showing 5 of 30) + + First 5 return invoices + + Obs INVOICE_ID INVOICENO INVOICEDATE CUSTOMER_ID PAYMENT_METHOD OPERATION + 1 7 C553665 5/16/2011 4 3 Return + 2 8 C557947 6/21/2011 4 2 Return + 3 9 C577164 11/15/2011 4 1 Return + 4 13 C564751 8/24/2011 8 3 Return + 5 17 C570828 10/10/2011 9 4 Return + +... 
5 more observations (showing 5 of 10) + + First 5 return invoices + +operation total_rows +--------- ---------- + Sales 30 + Returns 10 diff --git a/jenner-check/t004_sales_returns_split/input/invoice.csv b/jenner-check/t004_sales_returns_split/input/invoice.csv new file mode 100644 index 0000000..3802758 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/input/invoice.csv @@ -0,0 +1,41 @@ +Invoice_ID;InvoiceNo;InvoiceDate;Customer_ID;Payment_Method;Operation +1;539730;12/21/2010;1;4;Sale +2;552969;5/12/2011;1;4;Sale +3;577382;11/18/2011;1;1;Sale +4;580363;12/2/2011;1;4;Sale +5;550837;4/20/2011;2;4;Sale +6;579768;11/30/2011;3;1;Sale +7;C553665;5/16/2011;4;3;Return +8;C557947;6/21/2011;4;2;Return +9;C577164;11/15/2011;4;1;Return +10;567972;9/22/2011;5;1;Sale +11;575586;11/9/2011;6;2;Sale +12;546980;3/18/2011;8;2;Sale +13;C564751;8/24/2011;8;3;Return +14;557117;6/16/2011;9;3;Sale +15;557251;6/17/2011;9;4;Sale +16;575924;11/11/2011;9;1;Sale +17;C570828;10/10/2011;9;4;Return +18;545702;3/6/2011;10;1;Sale +19;554284;5/23/2011;10;1;Sale +20;575211;11/8/2011;11;2;Sale +21;551287;4/27/2011;12;3;Sale +22;560555;7/19/2011;12;4;Sale +23;571292;10/14/2011;12;3;Sale +24;C579757;11/24/2011;12;4;Return +25;557631;6/21/2011;13;1;Sale +26;557744;6/21/2011;13;2;Sale +27;561030;7/22/2011;13;3;Sale +28;561701;7/28/2011;14;3;Sale +29;C538876;12/14/2010;14;3;Return +30;C551674;4/26/2011;14;2;Return +31;C547855;3/21/2011;15;3;Return +32;539351;12/17/2010;16;4;Sale +33;547397;3/22/2011;16;3;Sale +34;555103;5/31/2011;17;2;Sale +35;566207;9/9/2011;17;4;Sale +36;C577345;11/16/2011;17;3;Return +37;555721;6/6/2011;18;2;Sale +38;568402;9/26/2011;18;1;Sale +39;543352;2/7/2011;19;4;Sale +40;562733;8/8/2011;19;4;Sale diff --git a/jenner-check/t004_sales_returns_split/meta.json b/jenner-check/t004_sales_returns_split/meta.json new file mode 100644 index 0000000..8935301 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": 
"t004_sales_returns_split", + "source_file": "Code/1_Data_Pre-Processing.sas (INVOICE DIVISION block, lines 64-71)", + "source_blob_sha": "7eccb563759da0420f3fff3ddd41ac0a95f29a50", + "source_commit": "6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "40-row sample of Invoice.csv (30 Sale + 10 Return). Drops the PROJECT libref and adds a trailing PROC SQL count so both output datasets are visible in the listing. Exercises a single DATA step with multiple named output destinations and conditional OUTPUT." +} diff --git a/jenner-check/t004_sales_returns_split/script.sas b/jenner-check/t004_sales_returns_split/script.sas new file mode 100644 index 0000000..6cdcd31 --- /dev/null +++ b/jenner-check/t004_sales_returns_split/script.sas @@ -0,0 +1,31 @@ +/******************************************************************************** + * Adapted from Code/1_Data_Pre-Processing.sas (INVOICE DIVISION block) + * + * Single DATA step that splits one input dataset into TWO output datasets + * based on the Operation value, using IF/ELSE IF + OUTPUT to a named dataset. + * This is a common technique for separating logically distinct event types + * (sales vs returns here) in one pass. 
+ *******************************************************************************/ + +data Sales Returns; + set invoice; + if Operation = 'Sale' then + output Sales; + else if Operation = 'Return' then + output Returns; +run; + +proc print data=Sales(obs=5); + title "First 5 sales invoices"; +run; + +proc print data=Returns(obs=5); + title "First 5 return invoices"; +run; + +/* Quick counts so the split is visible in the output. No TITLE is reset here, so this listing repeats the title of the preceding PROC PRINT (as captured in expected/output.txt). */ +proc sql; + select 'Sales' as Operation, count(*) as Total_Rows from Sales + union all + select 'Returns' as Operation, count(*) as Total_Rows from Returns; +quit; diff --git a/jenner-check/t005_proc_means_basket_value/autoexec.sas b/jenner-check/t005_proc_means_basket_value/autoexec.sas new file mode 100644 index 0000000..e59c34b --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/autoexec.sas @@ -0,0 +1,13 @@ +options obs=200; +proc import datafile="input/basket.csv" dbms=csv out=basket replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/products.csv" dbms=csv out=products replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/promotions.csv" dbms=csv out=promotions replace; + getnames=yes; + guessingrows=max; +run; diff --git a/jenner-check/t005_proc_means_basket_value/expected.json b/jenner-check/t005_proc_means_basket_value/expected.json new file mode 100644 index 0000000..65a53e8 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/expected.json @@ -0,0 +1,14 @@ +{ + "_captured_at": "2026-05-12T00:01:00Z", + "_captured_run_id": "r_019e17f8c60c76f39e8952e5f7edff6d", + "_jenner_version": "0.14.2", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "PROC MEANS", + "Output dataset Invoice_Total_Value has 11 observations", + "PROC PRINT completed: 10 observations printed" + ], + "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"], + 
"diagnostics": {"parse_warnings": [], "runtime_warnings": []} +} diff --git 
a/jenner-check/t005_proc_means_basket_value/expected/files.md b/jenner-check/t005_proc_means_basket_value/expected/files.md new file mode 100644 index 0000000..e31f283 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/expected/files.md @@ -0,0 +1,23 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. + +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| Invoice_Total_Value | application/octet-stream | 188 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/files/Invoice_Total_Value | +| listing.txt | text/plain | 732 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/files/listing.txt | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| bask_prod_prom | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/bask_prod_prom | +| basket | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/basket | +| basket_products | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/basket_products | +| basket_products_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/basket_products_sorted | +| basket_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/basket_sorted | +| invoice_total_value | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/invoice_total_value | +| products | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/products | +| products_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/products_sorted | +| promotions | 0 | 
https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/promotions | +| promotions_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f8c60c76f39e8952e5f7edff6d/datasets/promotions_sorted | \ No newline at end of file diff --git a/jenner-check/t005_proc_means_basket_value/expected/log.txt b/jenner-check/t005_proc_means_basket_value/expected/log.txt new file mode 100644 index 0000000..4544627 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/expected/log.txt @@ -0,0 +1,55 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 200. +NOTE: PROC IMPORT datafile=input/basket.csv out=basket + +NOTE: Imported 80 rows from input/basket.csv. +NOTE: PROC IMPORT datafile=input/products.csv out=products + +NOTE: Imported 100 rows from input/products.csv. +NOTE: PROC IMPORT datafile=input/promotions.csv out=promotions + +NOTE: Imported 4 rows from input/promotions.csv. +NOTE: PROC SORT data=basket + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 80 rows from basket. +NOTE: Wrote basket_sorted (80 rows, 4 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: PROC SORT data=products + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 100 rows from products. +NOTE: Wrote products_sorted (100 rows, 7 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA basket_products + +NOTE: Stream 1 processed 80 rows, max BY-group size: 4 (O(1) memory verified) +NOTE: Stream 2 processed 100 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: PROC SORT data=basket_products + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 58 rows from basket_products. 
+NOTE: Wrote basket_products_sorted (58 rows, 10 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: PROC SORT data=promotions + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 4 rows from promotions. +NOTE: Wrote promotions_sorted (4 rows, 2 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA bask_prod_prom + +NOTE: Stream 1 processed 58 rows, max BY-group size: 40 (O(1) memory verified) +NOTE: Stream 2 processed 4 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: PROC MEANS +NOTE: Output dataset Invoice_Total_Value has 11 observations and 4 variables. +NOTE: PROC MEANS statement used. +NOTE: PROC PRINT data=Invoice_Total_Value + +NOTE: PROC PRINT completed: 10 observations printed, 4 variables diff --git a/jenner-check/t005_proc_means_basket_value/expected/output.txt b/jenner-check/t005_proc_means_basket_value/expected/output.txt new file mode 100644 index 0000000..82f44f9 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/expected/output.txt @@ -0,0 +1,16 @@ + Invoice totals after discount (first 10 invoices) + + Obs INVOICE_ID _TYPE_ _FREQ_ INVOICE_TOTAL_VALUE + 1 1 1 3 123.84 + 2 10 1 1 57.54 + 3 11 1 13 701.05 + 4 2 1 1 28.88 + 5 3 1 8 400.44 + 6 4 1 4 209.62 + 7 5 1 1 33.31 + 8 6 1 13 729.53 + 9 7 1 1 100.1 + 10 8 1 2 96.02 + +... 
1 more observations (showing 10 of 11) + diff --git a/jenner-check/t005_proc_means_basket_value/input/basket.csv b/jenner-check/t005_proc_means_basket_value/input/basket.csv new file mode 100644 index 0000000..4bdd478 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/input/basket.csv @@ -0,0 +1,81 @@ +Invoice_ID,Product_ID,Promotion_ID,Quantity +1,32,1,2 +1,126,1,1 +1,120,1,2 +1,42,1,1 +1,110,1,1 +1,66,1,1 +1,103,1,1 +2,38,4,1 +2,131,1,3 +3,10,3,2 +3,143,1,2 +3,49,1,1 +3,121,1,2 +3,35,1,3 +3,4,3,2 +3,138,1,2 +3,53,1,1 +3,26,4,1 +3,129,1,2 +3,78,1,3 +3,82,1,2 +4,21,1,2 +4,12,3,1 +4,35,1,1 +4,45,1,2 +5,82,1,1 +6,47,1,3 +6,49,1,1 +6,132,1,2 +6,36,1,1 +6,56,1,2 +6,108,1,1 +6,100,2,2 +6,93,1,3 +6,47,1,1 +6,81,1,2 +6,119,1,2 +6,75,4,2 +6,75,4,1 +6,31,2,3 +6,136,1,1 +6,137,1,1 +6,72,1,1 +6,106,1,2 +6,6,1,2 +7,13,1,2 +8,94,4,2 +8,39,3,1 +9,107,4,3 +9,53,1,2 +9,38,4,3 +9,73,1,3 +9,70,4,2 +9,86,1,2 +9,57,1,3 +9,131,1,2 +9,26,3,2 +9,83,4,3 +9,29,1,1 +9,117,1,2 +9,36,1,1 +9,69,1,1 +9,108,1,2 +9,102,3,3 +10,19,1,3 +11,112,1,3 +11,26,1,1 +11,70,1,1 +11,56,3,1 +11,74,1,1 +11,122,1,2 +11,53,1,3 +11,92,1,2 +11,14,1,1 +11,7,2,2 +11,62,3,2 +11,88,1,2 +11,53,1,2 +11,55,1,3 +11,74,1,3 diff --git a/jenner-check/t005_proc_means_basket_value/input/products.csv b/jenner-check/t005_proc_means_basket_value/input/products.csv new file mode 100644 index 0000000..f6cad8c --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/input/products.csv @@ -0,0 +1,101 @@ +Product_ID,Product line,Product type,Product,SKU,Product_Origin,Product_Price +1,Camping Equipment,Cooking Gear,TrailChef Water Bag,4055095887499070000,1,17.79 +2,Camping Equipment,Cooking Gear,TrailChef Canteen,4055095897377050000,2,24.92 +3,Camping Equipment,Cooking Gear,TrailChef Kitchen Kit,4056008039577040000,4,34.38 +4,Camping Equipment,Cooking Gear,TrailChef Cup,4056008079584050000,2,21.64 +5,Camping Equipment,Cooking Gear,TrailChef Cook Set,4056008079591060000,1,25.98 +6,Camping Equipment,Cooking 
Gear,TrailChef Deluxe Cook Set,4055095999441030000,2,32.49 +7,Camping Equipment,Cooking Gear,TrailChef Single Flame,4055095979496030000,4,28.54 +8,Camping Equipment,Cooking Gear,TrailChef Double Flame,4055095989526060000,3,28.93 +9,Camping Equipment,Cooking Gear,TrailChef Kettle,4055095990359060000,3,15.26 +10,Camping Equipment,Cooking Gear,TrailChef Utensils,4055095910397050000,3,32.24 +11,Camping Equipment,Tents,Star Lite,4055095910502060000,2,28.17 +12,Camping Equipment,Tents,Star Dome,4052805633062080000,5,36.98 +13,Camping Equipment,Tents,Star Gazer 2,4052805693123080000,5,50.05 +14,Camping Equipment,Tents,Star Gazer 3,4052805683161080000,4,48.44 +15,Camping Equipment,Tents,Star Gazer 6,4055095578490050000,5,32.01 +16,Camping Equipment,Tents,Star Peg,4055095528520080000,5,29.45 +17,Camping Equipment,Sleeping Bags,Hibernator Lite,4054202337561030000,5,27.18 +18,Camping Equipment,Sleeping Bags,Hibernator,4055095376104050000,4,31.23 +19,Camping Equipment,Sleeping Bags,Hibernator Extreme,4055095395658180000,3,19.18 +20,Camping Equipment,Sleeping Bags,Hibernator Self - Inflating Mat,4054202166218110000,4,28.13 +21,Camping Equipment,Sleeping Bags,Hibernator Pad,4054202347539030000,4,50.72 +22,Camping Equipment,Sleeping Bags,Hibernator Pillow,4055095517868010000,4,41.21 +23,Camping Equipment,Sleeping Bags,Hibernator Camp Cot,4055095551627020000,3,41.11 +24,Camping Equipment,Packs,Canyon Mule Climber Backpack,4055095531634030000,5,16.62 +25,Camping Equipment,Packs,Canyon Mule Weekender Backpack,4055095581641040000,3,18.92 +26,Camping Equipment,Packs,Canyon Mule Journey Backpack,4055095551658050000,4,18.18 +27,Camping Equipment,Packs,Canyon Mule Extreme Backpack,4055095561863020000,2,30.34 +28,Camping Equipment,Packs,Canyon Mule Cooler,4055095541870030000,4,36.55 +29,Camping Equipment,Packs,Canyon Mule Carryall,4055095541887040000,1,51.48 +30,Camping Equipment,Lanterns,Firefly Lite,4055095571894050000,4,13.8 +31,Camping Equipment,Lanterns,Firefly 
Mapreader,4055095390326060000,3,34.77 +32,Camping Equipment,Lanterns,Firefly 2,4055095218579070000,5,35.35 +33,Camping Equipment,Lanterns,Firefly 4,4055095238791070000,5,38.57 +34,Camping Equipment,Lanterns,Firefly Extreme,4055095268920040000,5,33.82 +35,Camping Equipment,Lanterns,Firefly Multi-light,4055095228937050000,2,20.68 +36,Camping Equipment,Lanterns,EverGlow Single,4053641734341100000,2,29.28 +37,Camping Equipment,Lanterns,EverGlow Double,4043179848058010000,5,11.68 +38,Camping Equipment,Lanterns,EverGlow Kerosene,4055095264213060000,2,41.26 +39,Camping Equipment,Lanterns,EverGlow Butane,4055095214862020000,3,37.52 +40,Camping Equipment,Lanterns,EverGlow Lamp,4055095245036070000,4,23.45 +41,Camping Equipment,Lanterns,Flicker Lantern,4055095225197050000,2,30.64 +42,Personal Accessories,Watches,Mountain Man Analog,4055095232905070000,2,21.94 +43,Personal Accessories,Watches,Mountain Man Digital,4055095253087070000,2,33.91 +44,Personal Accessories,Watches,Mountain Man Deluxe,4260209870930010000,2,32.55 +45,Personal Accessories,Watches,Mountain Man Combination,4055095494439130000,1,28.96 +46,Personal Accessories,Watches,Mountain Man Extreme,4260209860947010000,1,41.6 +47,Personal Accessories,Watches,Venue,4027535829815030000,1,35.5 +48,Personal Accessories,Watches,Infinity,4027535869815030000,1,32.74 +49,Personal Accessories,Watches,Lux,4027535859815030000,2,17.69 +50,Personal Accessories,Watches,Sam,4027535889815030000,4,37.05 +51,Personal Accessories,Watches,TX,4027535829815030000,5,32 +52,Personal Accessories,Watches,Legend,4027535819815030000,1,8.15 +53,Personal Accessories,Watches,Kodiak,4027535829815030000,1,25.29 +54,Personal Accessories,Watches,Zodiak,4027535889815030000,1,28.26 +55,Personal Accessories,Eyewear,Polar Sun,4027535819815030000,4,22.63 +56,Personal Accessories,Eyewear,Polar Ice,4027535839976040000,1,46.57 +57,Personal Accessories,Eyewear,Polar Sports,4027535899976040000,3,46.45 +58,Personal Accessories,Eyewear,Polar 
Wave,4027535829976040000,2,23.35 +59,Personal Accessories,Eyewear,Polar Extreme,4027535859976040000,1,41.1 +60,Personal Accessories,Eyewear,Bella,4027535819976040000,1,32.58 +61,Personal Accessories,Eyewear,Capri,4027535889976040000,4,19.48 +62,Personal Accessories,Eyewear,Cat Eye,4027535839976040000,3,46.69 +63,Personal Accessories,Eyewear,Dante,4027535829976040000,5,42.74 +64,Personal Accessories,Eyewear,Fairway,4027535899976040000,5,24.65 +65,Personal Accessories,Eyewear,Inferno,4027535870835040000,3,38.79 +66,Personal Accessories,Eyewear,Maximus,4027535810835040000,5,31.2 +67,Personal Accessories,Eyewear,Trendi,4027535810866070000,4,38.56 +68,Personal Accessories,Eyewear,Zone,4027535820866070000,1,26.46 +69,Personal Accessories,Eyewear,Hawk Eye,4055095510419040000,5,32.65 +70,Personal Accessories,Eyewear,Retro,4049466237018080000,5,27.66 +71,Personal Accessories,Knives,Single Edge,4049466253126080000,3,22.78 +72,Personal Accessories,Knives,Double Edge,4049466155551080000,3,44.12 +73,Personal Accessories,Knives,Edge Extreme,4053089341760070000,5,31.08 +74,Personal Accessories,Knives,Bear Edge,4055095417941070000,1,26.91 +75,Personal Accessories,Knives,Bear Survival Edge,4055095555701030000,2,38.28 +76,Personal Accessories,Knives,Max Gizmo,4049466842035030000,3,25.06 +77,Personal Accessories,Knives,Pocket Gizmo,4053641759781010000,1,22.65 +78,Personal Accessories,Binoculars,Seeker 35,4053641749736050000,4,43.29 +79,Personal Accessories,Binoculars,Seeker 50,4052906087069070000,2,16.94 +80,Personal Accessories,Binoculars,Seeker Extreme,4052906037069070000,2,40.17 +81,Personal Accessories,Binoculars,Seeker Mini,4052906097069070000,1,33.96 +82,Personal Accessories,Binoculars,Opera Vision,4052906077069070000,2,33.31 +83,Personal Accessories,Binoculars,Ranger Vision,4052906027069070000,3,42.3 +84,Personal Accessories,Navigation,Glacier Basic,4052906077069070000,2,34.3 +85,Personal Accessories,Navigation,Glacier Deluxe,4052072061238070000,4,35.67 +86,Personal 
Accessories,Navigation,Glacier GPS,4052072021238070000,5,26.05 +87,Personal Accessories,Navigation,Glacier GPS Extreme,4052072041238070000,3,29.68 +88,Personal Accessories,Navigation,Trail Master,4055095368286020000,5,40.53 +89,Personal Accessories,Navigation,Trail Scout,4055095378873030000,3,37.37 +90,Personal Accessories,Navigation,Trail Star,4054202157047070000,4,35.43 +91,Personal Accessories,Navigation,Astro Pilot,4030864741194030000,3,20.36 +92,Personal Accessories,Navigation,Sky Pilot,4053227165466070000,5,30.2 +93,Personal Accessories,Navigation,Auto Pilot,4055095516572070000,2,23.11 +94,Outdoor Protection,Insect Repellents,BugShield Natural,4055095446550050000,5,47.14 +95,Outdoor Protection,Insect Repellents,BugShield Spray,4055095476604040000,2,17.72 +96,Outdoor Protection,Insect Repellents,BugShield Lotion Lite,4055095486611050000,4,34.54 +97,Outdoor Protection,Insect Repellents,BugShield Lotion,4055382374854060000,5,15.72 +98,Outdoor Protection,Insect Repellents,BugShield Extreme,4055095539176080000,2,22.58 +99,Outdoor Protection,Sunscreen,Sun Blocker,4053641943393070000,1,33.31 +100,Outdoor Protection,Sunscreen,Sun Shelter Stick,4053641936578040000,4,14.89 diff --git a/jenner-check/t005_proc_means_basket_value/input/promotions.csv b/jenner-check/t005_proc_means_basket_value/input/promotions.csv new file mode 100644 index 0000000..d761ec4 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/input/promotions.csv @@ -0,0 +1,5 @@ +Promotion_ID,Promotion +1,0 +2,0.1 +3,0.2 +4,0.3 diff --git a/jenner-check/t005_proc_means_basket_value/meta.json b/jenner-check/t005_proc_means_basket_value/meta.json new file mode 100644 index 0000000..6f557d3 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t005_proc_means_basket_value", + "source_file": "Code/1_Data_Pre-Processing.sas (INVOICE TOTAL VALUE block, lines 36-61)", + "source_blob_sha": "7eccb563759da0420f3fff3ddd41ac0a95f29a50", + "source_commit": 
"6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "Samples: 80 basket rows for invoices 1-11, 100 products, full promotions. Drops the PROJECT libref. Exercises PROC SORT, DATA MERGE on two keys (product_id then promotion_id), expression evaluation with FORMAT statement, and PROC MEANS with CLASS/VAR/OUTPUT/NWAY/NOPRINT producing a sum statistic." +} diff --git a/jenner-check/t005_proc_means_basket_value/script.sas b/jenner-check/t005_proc_means_basket_value/script.sas new file mode 100644 index 0000000..72ccf29 --- /dev/null +++ b/jenner-check/t005_proc_means_basket_value/script.sas @@ -0,0 +1,50 @@ +/******************************************************************************** + * Adapted from Code/1_Data_Pre-Processing.sas (INVOICE TOTAL VALUE block, + * lines 36-61). + * + * Joins basket + products + promotions, computes a discount-adjusted line + * value (1 - Promotion) * Product_Price * Quantity, then aggregates per invoice + * via PROC MEANS with CLASS / VAR / OUTPUT / NWAY / NOPRINT. 
+ *******************************************************************************/ + +/* Step 1: merge basket with products on product_id */ +proc sort data=basket out=basket_sorted; + by product_id; +run; +proc sort data=products out=products_sorted; + by product_id; +run; + +data basket_products; + merge basket_sorted (in=b) products_sorted (in=p); + by product_id; + if b and p; +run; + +/* Step 2: merge with promotions on promotion_id, derive discounted line value */ +proc sort data=basket_products out=basket_products_sorted; + by promotion_id; +run; +proc sort data=promotions out=promotions_sorted; + by promotion_id; +run; + +data bask_prod_prom; + merge basket_products_sorted (in=a) promotions_sorted (in=b); + by promotion_id; + if a; + Value_After_Discount = (1 - Promotion) * Product_Price * Quantity; + format Value_After_Discount COMMA8.2; +run; + +/* Step 3: per-invoice totals via PROC MEANS */ +proc means data=bask_prod_prom noprint nway; + class Invoice_ID; + var Value_After_Discount; + output out=Invoice_Total_Value(drop=_type_ _freq_) + sum(Value_After_Discount) = Invoice_Total_Value; +run; + +proc print data=Invoice_Total_Value(obs=10); + title "Invoice totals after discount (first 10 invoices)"; +run; diff --git a/jenner-check/t006_supplier_freq_weight/autoexec.sas b/jenner-check/t006_supplier_freq_weight/autoexec.sas new file mode 100644 index 0000000..3fffb5a --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/autoexec.sas @@ -0,0 +1,23 @@ +options obs=200; +proc import datafile="input/basket.csv" dbms=csv out=basket replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/products.csv" dbms=csv out=products replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/suppliers.csv" dbms=csv out=suppliers replace; + getnames=yes; + guessingrows=max; +run; + +/* Recast SKU to char so substr() works the same way the upstream Code/0_Data_Import.sas + block does via put(SKU, best19.). 
*/ +data products; + set products; + length SKU_char $ 19; + SKU_char = put(SKU, 19.); + drop SKU; + rename SKU_char = SKU; +run; diff --git a/jenner-check/t006_supplier_freq_weight/expected.json b/jenner-check/t006_supplier_freq_weight/expected.json new file mode 100644 index 0000000..4d07428 --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/expected.json @@ -0,0 +1,14 @@ +{ + "_captured_at": "2026-05-12T00:03:00Z", + "_captured_run_id": "r_019e17f9b1397ab3b009fb568cd1c228", + "_jenner_version": "0.14.2", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "Wrote products_sorted (100 rows, 8 columns).", + "Wrote suppliers_sorted (9 rows, 2 columns).", + "PROC FREQ" + ], + "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"], + "diagnostics": {"parse_warnings": [], "runtime_warnings": []} +} diff --git a/jenner-check/t006_supplier_freq_weight/expected/files.md b/jenner-check/t006_supplier_freq_weight/expected/files.md new file mode 100644 index 0000000..189e1c8 --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/expected/files.md @@ -0,0 +1,22 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. 
+ +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| listing.txt | text/plain | 607 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/files/listing.txt | +| ods_output/freq_supplier_name.svg | image/svg+xml | 17426 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/files/ods_output/freq_supplier_name.svg | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| basket | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/basket | +| basket_products_suppliers | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/basket_products_suppliers | +| basket_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/basket_sorted | +| products | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/products | +| products_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/products_sorted | +| products_suppliers | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/products_suppliers | +| products_suppliers_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/products_suppliers_sorted | +| suppliers | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/suppliers | +| suppliers_sorted | 0 | https://api.jenneranalytics.com/v1/run/r_019e17f9b1397ab3b009fb568cd1c228/datasets/suppliers_sorted | \ No newline at end of file diff --git a/jenner-check/t006_supplier_freq_weight/expected/log.txt b/jenner-check/t006_supplier_freq_weight/expected/log.txt new file mode 100644 index 0000000..8c6fb77 --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/expected/log.txt @@ -0,0 +1,68 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at 
https://jenneranalytics.com/license + +NOTE: Option OBS changed to 200. +NOTE: PROC IMPORT datafile=input/basket.csv out=basket + +NOTE: Imported 80 rows from input/basket.csv. +NOTE: PROC IMPORT datafile=input/products.csv out=products + +NOTE: Imported 100 rows from input/products.csv. +NOTE: PROC IMPORT datafile=input/suppliers.csv out=suppliers + +NOTE: Imported 9 rows from input/suppliers.csv. +NOTE: DATA products + + +NOTE: Read 100 rows from products. +NOTE: Wrote products (100 rows, 7 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA products + + +NOTE: Read 100 rows from products. +NOTE: Wrote products (100 rows, 8 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC SORT data=products + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 100 rows from products. +NOTE: Wrote products_sorted (100 rows, 8 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: PROC SORT data=suppliers + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 9 rows from suppliers. +NOTE: Wrote suppliers_sorted (9 rows, 2 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA products_suppliers + +NOTE: Stream 1 processed 100 rows, max BY-group size: 13 (O(1) memory verified) +NOTE: Stream 2 processed 9 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: PROC SORT data=products_suppliers + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 100 rows from products_suppliers. +NOTE: Wrote products_suppliers_sorted (100 rows, 9 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. 
+NOTE: PROC SORT data=basket + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 80 rows from basket. +NOTE: Wrote basket_sorted (80 rows, 4 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA basket_products_suppliers + +NOTE: Stream 1 processed 100 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 2 processed 80 rows, max BY-group size: 4 (O(1) memory verified) +NOTE: PROC FREQ +NOTE: ODS plot written: freq_supplier_name.spec.json +NOTE: PROC FREQ statement used. diff --git a/jenner-check/t006_supplier_freq_weight/expected/output.txt b/jenner-check/t006_supplier_freq_weight/expected/output.txt new file mode 100644 index 0000000..cd128bd --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/expected/output.txt @@ -0,0 +1,15 @@ + Percentage of Products Sold by Each Supplier + + The FREQ Procedure + +SUPPLIER_NAME Frequency Percent +------------------------------------------ +Carper & Sons 14 13.21 +Dragon SA 17 16.04 +Easy Creator 8 7.55 +Elegance SA 10 9.43 +Fabulo Ltd 21 19.81 +Maestri & Maestri 11 10.38 +Selector Ltd 1 0.94 +Toktai & Chen 7 6.60 +Viking Quality 17 16.04 diff --git a/jenner-check/t006_supplier_freq_weight/input/basket.csv b/jenner-check/t006_supplier_freq_weight/input/basket.csv new file mode 100644 index 0000000..4bdd478 --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/input/basket.csv @@ -0,0 +1,81 @@ +Invoice_ID,Product_ID,Promotion_ID,Quantity +1,32,1,2 +1,126,1,1 +1,120,1,2 +1,42,1,1 +1,110,1,1 +1,66,1,1 +1,103,1,1 +2,38,4,1 +2,131,1,3 +3,10,3,2 +3,143,1,2 +3,49,1,1 +3,121,1,2 +3,35,1,3 +3,4,3,2 +3,138,1,2 +3,53,1,1 +3,26,4,1 +3,129,1,2 +3,78,1,3 +3,82,1,2 +4,21,1,2 +4,12,3,1 +4,35,1,1 +4,45,1,2 +5,82,1,1 +6,47,1,3 +6,49,1,1 +6,132,1,2 +6,36,1,1 +6,56,1,2 +6,108,1,1 +6,100,2,2 +6,93,1,3 +6,47,1,1 +6,81,1,2 +6,119,1,2 +6,75,4,2 +6,75,4,1 +6,31,2,3 +6,136,1,1 +6,137,1,1 
+6,72,1,1 +6,106,1,2 +6,6,1,2 +7,13,1,2 +8,94,4,2 +8,39,3,1 +9,107,4,3 +9,53,1,2 +9,38,4,3 +9,73,1,3 +9,70,4,2 +9,86,1,2 +9,57,1,3 +9,131,1,2 +9,26,3,2 +9,83,4,3 +9,29,1,1 +9,117,1,2 +9,36,1,1 +9,69,1,1 +9,108,1,2 +9,102,3,3 +10,19,1,3 +11,112,1,3 +11,26,1,1 +11,70,1,1 +11,56,3,1 +11,74,1,1 +11,122,1,2 +11,53,1,3 +11,92,1,2 +11,14,1,1 +11,7,2,2 +11,62,3,2 +11,88,1,2 +11,53,1,2 +11,55,1,3 +11,74,1,3 diff --git a/jenner-check/t006_supplier_freq_weight/input/products.csv b/jenner-check/t006_supplier_freq_weight/input/products.csv new file mode 100644 index 0000000..f6cad8c --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/input/products.csv @@ -0,0 +1,101 @@ +Product_ID,Product line,Product type,Product,SKU,Product_Origin,Product_Price +1,Camping Equipment,Cooking Gear,TrailChef Water Bag,4055095887499070000,1,17.79 +2,Camping Equipment,Cooking Gear,TrailChef Canteen,4055095897377050000,2,24.92 +3,Camping Equipment,Cooking Gear,TrailChef Kitchen Kit,4056008039577040000,4,34.38 +4,Camping Equipment,Cooking Gear,TrailChef Cup,4056008079584050000,2,21.64 +5,Camping Equipment,Cooking Gear,TrailChef Cook Set,4056008079591060000,1,25.98 +6,Camping Equipment,Cooking Gear,TrailChef Deluxe Cook Set,4055095999441030000,2,32.49 +7,Camping Equipment,Cooking Gear,TrailChef Single Flame,4055095979496030000,4,28.54 +8,Camping Equipment,Cooking Gear,TrailChef Double Flame,4055095989526060000,3,28.93 +9,Camping Equipment,Cooking Gear,TrailChef Kettle,4055095990359060000,3,15.26 +10,Camping Equipment,Cooking Gear,TrailChef Utensils,4055095910397050000,3,32.24 +11,Camping Equipment,Tents,Star Lite,4055095910502060000,2,28.17 +12,Camping Equipment,Tents,Star Dome,4052805633062080000,5,36.98 +13,Camping Equipment,Tents,Star Gazer 2,4052805693123080000,5,50.05 +14,Camping Equipment,Tents,Star Gazer 3,4052805683161080000,4,48.44 +15,Camping Equipment,Tents,Star Gazer 6,4055095578490050000,5,32.01 +16,Camping Equipment,Tents,Star Peg,4055095528520080000,5,29.45 +17,Camping 
Equipment,Sleeping Bags,Hibernator Lite,4054202337561030000,5,27.18 +18,Camping Equipment,Sleeping Bags,Hibernator,4055095376104050000,4,31.23 +19,Camping Equipment,Sleeping Bags,Hibernator Extreme,4055095395658180000,3,19.18 +20,Camping Equipment,Sleeping Bags,Hibernator Self - Inflating Mat,4054202166218110000,4,28.13 +21,Camping Equipment,Sleeping Bags,Hibernator Pad,4054202347539030000,4,50.72 +22,Camping Equipment,Sleeping Bags,Hibernator Pillow,4055095517868010000,4,41.21 +23,Camping Equipment,Sleeping Bags,Hibernator Camp Cot,4055095551627020000,3,41.11 +24,Camping Equipment,Packs,Canyon Mule Climber Backpack,4055095531634030000,5,16.62 +25,Camping Equipment,Packs,Canyon Mule Weekender Backpack,4055095581641040000,3,18.92 +26,Camping Equipment,Packs,Canyon Mule Journey Backpack,4055095551658050000,4,18.18 +27,Camping Equipment,Packs,Canyon Mule Extreme Backpack,4055095561863020000,2,30.34 +28,Camping Equipment,Packs,Canyon Mule Cooler,4055095541870030000,4,36.55 +29,Camping Equipment,Packs,Canyon Mule Carryall,4055095541887040000,1,51.48 +30,Camping Equipment,Lanterns,Firefly Lite,4055095571894050000,4,13.8 +31,Camping Equipment,Lanterns,Firefly Mapreader,4055095390326060000,3,34.77 +32,Camping Equipment,Lanterns,Firefly 2,4055095218579070000,5,35.35 +33,Camping Equipment,Lanterns,Firefly 4,4055095238791070000,5,38.57 +34,Camping Equipment,Lanterns,Firefly Extreme,4055095268920040000,5,33.82 +35,Camping Equipment,Lanterns,Firefly Multi-light,4055095228937050000,2,20.68 +36,Camping Equipment,Lanterns,EverGlow Single,4053641734341100000,2,29.28 +37,Camping Equipment,Lanterns,EverGlow Double,4043179848058010000,5,11.68 +38,Camping Equipment,Lanterns,EverGlow Kerosene,4055095264213060000,2,41.26 +39,Camping Equipment,Lanterns,EverGlow Butane,4055095214862020000,3,37.52 +40,Camping Equipment,Lanterns,EverGlow Lamp,4055095245036070000,4,23.45 +41,Camping Equipment,Lanterns,Flicker Lantern,4055095225197050000,2,30.64 +42,Personal Accessories,Watches,Mountain Man 
Analog,4055095232905070000,2,21.94 +43,Personal Accessories,Watches,Mountain Man Digital,4055095253087070000,2,33.91 +44,Personal Accessories,Watches,Mountain Man Deluxe,4260209870930010000,2,32.55 +45,Personal Accessories,Watches,Mountain Man Combination,4055095494439130000,1,28.96 +46,Personal Accessories,Watches,Mountain Man Extreme,4260209860947010000,1,41.6 +47,Personal Accessories,Watches,Venue,4027535829815030000,1,35.5 +48,Personal Accessories,Watches,Infinity,4027535869815030000,1,32.74 +49,Personal Accessories,Watches,Lux,4027535859815030000,2,17.69 +50,Personal Accessories,Watches,Sam,4027535889815030000,4,37.05 +51,Personal Accessories,Watches,TX,4027535829815030000,5,32 +52,Personal Accessories,Watches,Legend,4027535819815030000,1,8.15 +53,Personal Accessories,Watches,Kodiak,4027535829815030000,1,25.29 +54,Personal Accessories,Watches,Zodiak,4027535889815030000,1,28.26 +55,Personal Accessories,Eyewear,Polar Sun,4027535819815030000,4,22.63 +56,Personal Accessories,Eyewear,Polar Ice,4027535839976040000,1,46.57 +57,Personal Accessories,Eyewear,Polar Sports,4027535899976040000,3,46.45 +58,Personal Accessories,Eyewear,Polar Wave,4027535829976040000,2,23.35 +59,Personal Accessories,Eyewear,Polar Extreme,4027535859976040000,1,41.1 +60,Personal Accessories,Eyewear,Bella,4027535819976040000,1,32.58 +61,Personal Accessories,Eyewear,Capri,4027535889976040000,4,19.48 +62,Personal Accessories,Eyewear,Cat Eye,4027535839976040000,3,46.69 +63,Personal Accessories,Eyewear,Dante,4027535829976040000,5,42.74 +64,Personal Accessories,Eyewear,Fairway,4027535899976040000,5,24.65 +65,Personal Accessories,Eyewear,Inferno,4027535870835040000,3,38.79 +66,Personal Accessories,Eyewear,Maximus,4027535810835040000,5,31.2 +67,Personal Accessories,Eyewear,Trendi,4027535810866070000,4,38.56 +68,Personal Accessories,Eyewear,Zone,4027535820866070000,1,26.46 +69,Personal Accessories,Eyewear,Hawk Eye,4055095510419040000,5,32.65 +70,Personal 
Accessories,Eyewear,Retro,4049466237018080000,5,27.66 +71,Personal Accessories,Knives,Single Edge,4049466253126080000,3,22.78 +72,Personal Accessories,Knives,Double Edge,4049466155551080000,3,44.12 +73,Personal Accessories,Knives,Edge Extreme,4053089341760070000,5,31.08 +74,Personal Accessories,Knives,Bear Edge,4055095417941070000,1,26.91 +75,Personal Accessories,Knives,Bear Survival Edge,4055095555701030000,2,38.28 +76,Personal Accessories,Knives,Max Gizmo,4049466842035030000,3,25.06 +77,Personal Accessories,Knives,Pocket Gizmo,4053641759781010000,1,22.65 +78,Personal Accessories,Binoculars,Seeker 35,4053641749736050000,4,43.29 +79,Personal Accessories,Binoculars,Seeker 50,4052906087069070000,2,16.94 +80,Personal Accessories,Binoculars,Seeker Extreme,4052906037069070000,2,40.17 +81,Personal Accessories,Binoculars,Seeker Mini,4052906097069070000,1,33.96 +82,Personal Accessories,Binoculars,Opera Vision,4052906077069070000,2,33.31 +83,Personal Accessories,Binoculars,Ranger Vision,4052906027069070000,3,42.3 +84,Personal Accessories,Navigation,Glacier Basic,4052906077069070000,2,34.3 +85,Personal Accessories,Navigation,Glacier Deluxe,4052072061238070000,4,35.67 +86,Personal Accessories,Navigation,Glacier GPS,4052072021238070000,5,26.05 +87,Personal Accessories,Navigation,Glacier GPS Extreme,4052072041238070000,3,29.68 +88,Personal Accessories,Navigation,Trail Master,4055095368286020000,5,40.53 +89,Personal Accessories,Navigation,Trail Scout,4055095378873030000,3,37.37 +90,Personal Accessories,Navigation,Trail Star,4054202157047070000,4,35.43 +91,Personal Accessories,Navigation,Astro Pilot,4030864741194030000,3,20.36 +92,Personal Accessories,Navigation,Sky Pilot,4053227165466070000,5,30.2 +93,Personal Accessories,Navigation,Auto Pilot,4055095516572070000,2,23.11 +94,Outdoor Protection,Insect Repellents,BugShield Natural,4055095446550050000,5,47.14 +95,Outdoor Protection,Insect Repellents,BugShield Spray,4055095476604040000,2,17.72 +96,Outdoor Protection,Insect 
Repellents,BugShield Lotion Lite,4055095486611050000,4,34.54 +97,Outdoor Protection,Insect Repellents,BugShield Lotion,4055382374854060000,5,15.72 +98,Outdoor Protection,Insect Repellents,BugShield Extreme,4055095539176080000,2,22.58 +99,Outdoor Protection,Sunscreen,Sun Blocker,4053641943393070000,1,33.31 +100,Outdoor Protection,Sunscreen,Sun Shelter Stick,4053641936578040000,4,14.89 diff --git a/jenner-check/t006_supplier_freq_weight/input/suppliers.csv b/jenner-check/t006_supplier_freq_weight/input/suppliers.csv new file mode 100644 index 0000000..04467f4 --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/input/suppliers.csv @@ -0,0 +1,10 @@ +Supplier_ID,Supplier_Name +1,Dragon SA +2,Fabulo Ltd +3,Carper & Sons +4,Maestri & Maestri +5,Elegance SA +6,Easy Creator +7,Toktai & Chen +8,Selector Ltd +9,Viking Quality diff --git a/jenner-check/t006_supplier_freq_weight/meta.json b/jenner-check/t006_supplier_freq_weight/meta.json new file mode 100644 index 0000000..427ed42 --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t006_supplier_freq_weight", + "source_file": "Code/5_Suppliers_Analysis.sas (PERCENTAGE OF PRODUCTS SOLD BY SUPPLIER block, lines 1-48)", + "source_blob_sha": "be46f001213297fd0160d14b1d0054cca1ddf140", + "source_commit": "6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "Same basket and products samples as t005, plus suppliers.csv extracted from Suppliers.xlsx. SKU is recast to character in autoexec so the upstream substr() call still produces a supplier_id. Exercises SUBSTR, two-stage merging, and PROC FREQ with WEIGHT." 
+} diff --git a/jenner-check/t006_supplier_freq_weight/script.sas b/jenner-check/t006_supplier_freq_weight/script.sas new file mode 100644 index 0000000..dc421df --- /dev/null +++ b/jenner-check/t006_supplier_freq_weight/script.sas @@ -0,0 +1,49 @@ +/******************************************************************************** + * Adapted from Code/5_Suppliers_Analysis.sas (PERCENTAGE OF PRODUCTS SOLD BY + * SUPPLIER block, lines 1-48). + * + * Demonstrates SUBSTR() to derive a supplier code from a SKU, two PROC SORT + + * DATA MERGE joins (products->suppliers, products->basket), and PROC FREQ with + * a WEIGHT statement so the row count is weighted by Quantity. + *******************************************************************************/ + +/* Supplier_id is the 9th character of the 19-digit SKU */ +data products; + set products; + supplier_id = substr(SKU, 9, 1); +run; + +/* Join products to suppliers */ +proc sort data=products out=products_sorted; + by supplier_id; +run; +proc sort data=suppliers out=suppliers_sorted; + by supplier_id; +run; + +data products_suppliers; + merge products_sorted (in=a) suppliers_sorted (in=b); + by supplier_id; + if a and b; +run; + +/* Join again, this time products_suppliers to basket on product_id */ +proc sort data=products_suppliers out=products_suppliers_sorted; + by product_id; +run; +proc sort data=basket out=basket_sorted; + by product_id; +run; + +data basket_products_suppliers; + merge products_suppliers_sorted (in=a) basket_sorted (in=b); + by product_id; + if a and b; +run; + +/* Quantity-weighted frequency of supplier names */ +proc freq data=basket_products_suppliers; + tables Supplier_Name / nocum; + weight Quantity; + title "Percentage of Products Sold by Each Supplier"; +run; diff --git a/jenner-check/t007_rfm_proc_sql/autoexec.sas b/jenner-check/t007_rfm_proc_sql/autoexec.sas new file mode 100644 index 0000000..c010c67 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/autoexec.sas @@ -0,0 +1,65 @@ 
+options obs=500; +proc import datafile="input/customers.csv" dbms=csv out=customers replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/invoice_sales.csv" dbms=dlm out=sales replace; + delimiter=";"; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/basket.csv" dbms=csv out=basket replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/products.csv" dbms=csv out=products replace; + getnames=yes; + guessingrows=max; +run; +proc import datafile="input/promotions.csv" dbms=csv out=promotions replace; + getnames=yes; + guessingrows=max; +run; + +/* Convert the upstream m/d/yyyy text date into a real SAS date so INTCK works. + The original Code/0_Data_Import.sas relies on PROC IMPORT auto-detection on + a SAS server; we do the conversion explicitly here. */ +data sales; + set sales; + InvoiceDate_num = input(InvoiceDate, mmddyy10.); + format InvoiceDate_num mmddyy10.; + drop InvoiceDate; + rename InvoiceDate_num = InvoiceDate; +run; + +/* Build invoice_total_value the same way Code/1_Data_Pre-Processing.sas does: + basket * products * promotions -> Value_After_Discount, summed per Invoice_ID. 
*/ +proc sort data=basket out=basket_s; + by product_id; +run; +proc sort data=products out=products_s; + by product_id; +run; +data b_p; + merge basket_s (in=a) products_s (in=b); + by product_id; + if a and b; +run; +proc sort data=b_p out=b_p_s; + by promotion_id; +run; +proc sort data=promotions out=promo_s; + by promotion_id; +run; +data b_p_pr; + merge b_p_s (in=a) promo_s (in=b); + by promotion_id; + if a; + Value_After_Discount = (1 - Promotion) * Product_Price * Quantity; +run; +proc means data=b_p_pr noprint nway; + class Invoice_ID; + var Value_After_Discount; + output out=invoice_total_value + sum(Value_After_Discount) = Invoice_Total_Value; +run; diff --git a/jenner-check/t007_rfm_proc_sql/expected.json b/jenner-check/t007_rfm_proc_sql/expected.json new file mode 100644 index 0000000..9d968e8 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/expected.json @@ -0,0 +1,16 @@ +{ + "_captured_at": "2026-05-12T00:06:00Z", + "_captured_run_id": "r_019e17fb3c5674e2b59d517c5394cdcb", + "_jenner_version": "0.14.2", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "PROC MEANS", + "Output dataset invoice_total_value has 21 observations", + "Table RFM_Pre created.", + "Wrote RFM_Data (11 rows, 6 columns).", + "PROC PRINT completed: 10 observations printed, 4 variables" + ], + "log_does_not_contain": ["ERROR:", "[JENNER-ERROR"], + "diagnostics": {"parse_warnings": [], "runtime_warnings": []} +} diff --git a/jenner-check/t007_rfm_proc_sql/expected/files.md b/jenner-check/t007_rfm_proc_sql/expected/files.md new file mode 100644 index 0000000..d568081 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/expected/files.md @@ -0,0 +1,27 @@ +These URLs come from a specific captured run and expire when that run is reaped on the server. Re-running this bundle through the runner regenerates them against a fresh run_id. 
+ +## Files + +| name | content_type | size_bytes | url | +|------|--------------|------------|-----| +| invoice_total_value | application/octet-stream | 347 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/files/invoice_total_value | +| listing.txt | text/plain | 446 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/files/listing.txt | + +## Datasets + +| name | rows | preview_url | +|------|------|-------------| +| b_p | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/b_p | +| b_p_pr | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/b_p_pr | +| b_p_s | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/b_p_s | +| basket | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/basket | +| basket_s | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/basket_s | +| customers | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/customers | +| invoice_total_value | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/invoice_total_value | +| products | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/products | +| products_s | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/products_s | +| promo_s | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/promo_s | +| promotions | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/promotions | +| rfm_data | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/rfm_data | +| rfm_pre | 0 | https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/rfm_pre | +| sales | 0 | 
https://api.jenneranalytics.com/v1/run/r_019e17fb3c5674e2b59d517c5394cdcb/datasets/sales | \ No newline at end of file diff --git a/jenner-check/t007_rfm_proc_sql/expected/log.txt b/jenner-check/t007_rfm_proc_sql/expected/log.txt new file mode 100644 index 0000000..86aa4a3 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/expected/log.txt @@ -0,0 +1,81 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 500. +NOTE: PROC IMPORT datafile=input/customers.csv out=customers + +NOTE: Imported 30 rows from input/customers.csv. +NOTE: PROC IMPORT datafile=input/invoice_sales.csv out=sales + +NOTE: Imported 48 rows from input/invoice_sales.csv. +NOTE: PROC IMPORT datafile=input/basket.csv out=basket + +NOTE: Imported 200 rows from input/basket.csv. +NOTE: PROC IMPORT datafile=input/products.csv out=products + +NOTE: Imported 100 rows from input/products.csv. +NOTE: PROC IMPORT datafile=input/promotions.csv out=promotions + +NOTE: Imported 4 rows from input/promotions.csv. +NOTE: DATA sales + + +NOTE: Read 48 rows from sales. +NOTE: Wrote sales (48 rows, 6 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC SORT data=basket + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 200 rows from basket. +NOTE: Wrote basket_s (100 rows, 4 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: PROC SORT data=products + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 100 rows from products. +NOTE: Wrote products_s (100 rows, 7 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. 
+NOTE: DATA b_p + +NOTE: Stream 1 processed 100 rows, max BY-group size: 5 (O(1) memory verified) +NOTE: Stream 2 processed 100 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: PROC SORT data=b_p + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 100 rows from b_p. +NOTE: Wrote b_p_s (100 rows, 10 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: PROC SORT data=promotions + +NOTE: Unlicensed mode - input limited to 100 observations. +NOTE: Read 4 rows from promotions. +NOTE: Wrote promo_s (4 rows, 2 columns). +NOTE: NOEQUALS option acknowledged. Note: Jenner uses stable sort, so original order of ties is always preserved. +NOTE: PROC SORT statement used. +NOTE: DATA b_p_pr + +NOTE: Stream 1 processed 100 rows, max BY-group size: 60 (O(1) memory verified) +NOTE: Stream 2 processed 4 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: PROC MEANS +NOTE: Output dataset invoice_total_value has 21 observations and 4 variables. +NOTE: PROC MEANS statement used. +NOTE: PROC SQL + +NOTE: Table RFM_Pre created. +NOTE: PROC SQL statement used. +NOTE: DATA RFM_Data + + +NOTE: Read 11 rows from RFM_Pre. +NOTE: Wrote RFM_Data (11 rows, 6 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC PRINT data=RFM_Data + +NOTE: PROC PRINT completed: 10 observations printed, 4 variables diff --git a/jenner-check/t007_rfm_proc_sql/expected/output.txt b/jenner-check/t007_rfm_proc_sql/expected/output.txt new file mode 100644 index 0000000..2b1ae50 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/expected/output.txt @@ -0,0 +1,16 @@ + Sample of 10 Customers' RFM Data + +CUSTOMER_ID R F M + 1 2 4 566.3 + 3 2 1 440.969 + 5 12 1 57.54 + 6 5 1 451.952 + 9 5 3 1240.923 + 10 29 2 843.588 + 11 5 1 111.979 + 12 9 3 941.194 + 13 21 3 688.363 + 14 20 1 546.537 + +... 
1 more observations (showing 10 of 11) + diff --git a/jenner-check/t007_rfm_proc_sql/input/basket.csv b/jenner-check/t007_rfm_proc_sql/input/basket.csv new file mode 100644 index 0000000..38e7af4 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/input/basket.csv @@ -0,0 +1,201 @@ +Invoice_ID,Product_ID,Promotion_ID,Quantity +1,32,1,2 +1,126,1,1 +1,120,1,2 +1,42,1,1 +1,110,1,1 +1,66,1,1 +1,103,1,1 +2,38,4,1 +2,131,1,3 +3,10,3,2 +3,143,1,2 +3,49,1,1 +3,121,1,2 +3,35,1,3 +3,4,3,2 +3,138,1,2 +3,53,1,1 +3,26,4,1 +3,129,1,2 +3,78,1,3 +3,82,1,2 +4,21,1,2 +4,12,3,1 +4,35,1,1 +4,45,1,2 +5,82,1,1 +6,47,1,3 +6,49,1,1 +6,132,1,2 +6,36,1,1 +6,56,1,2 +6,108,1,1 +6,100,2,2 +6,93,1,3 +6,47,1,1 +6,81,1,2 +6,119,1,2 +6,75,4,2 +6,75,4,1 +6,31,2,3 +6,136,1,1 +6,137,1,1 +6,72,1,1 +6,106,1,2 +6,6,1,2 +10,19,1,3 +11,112,1,3 +11,26,1,1 +11,70,1,1 +11,56,3,1 +11,74,1,1 +11,122,1,2 +11,53,1,3 +11,92,1,2 +11,14,1,1 +11,7,2,2 +11,62,3,2 +11,88,1,2 +11,53,1,2 +11,55,1,3 +11,74,1,3 +12,93,1,2 +12,122,1,2 +14,46,1,3 +14,107,1,1 +14,112,2,1 +14,121,1,2 +14,129,2,3 +14,42,1,1 +14,85,4,2 +14,100,4,3 +14,131,3,3 +14,66,3,3 +14,109,4,1 +14,63,2,3 +14,133,1,3 +14,25,1,1 +14,53,3,2 +14,118,1,1 +14,98,1,3 +14,114,1,1 +14,99,3,1 +15,21,1,3 +16,72,1,1 +16,77,1,1 +16,116,3,1 +16,57,1,3 +16,17,1,3 +16,27,3,1 +16,3,1,2 +16,33,3,2 +16,1,1,2 +16,29,1,2 +16,61,1,1 +16,82,1,2 +16,94,1,2 +16,138,1,2 +16,2,1,3 +16,99,1,3 +16,129,4,3 +16,130,4,2 +16,93,1,2 +16,113,1,2 +16,109,3,2 +16,85,1,1 +16,86,2,1 +16,32,4,1 +16,37,2,2 +16,108,1,3 +16,126,1,1 +16,26,4,3 +16,139,1,3 +18,45,1,1 +18,19,1,3 +18,97,1,3 +18,23,2,3 +18,28,1,1 +18,73,1,3 +18,46,4,2 +18,53,3,2 +18,61,1,3 +18,43,1,3 +18,19,3,1 +18,28,1,1 +18,103,2,2 +19,105,1,1 +19,12,2,1 +19,141,4,2 +19,56,2,3 +19,130,1,3 +19,24,1,2 +19,21,4,3 +20,131,1,1 +20,56,4,1 +20,68,1,3 +21,39,1,3 +21,132,1,3 +21,108,1,2 +21,67,1,3 +21,117,4,1 +22,19,1,3 +22,64,3,2 +22,118,1,3 +22,47,2,3 +22,97,1,1 +22,71,2,2 +22,9,2,3 +22,62,1,3 +22,67,1,3 +22,19,1,1 +22,101,1,2 +22,116,1,1 
+22,137,1,1 +23,91,1,3 +23,123,1,2 +23,81,1,2 +23,114,2,2 +23,9,2,1 +23,136,1,2 +23,45,4,1 +23,123,1,2 +23,62,1,3 +23,24,2,2 +23,124,4,2 +25,61,1,2 +25,35,2,2 +25,46,1,1 +25,101,4,1 +25,98,1,3 +25,1,4,1 +25,6,2,2 +25,85,1,1 +25,127,1,1 +25,66,1,2 +26,60,1,2 +27,61,2,3 +27,116,4,3 +27,81,1,2 +27,66,1,1 +27,57,1,3 +27,5,2,3 +27,96,1,1 +27,39,4,3 +27,128,1,1 +27,92,2,1 +27,143,1,2 +28,62,1,3 +28,98,1,2 +28,108,2,1 +28,7,2,3 +28,82,1,1 +28,24,1,2 +28,11,4,1 +28,69,1,3 +28,12,1,3 +28,73,1,1 +28,26,1,2 +28,80,1,1 +28,66,1,1 +32,59,4,3 diff --git a/jenner-check/t007_rfm_proc_sql/input/customers.csv b/jenner-check/t007_rfm_proc_sql/input/customers.csv new file mode 100644 index 0000000..c3b9b99 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/input/customers.csv @@ -0,0 +1,31 @@ +customer_id,Last_name,First_name,Address,Country,Postal_Code,City,Region,Gender,Day_Of_Birth,Month_Of_Birth,Year_Of_Birth +1,Johnson,Stanley,135 Waymore Blvd.,Brazil,14409,franca,SP,M,3,11,1955 +2,Cramer,Henry,9483 49th St.,Brazil,9790,sao bernardo do campo,SP,M,3,2,1981 +3,Von Brocken,Owen,2843 Drummin Lane,Brazil,1151,sao paulo,SP,M,1,10,1993 +4,Hoover,Wilhelm,135 Poplar St.,Brazil,8775,mogi das cruzes,SP,F,3,2,1975 +5,Von Stubben,Vern,2839 5th Ave.,Brazil,13056,campinas,SP,F,2,8,2017 +6,Anthony,Terry,2932 50th St.,Brazil,89254,jaragua do sul,SC,F,1,5,1992 +7,Smith,Marian,27391 Overton Dr.,Brazil,4534,sao paulo,SP,F,3,4,1967 +8,Ali,Jennifer,3489 Waverly St.,Brazil,35182,timoteo,MG,F,2,9,1992 +9,Pearl,Ramona,2384 Park Ct.,Brazil,81560,curitiba,PR,F,1,11,1976 +10,Hederson,Walter,2932 Waverly St.,Brazil,30575,belo horizonte,MG,F,1,3,1994 +11,Pearl,Winifred,8293 Polar Ave,Brazil,39400,montes claros,MG,M,2,6,1967 +12,Anthony,Arun,135 Wicker Way,Brazil,20231,rio de janeiro,RJ,M,2,11,2008 +13,Mueller,Jennifer,4839 73rd Ave.,Brazil,18682,lencois paulista,SP,M,3,1,1945 +14,Pearl,Ralph,283 Highview Blvd.,Brazil,5704,sao paulo,SP,F,2,5,2021 +15,Taft,Brad,2838 Becker Way,Brazil,95110,caxias do 
sul,RS,M,3,1,1977 +16,Goldbloom,Xia,3838 Mortimer Ct.,Brazil,13412,piracicaba,SP,M,2,10,1965 +17,Wellington,Melissa,2839 Seasame Ln.,Brazil,22750,rio de janeiro,RJ,M,2,1,1970 +18,Sipulski,Henry,3489 45th St.,Brazil,7124,guarulhos,SP,M,3,11,1984 +19,Winstrom,Walter,2833 39th Ave.,Brazil,5416,sao paulo,SP,M,2,6,1999 +20,Alvarez,Douglas,2843 Waverly St.,Brazil,68485,pacaja,PA,F,1,9,2020 +21,Petrov,Cassie,2939 73rd Ave.,Brazil,88034,florianopolis,SC,M,3,3,1990 +22,Hoover,Zeb,7472 Anchor St.,Brazil,74914,aparecida de goiania,GO,M,2,10,1969 +23,Roosevelt,Nels,135 23rd St.,Brazil,5713,sao paulo,SP,M,2,1,1967 +24,Allens,Nels,2839 Hwy 45,Brazil,82820,curitiba,PR,F,3,4,1967 +25,Popovich,Ralph,828 Seasame Ln.,Brazil,8225,sao paulo,SP,M,1,8,1990 +26,Sipulski,Ramona,8232 Baker Ave.,Brazil,9121,santo andre,SP,M,1,1,1991 +27,Roberts,Edward,2843 Birch St.,Brazil,74310,goiania,GO,F,2,3,1984 +28,Popovich,Xia,2843 5th Ave.,Brazil,4571,sao paulo,SP,M,1,5,1991 +29,Hokah,Ramona,283 Mycroft Ln.,Brazil,29311,cachoeiro de itapemirim,ES,M,2,8,2001 +30,Glinkov,Greg,29838 Seasame Ln.,Brazil,5528,sao paulo,SP,M,2,5,1995 diff --git a/jenner-check/t007_rfm_proc_sql/input/invoice_sales.csv b/jenner-check/t007_rfm_proc_sql/input/invoice_sales.csv new file mode 100644 index 0000000..ff0e4ef --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/input/invoice_sales.csv @@ -0,0 +1,49 @@ +Invoice_ID;InvoiceNo;InvoiceDate;Customer_ID;Payment_Method;Operation +1;539730;12/21/2010;1;4;Sale +2;552969;5/12/2011;1;4;Sale +3;577382;11/18/2011;1;1;Sale +4;580363;12/2/2011;1;4;Sale +5;550837;4/20/2011;2;4;Sale +6;579768;11/30/2011;3;1;Sale +10;567972;9/22/2011;5;1;Sale +11;575586;11/9/2011;6;2;Sale +12;546980;3/18/2011;8;2;Sale +14;557117;6/16/2011;9;3;Sale +15;557251;6/17/2011;9;4;Sale +16;575924;11/11/2011;9;1;Sale +18;545702;3/6/2011;10;1;Sale +19;554284;5/23/2011;10;1;Sale +20;575211;11/8/2011;11;2;Sale +21;551287;4/27/2011;12;3;Sale +22;560555;7/19/2011;12;4;Sale +23;571292;10/14/2011;12;3;Sale 
+25;557631;6/21/2011;13;1;Sale +26;557744;6/21/2011;13;2;Sale +27;561030;7/22/2011;13;3;Sale +28;561701;7/28/2011;14;3;Sale +32;539351;12/17/2010;16;4;Sale +33;547397;3/22/2011;16;3;Sale +34;555103;5/31/2011;17;2;Sale +35;566207;9/9/2011;17;4;Sale +37;555721;6/6/2011;18;2;Sale +38;568402;9/26/2011;18;1;Sale +39;543352;2/7/2011;19;4;Sale +40;562733;8/8/2011;19;4;Sale +41;578849;11/25/2011;19;3;Sale +43;539421;12/17/2010;20;4;Sale +44;553897;5/19/2011;20;1;Sale +45;563188;8/12/2011;20;3;Sale +46;573916;11/1/2011;20;4;Sale +48;569550;10/4/2011;22;4;Sale +49;545869;3/7/2011;23;4;Sale +50;557852;6/22/2011;23;4;Sale +51;564510;8/25/2011;23;1;Sale +53;537242;12/6/2010;24;3;Sale +54;548165;3/29/2011;24;3;Sale +55;551845;5/4/2011;24;2;Sale +56;560328;7/17/2011;24;4;Sale +57;567344;9/19/2011;24;4;Sale +58;537881;12/9/2010;25;4;Sale +59;553462;5/16/2011;26;3;Sale +60;545064;2/27/2011;27;4;Sale +61;565387;9/2/2011;27;1;Sale diff --git a/jenner-check/t007_rfm_proc_sql/input/products.csv b/jenner-check/t007_rfm_proc_sql/input/products.csv new file mode 100644 index 0000000..f6cad8c --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/input/products.csv @@ -0,0 +1,101 @@ +Product_ID,Product line,Product type,Product,SKU,Product_Origin,Product_Price +1,Camping Equipment,Cooking Gear,TrailChef Water Bag,4055095887499070000,1,17.79 +2,Camping Equipment,Cooking Gear,TrailChef Canteen,4055095897377050000,2,24.92 +3,Camping Equipment,Cooking Gear,TrailChef Kitchen Kit,4056008039577040000,4,34.38 +4,Camping Equipment,Cooking Gear,TrailChef Cup,4056008079584050000,2,21.64 +5,Camping Equipment,Cooking Gear,TrailChef Cook Set,4056008079591060000,1,25.98 +6,Camping Equipment,Cooking Gear,TrailChef Deluxe Cook Set,4055095999441030000,2,32.49 +7,Camping Equipment,Cooking Gear,TrailChef Single Flame,4055095979496030000,4,28.54 +8,Camping Equipment,Cooking Gear,TrailChef Double Flame,4055095989526060000,3,28.93 +9,Camping Equipment,Cooking Gear,TrailChef Kettle,4055095990359060000,3,15.26 
+10,Camping Equipment,Cooking Gear,TrailChef Utensils,4055095910397050000,3,32.24 +11,Camping Equipment,Tents,Star Lite,4055095910502060000,2,28.17 +12,Camping Equipment,Tents,Star Dome,4052805633062080000,5,36.98 +13,Camping Equipment,Tents,Star Gazer 2,4052805693123080000,5,50.05 +14,Camping Equipment,Tents,Star Gazer 3,4052805683161080000,4,48.44 +15,Camping Equipment,Tents,Star Gazer 6,4055095578490050000,5,32.01 +16,Camping Equipment,Tents,Star Peg,4055095528520080000,5,29.45 +17,Camping Equipment,Sleeping Bags,Hibernator Lite,4054202337561030000,5,27.18 +18,Camping Equipment,Sleeping Bags,Hibernator,4055095376104050000,4,31.23 +19,Camping Equipment,Sleeping Bags,Hibernator Extreme,4055095395658180000,3,19.18 +20,Camping Equipment,Sleeping Bags,Hibernator Self - Inflating Mat,4054202166218110000,4,28.13 +21,Camping Equipment,Sleeping Bags,Hibernator Pad,4054202347539030000,4,50.72 +22,Camping Equipment,Sleeping Bags,Hibernator Pillow,4055095517868010000,4,41.21 +23,Camping Equipment,Sleeping Bags,Hibernator Camp Cot,4055095551627020000,3,41.11 +24,Camping Equipment,Packs,Canyon Mule Climber Backpack,4055095531634030000,5,16.62 +25,Camping Equipment,Packs,Canyon Mule Weekender Backpack,4055095581641040000,3,18.92 +26,Camping Equipment,Packs,Canyon Mule Journey Backpack,4055095551658050000,4,18.18 +27,Camping Equipment,Packs,Canyon Mule Extreme Backpack,4055095561863020000,2,30.34 +28,Camping Equipment,Packs,Canyon Mule Cooler,4055095541870030000,4,36.55 +29,Camping Equipment,Packs,Canyon Mule Carryall,4055095541887040000,1,51.48 +30,Camping Equipment,Lanterns,Firefly Lite,4055095571894050000,4,13.8 +31,Camping Equipment,Lanterns,Firefly Mapreader,4055095390326060000,3,34.77 +32,Camping Equipment,Lanterns,Firefly 2,4055095218579070000,5,35.35 +33,Camping Equipment,Lanterns,Firefly 4,4055095238791070000,5,38.57 +34,Camping Equipment,Lanterns,Firefly Extreme,4055095268920040000,5,33.82 +35,Camping Equipment,Lanterns,Firefly Multi-light,4055095228937050000,2,20.68 
+36,Camping Equipment,Lanterns,EverGlow Single,4053641734341100000,2,29.28 +37,Camping Equipment,Lanterns,EverGlow Double,4043179848058010000,5,11.68 +38,Camping Equipment,Lanterns,EverGlow Kerosene,4055095264213060000,2,41.26 +39,Camping Equipment,Lanterns,EverGlow Butane,4055095214862020000,3,37.52 +40,Camping Equipment,Lanterns,EverGlow Lamp,4055095245036070000,4,23.45 +41,Camping Equipment,Lanterns,Flicker Lantern,4055095225197050000,2,30.64 +42,Personal Accessories,Watches,Mountain Man Analog,4055095232905070000,2,21.94 +43,Personal Accessories,Watches,Mountain Man Digital,4055095253087070000,2,33.91 +44,Personal Accessories,Watches,Mountain Man Deluxe,4260209870930010000,2,32.55 +45,Personal Accessories,Watches,Mountain Man Combination,4055095494439130000,1,28.96 +46,Personal Accessories,Watches,Mountain Man Extreme,4260209860947010000,1,41.6 +47,Personal Accessories,Watches,Venue,4027535829815030000,1,35.5 +48,Personal Accessories,Watches,Infinity,4027535869815030000,1,32.74 +49,Personal Accessories,Watches,Lux,4027535859815030000,2,17.69 +50,Personal Accessories,Watches,Sam,4027535889815030000,4,37.05 +51,Personal Accessories,Watches,TX,4027535829815030000,5,32 +52,Personal Accessories,Watches,Legend,4027535819815030000,1,8.15 +53,Personal Accessories,Watches,Kodiak,4027535829815030000,1,25.29 +54,Personal Accessories,Watches,Zodiak,4027535889815030000,1,28.26 +55,Personal Accessories,Eyewear,Polar Sun,4027535819815030000,4,22.63 +56,Personal Accessories,Eyewear,Polar Ice,4027535839976040000,1,46.57 +57,Personal Accessories,Eyewear,Polar Sports,4027535899976040000,3,46.45 +58,Personal Accessories,Eyewear,Polar Wave,4027535829976040000,2,23.35 +59,Personal Accessories,Eyewear,Polar Extreme,4027535859976040000,1,41.1 +60,Personal Accessories,Eyewear,Bella,4027535819976040000,1,32.58 +61,Personal Accessories,Eyewear,Capri,4027535889976040000,4,19.48 +62,Personal Accessories,Eyewear,Cat Eye,4027535839976040000,3,46.69 +63,Personal 
Accessories,Eyewear,Dante,4027535829976040000,5,42.74 +64,Personal Accessories,Eyewear,Fairway,4027535899976040000,5,24.65 +65,Personal Accessories,Eyewear,Inferno,4027535870835040000,3,38.79 +66,Personal Accessories,Eyewear,Maximus,4027535810835040000,5,31.2 +67,Personal Accessories,Eyewear,Trendi,4027535810866070000,4,38.56 +68,Personal Accessories,Eyewear,Zone,4027535820866070000,1,26.46 +69,Personal Accessories,Eyewear,Hawk Eye,4055095510419040000,5,32.65 +70,Personal Accessories,Eyewear,Retro,4049466237018080000,5,27.66 +71,Personal Accessories,Knives,Single Edge,4049466253126080000,3,22.78 +72,Personal Accessories,Knives,Double Edge,4049466155551080000,3,44.12 +73,Personal Accessories,Knives,Edge Extreme,4053089341760070000,5,31.08 +74,Personal Accessories,Knives,Bear Edge,4055095417941070000,1,26.91 +75,Personal Accessories,Knives,Bear Survival Edge,4055095555701030000,2,38.28 +76,Personal Accessories,Knives,Max Gizmo,4049466842035030000,3,25.06 +77,Personal Accessories,Knives,Pocket Gizmo,4053641759781010000,1,22.65 +78,Personal Accessories,Binoculars,Seeker 35,4053641749736050000,4,43.29 +79,Personal Accessories,Binoculars,Seeker 50,4052906087069070000,2,16.94 +80,Personal Accessories,Binoculars,Seeker Extreme,4052906037069070000,2,40.17 +81,Personal Accessories,Binoculars,Seeker Mini,4052906097069070000,1,33.96 +82,Personal Accessories,Binoculars,Opera Vision,4052906077069070000,2,33.31 +83,Personal Accessories,Binoculars,Ranger Vision,4052906027069070000,3,42.3 +84,Personal Accessories,Navigation,Glacier Basic,4052906077069070000,2,34.3 +85,Personal Accessories,Navigation,Glacier Deluxe,4052072061238070000,4,35.67 +86,Personal Accessories,Navigation,Glacier GPS,4052072021238070000,5,26.05 +87,Personal Accessories,Navigation,Glacier GPS Extreme,4052072041238070000,3,29.68 +88,Personal Accessories,Navigation,Trail Master,4055095368286020000,5,40.53 +89,Personal Accessories,Navigation,Trail Scout,4055095378873030000,3,37.37 +90,Personal 
Accessories,Navigation,Trail Star,4054202157047070000,4,35.43 +91,Personal Accessories,Navigation,Astro Pilot,4030864741194030000,3,20.36 +92,Personal Accessories,Navigation,Sky Pilot,4053227165466070000,5,30.2 +93,Personal Accessories,Navigation,Auto Pilot,4055095516572070000,2,23.11 +94,Outdoor Protection,Insect Repellents,BugShield Natural,4055095446550050000,5,47.14 +95,Outdoor Protection,Insect Repellents,BugShield Spray,4055095476604040000,2,17.72 +96,Outdoor Protection,Insect Repellents,BugShield Lotion Lite,4055095486611050000,4,34.54 +97,Outdoor Protection,Insect Repellents,BugShield Lotion,4055382374854060000,5,15.72 +98,Outdoor Protection,Insect Repellents,BugShield Extreme,4055095539176080000,2,22.58 +99,Outdoor Protection,Sunscreen,Sun Blocker,4053641943393070000,1,33.31 +100,Outdoor Protection,Sunscreen,Sun Shelter Stick,4053641936578040000,4,14.89 diff --git a/jenner-check/t007_rfm_proc_sql/input/promotions.csv b/jenner-check/t007_rfm_proc_sql/input/promotions.csv new file mode 100644 index 0000000..d761ec4 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/input/promotions.csv @@ -0,0 +1,5 @@ +Promotion_ID,Promotion +1,0 +2,0.1 +3,0.2 +4,0.3 diff --git a/jenner-check/t007_rfm_proc_sql/meta.json b/jenner-check/t007_rfm_proc_sql/meta.json new file mode 100644 index 0000000..8ed4c14 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t007_rfm_proc_sql", + "source_file": "Code/6_RFM_Data_Creation.sas", + "source_blob_sha": "99e7d53fb0ce555617f937bdf9b96baa2c59989d", + "source_commit": "6b4ee1a908440892de3523a5bec3402430fbf56e", + "tier": "real_data", + "notes": "Customers (30 rows), 48 sales invoices for those customers, basket trimmed to 200 lines, full promotions, products. Autoexec rebuilds invoice_total_value via the same basket*products*promotions pipeline the upstream uses, since this script depends on it. 
The PROC SQL splits frequency / monetary / last-invoice-date out as columns, with the INTCK recency computed in a follow-up DATA step. Exercises PROC SQL with INNER JOIN, MAX/COUNT/SUM, GROUP BY, plus the INTCK date function in the DATA step." +} diff --git a/jenner-check/t007_rfm_proc_sql/script.sas b/jenner-check/t007_rfm_proc_sql/script.sas new file mode 100644 index 0000000..29b0e81 --- /dev/null +++ b/jenner-check/t007_rfm_proc_sql/script.sas @@ -0,0 +1,35 @@ +/******************************************************************************** + * Adapted from Code/6_RFM_Data_Creation.sas (lines 6-22). + * + * Builds a per-customer RFM (Recency / Frequency / Monetary) table from + * customers + sales + invoice_total_value. The upstream version computes R + * inside a single PROC SQL; here the SQL produces customer-level + * MAX(InvoiceDate), COUNT, and SUM, and a follow-up DATA step turns the date + * into a week-distance recency value with INTCK. The arithmetic and the + * grouping are identical to the upstream. + *******************************************************************************/ + +/* Per customer: Last_Invoice = latest InvoiceDate, F = count of joined Invoice_ID rows, M = sum of Invoice_Total_Value; the inner joins drop customers with no sale and sales with no invoice_total_value row */ +proc sql; + create table RFM_Pre as + select a.Customer_ID, + max(b.InvoiceDate) as Last_Invoice format=mmddyy10., + count(b.Invoice_ID) as F, + sum(Invoice_Total_Value) as M + from customers a + inner join sales b on a.customer_id = b.customer_id + inner join invoice_total_value c on b.invoice_id = c.invoice_id + group by a.Customer_ID; +quit; + +/* Derive R = weeks from Last_Invoice to the fixed cutoff 16DEC2011; method 'C' (continuous) counts 7-day spans from Last_Invoice instead of calendar-week boundaries */ +data RFM_Data; + set RFM_Pre; + R = intck('WEEK', Last_Invoice, '16dec2011'd, 'C'); + T = -1; /* NOTE(review): constant T is never printed or used in this script; purpose not visible here - confirm against upstream */ +run; + +proc print data=RFM_Data(obs=10) noobs; + var Customer_ID R F M; /* Last_Invoice and T remain in RFM_Data but are not printed */ + title "Sample of 10 Customers' RFM Data"; +run;