Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b011aa3
Merge branch 'develop' into SUBMISSION-122
DavidLFielding May 18, 2026
6719318
Merge branch 'develop' into SUBMISSION-122
DavidLFielding May 20, 2026
6bed6d2
Merge branch 'develop' into SUBMISSION-122
DavidLFielding May 21, 2026
379407e
Improve PDF preview reliability: raise NotFound (route now redirects …
DavidLFielding May 26, 2026
b946aa9
Add /preview_not_available and /submission_agreement.pdf routes, and …
DavidLFielding May 26, 2026
a98b55d
Add preview_ready gate (submitter_confirmed_preview AND PDF actually …
DavidLFielding May 26, 2026
f00a3cd
Rewrite final_preview.html for the v5 Confirm-and-Submit mockup: Subm…
DavidLFielding May 26, 2026
eb53330
Update final_preview browser tab title to match v5 mockup: 'Preview a…
DavidLFielding May 26, 2026
cc3efea
Rename FinalPreview tab label and title for v5 mockup: display "Previ…
DavidLFielding May 26, 2026
eee8099
Add support to enable Submit following preview of PDF. Parameterize s…
DavidLFielding May 26, 2026
18f6c38
Two Confirm-page UX polish CSS additions: force gray + not-allowed cu…
DavidLFielding May 26, 2026
661cf87
New preview_not_available.html template rendered when /preview.pdf re…
DavidLFielding May 26, 2026
61f44e0
Add submission_agreement controller for the Confirm-page Download Sub…
DavidLFielding May 26, 2026
5f0f0c6
Handle the edge case where a file is copied into the bucket: reload B…
DavidLFielding May 26, 2026
3843908
Update uv.lock for the reportlab dependency added in the submission_a…
DavidLFielding May 26, 2026
39fd830
Adjust page title due to v5 mockup updates. [SUBMISSION-122] David
DavidLFielding May 26, 2026
eea48e5
Updated per v5 mockups. Fixed notifications messages. Removed unknown…
DavidLFielding May 27, 2026
9a8edaf
Add Download Source Package endpoint: source_package.py controller bu…
DavidLFielding May 29, 2026
438442f
Review Files sidebar: switch to v5 info_container_middle block carryi…
DavidLFielding May 29, 2026
ee96dd0
Added graceful failure warning when preflight is not available inste…
DavidLFielding Jun 1, 2026
ca59dbd
Review Files form rewrite of main content into v5-style cards echoing…
DavidLFielding Jun 2, 2026
184e595
Merge branch 'develop' into SUBMISSION-150
DavidLFielding Jun 2, 2026
18dab7c
Update expected error string after recent updates to the failure page…
DavidLFielding Jun 2, 2026
a917381
Reenable condition for process stage. [SUBMISSION-150] David
DavidLFielding Jun 5, 2026
f7207fe
Disable debugging. [SUBMISSION-150] David
DavidLFielding Jun 5, 2026
6c8e03a
Restore submit_overrides.css and switch Review Files to flat metadata…
DavidLFielding Jun 8, 2026
1b52bdc
Add back container outline for Review Files page. Wrap Review Files f…
DavidLFielding Jun 8, 2026
2f60a1c
Merge develop into SUBMISSION-150. Resolve conflicts across gs_file_s…
DavidLFielding Jun 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ dependencies = [
"pytz==2018.7",
"pyyaml==6.0.2",
"referencing==0.35.1",
"reportlab>=4.2.0",
"requests==2.32.3",
"requests-toolbelt==1.0.0",
"retry==0.9.2",
Expand Down
24 changes: 23 additions & 1 deletion submit_ce/implementations/file_store/gs_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,18 @@ def get_preview(self, submission_id: str) -> FileObj:
preview_path = self._preview_path(submission_id)
preview = self.bucket.blob(preview_path)
if preview.exists():
# bucket.blob() returns a local reference with empty _properties;
# .exists() does a HEAD but doesn't populate metadata. Without
# reload(), preview.size and preview.crc32c are both None and
# the route layer's set_etag()/Content-Length header crash on
# None. This matters in particular for PDFs that arrived in
# the bucket via something other than our store_preview() path
# (e.g., hand-copied for testing).
try:
preview.reload()
except Exception as exc:
logger.debug("preview reload failed for %s: %s",
preview_path, exc)
return preview
else:
return FileDoesNotExist(preview_path)
Expand Down Expand Up @@ -464,8 +476,18 @@ def does_source_log_exist(self, submission_id: str) -> bool:
return self.bucket.blob(self._source_log_path(submission_id)).exists()

def _get_checksum(self, path: str) -> str:
    """Return the crc32c checksum of the blob at ``path``.

    Always returns a ``str``: an empty string is returned when the blob
    doesn't exist or has no crc32c (rather than ``None``), so callers
    feeding the value into ``Response.set_etag()`` and ``Content-Length``
    headers don't crash. This matters in particular for blobs that
    arrived in the bucket via something other than our upload paths
    (e.g. hand-copied for testing), which may lack a crc32c on the
    object.

    Parameters
    ----------
    path : str
        Path of the blob inside ``self.bucket``.

    Returns
    -------
    str
        The blob's ``crc32c`` value, or ``""`` when unavailable.
    """
    item = self.bucket.get_blob(path)
    if item is None:
        # get_blob() found nothing at this path.
        return ""
    # The blob exists but crc32c may be unset; normalise None to "".
    return item.crc32c or ""

def _submission_path(self, submission_id: str) -> str:
"""Gets GS filesystem structure ex /{rootdir}/{first 4 digits of submission id}/{submission id}"""
Expand Down
33 changes: 32 additions & 1 deletion submit_ce/ui/controllers/new/final.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,24 @@ def finalize(method: str, params: MultiDict, session: Session,

form = FinalizationForm(params)

# Check whether a preview PDF actually exists in the bucket. The
# persisted submitter_confirmed_preview flag can drift from file-store
# reality (e.g., a PDF was removed out-of-band, a file-change event
# missed resetting the flag, or in dev when state is manipulated
# directly). The Confirm page should gate Submit on what's actually
# in the bucket, not just the persisted flag, so an absent PDF can
# never produce an enabled Submit button.
fstore = current_app.api.get_file_store()
preview_exists = fstore.does_preview_exist(str(submission_id))
preview_ready = bool(submission.submitter_confirmed_preview
and preview_exists)
logger.info(
"finalize: submission=%s confirmed_preview=%s preview_exists=%s "
"preview_ready=%s",
submission_id, submission.submitter_confirmed_preview,
preview_exists, preview_ready,
)

# The abs preview macro expects a specific struct for submission history.
# TODO submission.versions removed, what to do in final?
# submission_history = [{'submitted_date': s.created, 'version': s.version}
Expand All @@ -47,11 +65,24 @@ def finalize(method: str, params: MultiDict, session: Session,
'submission': submission,
'submitter': submitter,
'submission_history': submission_history,
'preview_ready': preview_ready,
'preview_exists': preview_exists,
}

# Only treat this POST as an actual submit attempt when the form's
# "next" action was used. The Confirm form is shared by the nav bar's
# "Go Back" and "Save & Exit" buttons too -- those also POST the form
# (so the CSRF token comes along) but they're navigation actions, not
# the submit action. If the user has the proofread checkbox ticked and
# then clicks Go Back, we must NOT fire FinalizeSubmission; flow_control
# is supposed to redirect them to the previous step instead.
action = (params.get('action') or '').strip()
is_submit_action = action == 'next'

command = FinalizeSubmission(creator=submitter)
proofread_confirmed = form.proceed.data
if method == 'POST' and form.validate() \
if method == 'POST' and is_submit_action \
and form.validate() \
and proofread_confirmed \
and validate_command(form, command, submission):
try:
Expand Down
155 changes: 152 additions & 3 deletions submit_ce/ui/controllers/new/preview.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,172 @@
"""Controller for serving the compiled PDF preview for a submission."""
"""Controller for serving the compiled PDF preview for a submission.

The single entry point :func:`file_preview` is wired to the
``/<submission_id>/preview.pdf`` route. It serves the compiled PDF (or
raises a friendly 404 when the PDF doesn't exist yet) and fires the
domain events needed to record that the submitter has viewed the
preview.
"""

import io
import logging
from http import HTTPStatus as status
from typing import Tuple, Dict, Any

from flask import current_app
from arxiv.auth.domain import Session
from arxiv.files import FileDoesNotExist
from werkzeug.exceptions import NotFound

from ...auth import user_and_client_from_session
from submit_ce.domain.event import ConfirmSourceProcessed, ConfirmPreview
from submit_ce.ui.backend import get_submission

from ...auth import user_and_client_from_session


logger = logging.getLogger(__name__)


def file_preview(params, session: Session, submission_id: str, token: str,
                 **kwargs: Any) -> Tuple[io.BytesIO, int, Dict[str, str]]:
    """Serve the PDF preview for a submission.

    Parameters
    ----------
    params
        Request parameters. Not read by this controller.
    session : Session
        Session from which the submitter and client are resolved.
    submission_id : str
        Identifier of the submission whose preview is requested.
    token : str
        Not read by this controller.
    **kwargs
        Accepted for signature compatibility; ignored.

    Returns
    -------
    tuple
        ``(stream, status.OK, headers)`` where ``stream`` is the object
        returned by the file store's ``get_preview()`` and ``headers``
        carries the PDF content type and an ``ETag`` set to the preview
        checksum.

    Raises
    ------
    werkzeug.exceptions.NotFound
        If no preview PDF exists for this submission yet (e.g., the Process
        step has not run or compilation has not produced a PDF). Flask
        renders this as a 404 response, which is a much friendlier failure
        mode than the raw ``Exception("File does not exist")`` that
        :class:`arxiv.files.FileDoesNotExist` would otherwise raise when the
        route tries to ``open()`` the missing file.

    Side effects
    ------------
    When the served PDF has a checksum:

    - a ``ConfirmSourceProcessed`` event is saved if the submission has no
      preview recorded or its recorded preview checksum differs from the
      served PDF's checksum;
    - a ``ConfirmPreview`` event is saved if the submitter has not yet
      confirmed the preview, so the submitter is treated as having reviewed
      the PDF. This mirrors legacy Submit 1.x behavior where opening the
      PDF marked ``viewed=1`` on the submission row, which then enables the
      Submit button on the Confirm page after a refresh.

    A failure while saving these events is logged and does not prevent the
    PDF from being returned.
    """
    submitter, client = user_and_client_from_session(session)
    # Only the submission itself is needed here, not its event history.
    submission, _ = get_submission(submission_id)
    fstore = current_app.api.get_file_store()

    # Check first that a preview PDF actually exists. Without this, the
    # downstream ``send_file(stream.open('rb'), ...)`` in the route raises a
    # generic Exception and the user sees a 500 stack trace in the new tab.
    # Common cause: compilation is not working while the submitter has the
    # Confirm and Submit page open.
    stream = fstore.get_preview(submission.submission_id)
    if isinstance(stream, FileDoesNotExist) or not stream.exists():
        logger.info(
            "PDF preview requested but not available for submission %s",
            submission.submission_id,
        )
        raise NotFound(
            "The PDF preview is not yet available. Please return to the "
            "Process step, wait for compilation to finish, and try again. "
            "If processing failed, you may need to fix your source files "
            "and reprocess."
        )

    pdf_checksum = fstore.get_preview_checksum(submission.submission_id)

    # ---- diagnostic logging ------------------------------------------
    # Verbose INFO logging so the operator can trace exactly what the
    # preview-view side effect did. If you're staring at the server
    # console wondering why the Submit button is still grayed out,
    # these are the lines to grep for.
    preview_set = submission.preview is not None
    preview_ck = submission.preview.preview_checksum if preview_set else None
    logger.info(
        "file_preview: submission=%s pdf_checksum=%r preview_set=%s "
        "preview_checksum=%r confirmed_preview=%s",
        submission.submission_id, pdf_checksum, preview_set, preview_ck,
        submission.submitter_confirmed_preview,
    )

    # Build the list of events to save when the user views the PDF.
    #
    # ConfirmPreview's domain validation requires submission.preview to be
    # set and to match preview_checksum -- otherwise it raises
    # InvalidEvent("Preview not set on submission"). The legacy DB only
    # persists submitter_confirmed_preview (via the `viewed` column); the
    # Preview dataclass itself is not stored. That means reading the
    # submission back with get_submission() after firing
    # ConfirmSourceProcessed loses preview state, and any subsequent
    # ConfirmPreview save would fail validation.
    #
    # Workaround: pass both events to a single save() call. The save loop
    # applies events sequentially with `before = after`, so ConfirmPreview
    # sees the in-memory submission with preview set by
    # ConfirmSourceProcessed and validates cleanly.
    #
    # When to self-heal ConfirmSourceProcessed:
    #   - submission.preview is None (process step not run, hand-copied
    #     PDF, lost domain event), OR
    #   - preview_checksum has drifted (source was reprocessed or replaced
    #     but the old in-memory state is stale).
    # In production this path is rare; logged at WARNING for visibility.
    needs_source_processed = bool(pdf_checksum) and (
        submission.preview is None
        or submission.preview.preview_checksum != pdf_checksum
    )
    needs_confirm = (
        bool(pdf_checksum)
        and not submission.submitter_confirmed_preview
    )
    logger.info(
        "file_preview: needs_source_processed=%s needs_confirm=%s",
        needs_source_processed, needs_confirm,
    )

    # Order matters: ConfirmSourceProcessed must precede ConfirmPreview so
    # the latter validates against the preview set by the former.
    events_to_save = []
    if needs_source_processed:
        events_to_save.append(
            ConfirmSourceProcessed(
                creator=submitter,
                client=client,
                source_id=-1,        # unknown for hand-copied PDFs
                source_checksum='',  # unknown for hand-copied PDFs
                preview_checksum=pdf_checksum,
                size_bytes=getattr(stream, 'size', None) or 0,
                added=getattr(stream, 'updated', None),
            )
        )
    if needs_confirm:
        events_to_save.append(
            ConfirmPreview(creator=submitter, client=client,
                           preview_checksum=pdf_checksum)
        )

    if events_to_save:
        try:
            current_app.api.save(
                *events_to_save,
                submission_id=submission.submission_id,
            )
            if needs_source_processed:
                logger.warning(
                    "Self-healed missing/stale ConfirmSourceProcessed for "
                    "submission %s from served PDF (checksum=%s). "
                    "Investigate the upstream Process step.",
                    submission.submission_id, pdf_checksum,
                )
            logger.info(
                "file_preview: saved %d event(s) for submission %s: %s",
                len(events_to_save), submission.submission_id,
                [e.__class__.__name__ for e in events_to_save],
            )
        except Exception as exc:
            # Deliberate best-effort: recording the view must not block
            # serving the PDF. Don't silently swallow, though -- logging at
            # ERROR with stack trace makes the failure obvious in the
            # server console while the PDF still streams via the return
            # below.
            logger.exception(
                "PDF-view event save failed for submission %s "
                "(events=%s): %s",
                submission.submission_id,
                [e.__class__.__name__ for e in events_to_save], exc,
            )

    headers = {'Content-Type': 'application/pdf', 'ETag': pdf_checksum}
    return stream, status.OK, headers
26 changes: 24 additions & 2 deletions submit_ce/ui/controllers/new/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from flask import current_app
from arxiv.auth.domain import Session
from arxiv.base import alerts
from markupsafe import Markup
from submit_ce.domain.event.process import (
SetDecisions,
SetDirectivesAndCleanup,
Expand Down Expand Up @@ -169,8 +170,17 @@ def review_files(method: str, params: MultiDict, session: Session,
)

if preflight_data is None:
# Preflight is what populates compiler choices, top-level TeX
# candidates, and per-file usage notes. Without it, the form
# below has nothing to render -- the template hides the form
# sections when file_notes is empty and surfaces this flash
# message + the main-area placeholder instead.
alerts.flash_warning(
f"We couldn't load preflight data for this submission. {SUPPORT}",
Markup(
"We couldn't analyze the files in your submission "
"right now because the preflight service is "
"temporarily unavailable. Please refresh this page "
"to try again. ") + SUPPORT,
title="Preflight unavailable")
return stay_on_this_stage((rdata, status.OK, {}))

Expand Down Expand Up @@ -319,7 +329,19 @@ def _load_or_create_preflight(
except InvalidEvent:
pass # nothing actionable in cleanup is fine

start_preflight(params, session, submission_id, token)
# Preflight calls into the external tex2pdf service; that service
# may be unreachable (local dev without the service running, a
# transient outage in production, etc.). If it fails, log and
# continue with preflight_data=None -- review_files() already has
# a clean handler for that case (flashes "Preflight unavailable"
# and stays on this stage) instead of bubbling up as a 500.
try:
start_preflight(params, session, submission_id, token)
except Exception as exc:
logger.warning(
"Could not run preflight for submission %s: %s",
submission_id, exc,
)
preflight_data = _get_preflight_data(submission_id)
_store_source_format(preflight_data, session, submission_id)

Expand Down
Loading
Loading