Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions submit_ce/api/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,20 @@

from submit_ce.api.compile_service import CompileService
from submit_ce.domain import Submission, Event, License
from submit_ce.domain.size_limits import SizeLimits
from submit_ce.api.file_store import SubmissionFileStore


class SubmitApi(ABC):

def get_size_limits(self) -> SizeLimits:
    """Return the size limits used to flag oversize submissions.

    This base implementation returns ``SizeLimits.defaults()``, described
    as the built-in 50 MB limits. Implementations that can read
    application config (e.g. the Flask implementation) are expected to
    override this so the configured ``MAX_*_KB`` values are honored.
    """
    default_limits = SizeLimits.defaults()
    return default_limits

@abstractmethod
def get(self, submission_id: str) -> Submission:
"""
Expand Down
37 changes: 37 additions & 0 deletions submit_ce/domain/event/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .base import EventWithSideEffect
from ..submission import Submission
from ..uploads import SubmitFile
from .. import size_limits

import logging
logger = logging.getLogger(__name__)
Expand All @@ -25,6 +26,26 @@ def _common_file_change_execute(api: SubmitApi, submission: Submission) -> None:
file_store.delete_preview(str(submission.submission_id))


def _evaluate_oversize(api: SubmitApi, submission: Submission) -> bool:
    """Check the submission's current workspace against the size limits.

    Meant to be called from an event's ``execute``, which has file-store
    access. The boolean result is stored on the event so that ``project``
    can apply it deterministically on replay, when ``execute`` does not
    run. The workspace is read after the change has been applied, so the
    flag reflects *all* current files, not only those the event touched.

    Returns ``False`` when the file store has no workspace for the
    submission; otherwise returns whatever ``size_limits.is_oversize``
    reports for the measured sizes.
    """
    submission_id = str(submission.submission_id)
    workspace = api.get_file_store().get_workspace(submission_id)
    if workspace is None:
        return False

    # NOTE(review): two different measurements are passed on.
    #  * ``bytes_by_path`` maps file.path -> file.bytes; it is NOT a sum and
    #    exists to detect single files that are over the per-file limit.
    #  * ``total_bytes`` is ``workspace.size``, the uncompressed size of the
    #    whole upload workspace (treated as 0 when unset).
    bytes_by_path = {}
    for entry in workspace.files:
        bytes_by_path[entry.path] = entry.bytes
    total_bytes = workspace.size or 0

    # The primary category is forwarded to the limit check; None when the
    # submission has no primary classification.
    if submission.primary_classification:
        category = submission.primary_classification.category
    else:
        category = None

    return size_limits.is_oversize(
        total_bytes,
        bytes_by_path,
        primary_category=category,
        limits=api.get_size_limits(),
    )


class UploadArchive(EventWithSideEffect):
"""Uploads a zip or tgz file to the workspace, unpacking all the files."""

Expand All @@ -37,6 +58,9 @@ class UploadArchive(EventWithSideEffect):
bytes_added: int = 0
"""Bytes added by uploading this archive."""

oversize: bool = False
"""Whether the submission is oversize after this change (set in execute)."""

def validate(self, submission: Submission) -> None:
    """Validate this archive upload against ``submission``.

    Delegates entirely to ``validators.submission_is_not_finalized``.
    """
    validators.submission_is_not_finalized(self, submission)

Expand All @@ -45,9 +69,11 @@ def execute(self, api: SubmitApi, submission: Submission) -> None:
files = api.get_file_store().store_source_package(str(submission.submission_id), self.file, 4098)
self.bytes_added = sum([file.bytes for file in files])
_common_file_change_execute(api, submission)
self.oversize = _evaluate_oversize(api, submission)

def project(self, submission: Submission) -> Submission:
    """Apply this archive upload to ``submission`` and return it.

    Adds ``bytes_added`` to the uncompressed size and copies the
    ``oversize`` flag stored on the event (set in ``execute``) onto the
    submission, then runs the shared file-change projection.
    """
    size_after_upload = submission.uncompressed_size + self.bytes_added
    submission.uncompressed_size = size_after_upload
    # Use the stored flag rather than re-measuring: execute() does not run
    # on replay.
    submission.is_oversize = self.oversize
    _common_file_change_project(submission)
    return submission

Expand All @@ -66,6 +92,9 @@ class UploadFiles(EventWithSideEffect):
Field(default_factory=list, exclude=True)
bytes_added: int = 0

oversize: bool = False
"""Whether the submission is oversize after this change (set in execute)."""

def validate(self, submission: Submission) -> None:
    """Validate this file upload against ``submission``.

    Delegates entirely to ``validators.submission_is_not_finalized``.
    """
    validators.submission_is_not_finalized(self, submission)

Expand All @@ -76,9 +105,11 @@ def execute(self, api: SubmitApi, submission: Submission) -> None:
stat=file_store.store_source_file(str(submission.submission_id), f, chunk_size=4096)
self.bytes_added += stat.bytes
_common_file_change_execute(api, submission)
self.oversize = _evaluate_oversize(api, submission)

def project(self, submission: Submission) -> Submission:
    """Apply this file upload to ``submission`` and return it.

    Adds ``bytes_added`` to the uncompressed size and copies the
    ``oversize`` flag stored on the event (set in ``execute``) onto the
    submission, then runs the shared file-change projection.
    """
    size_after_upload = submission.uncompressed_size + self.bytes_added
    submission.uncompressed_size = size_after_upload
    # Use the stored flag rather than re-measuring: execute() does not run
    # on replay.
    submission.is_oversize = self.oversize
    _common_file_change_project(submission)
    return submission

Expand All @@ -96,6 +127,9 @@ class RemoveFiles(EventWithSideEffect):
bytes_removed:int = 0
"""Bytes removed by removing these files."""

oversize: bool = False
"""Whether the submission is oversize after this change (set in execute)."""

def validate(self, submission: Submission) -> None:
    """Validate this file removal against ``submission``.

    Delegates entirely to ``validators.submission_is_not_finalized``.
    """
    validators.submission_is_not_finalized(self, submission)

Expand All @@ -109,9 +143,11 @@ def execute(self, api: SubmitApi, submission: Submission) -> None:
self.bytes_removed += file.bytes

_common_file_change_execute(api, submission)
self.oversize = _evaluate_oversize(api, submission)

def project(self, submission: Submission) -> Submission:
    """Apply this file removal to ``submission`` and return it.

    Subtracts ``bytes_removed`` from the uncompressed size and copies the
    ``oversize`` flag stored on the event (set in ``execute``) onto the
    submission, then runs the shared file-change projection.
    """
    size_after_removal = submission.uncompressed_size - self.bytes_removed
    submission.uncompressed_size = size_after_removal
    # Use the stored flag rather than re-measuring: execute() does not run
    # on replay.
    submission.is_oversize = self.oversize
    _common_file_change_project(submission)
    return submission

Expand All @@ -134,5 +170,6 @@ def execute(self, api: SubmitApi, submission: Submission) -> None:
def project(self, submission: Submission) -> Submission:
    """Reset the submission's file-derived state and return it.

    Clears the source format, zeroes the uncompressed size and drops the
    oversize flag, then runs the shared file-change projection.
    """
    cleared_state = (
        ("source_format", None),
        ("uncompressed_size", 0),
        ("is_oversize", False),
    )
    for field_name, cleared_value in cleared_state:
        setattr(submission, field_name, cleared_value)
    _common_file_change_project(submission)
    return submission
157 changes: 157 additions & 0 deletions submit_ce/domain/event/tests/test_oversize_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""Tests for oversize detection wired into the file-upload events.

These exercise ``execute``/``project`` directly with a hand-rolled fake API so
no Flask app or real file store is needed.
"""

from datetime import datetime
from types import SimpleNamespace

from pytz import UTC

from submit_ce.domain import submission as submod, agent
from submit_ce.domain.meta import Classification
from submit_ce.domain.event.file import (
UploadFiles,
UploadArchive,
RemoveFiles,
RemoveAllFiles,
)
from submit_ce.domain.size_limits import SizeLimits

# Number of bytes in one megabyte (1024 squared), used to size fake workspaces.
MB = 1024 ** 2


class _FakeStore:
    """Minimal stand-in for the submission file store.

    Every mutating call is a no-op; ``get_workspace`` always hands back the
    workspace object supplied at construction time, whatever the
    submission id.
    """

    def __init__(self, workspace):
        self.workspace = workspace

    def store_source_package(self, sid, content, chunk_size):
        """Pretend to unpack an archive; reports no stored files."""
        return list()

    def store_source_file(self, sid, content, chunk_size):
        """Pretend to store one file; reports a zero-byte stat object."""
        stored_path = getattr(content, "filename", "f")
        return SimpleNamespace(bytes=0, path=stored_path)

    def delete_source_file(self, sid, name):
        """Pretend to delete one file; returns None."""
        return None

    def delete_all_source_files(self, sid):
        """No-op."""

    def get_workspace(self, sid):
        """Return the canned workspace regardless of ``sid``."""
        return self.workspace

    def delete_preflight(self, sid):
        """No-op."""

    def delete_preview(self, sid):
        """No-op."""


class _FakeApi:
    """Hand-rolled API double exposing only what the file events call.

    Wraps a ``_FakeStore`` built around ``workspace`` and serves either
    the supplied ``limits`` or, when none (or a falsy value) is given,
    ``SizeLimits.defaults()``.
    """

    def __init__(self, workspace, limits=None):
        self._store = _FakeStore(workspace)
        if limits:
            self._limits = limits
        else:
            self._limits = SizeLimits.defaults()

    def get_file_store(self):
        """Return the fake file store."""
        return self._store

    def get_size_limits(self):
        """Return the size limits chosen at construction."""
        return self._limits


def _ws(total, per_file=None):
    """Build a fake workspace with ``size`` and ``files`` attributes.

    ``per_file`` maps path -> byte count; each entry becomes a file object
    with ``path`` and ``bytes``. A missing or empty mapping yields no files.
    """
    sizes_by_path = per_file if per_file else {}
    file_entries = []
    for path, byte_count in sizes_by_path.items():
        file_entries.append(SimpleNamespace(path=path, bytes=byte_count))
    return SimpleNamespace(size=total, files=file_entries)


def _user(uid="u1"):
    """Create a ``PublicUser`` named "Test User" with the given id.

    The e-mail address is derived from ``uid`` and the user has no
    endorsements.
    """
    address = f"{uid}@example.org"
    return agent.PublicUser(
        name="Test User",
        user_id=uid,
        email=address,
        endorsements=[],
    )


def _submission(category="astro-ph.GA"):
    """Create a submission whose creator and owner are the same test user.

    The submission is stamped with the current UTC time and carries
    ``category`` as its primary classification.
    """
    test_user = _user()
    primary = Classification(category=category)
    return submod.Submission(
        creator=test_user,
        owner=test_user,
        created=datetime.now(UTC),
        primary_classification=primary,
    )


def test_submission_defaults_not_oversize():
    """A freshly built submission starts with ``is_oversize`` False."""
    fresh = _submission()
    assert fresh.is_oversize is False


def test_upload_files_flags_oversize():
    """A 60 MB workspace flags both the event and the projected submission."""
    submission = _submission()
    workspace = _ws(60 * MB, {"huge.pdf": 60 * MB})
    api = _FakeApi(workspace)
    event = UploadFiles(creator=submission.creator, files=[])

    event.execute(api, submission)
    assert event.oversize is True

    submission = event.project(submission)
    assert submission.is_oversize is True


def test_upload_files_within_limit_not_oversize():
    """A 10 MB workspace leaves the projected submission not oversize."""
    submission = _submission()
    workspace = _ws(10 * MB, {"ok.pdf": 10 * MB})
    api = _FakeApi(workspace)
    event = UploadFiles(creator=submission.creator, files=[])

    event.execute(api, submission)
    submission = event.project(submission)

    assert submission.is_oversize is False


def test_upload_archive_flags_oversize():
    """An archive upload leaving an 80 MB workspace flags the submission."""
    submission = _submission()
    workspace = _ws(80 * MB, {"a.tex": 80 * MB})
    api = _FakeApi(workspace)
    event = UploadArchive(creator=submission.creator, file=None)

    event.execute(api, submission)
    submission = event.project(submission)

    assert submission.is_oversize is True


def test_remove_files_clears_oversize():
    """Removing files down to a 5 MB workspace clears a previously set flag."""
    submission = _submission()
    submission.is_oversize = True  # start from an already-flagged submission
    workspace = _ws(5 * MB, {"small.tex": 5 * MB})
    api = _FakeApi(workspace)
    event = RemoveFiles(creator=submission.creator, files=[])

    event.execute(api, submission)
    submission = event.project(submission)

    assert submission.is_oversize is False


def test_remove_all_files_clears_oversize():
    """Projecting ``RemoveAllFiles`` resets the flag without running execute."""
    submission = _submission()
    submission.is_oversize = True  # start from an already-flagged submission
    event = RemoveAllFiles(creator=submission.creator)

    submission = event.project(submission)

    assert submission.is_oversize is False


def test_project_uses_persisted_flag_on_replay():
    """``project`` alone applies the flag carried by a stored event."""
    # execute() is deliberately not called: on replay it does not run, so
    # the flag has to come from the stored event itself.
    submission = _submission()
    stored_event = UploadFiles(
        creator=submission.creator, files=[], oversize=True)

    submission = stored_event.project(submission)

    assert submission.is_oversize is True


def test_no_workspace_is_not_oversize():
    """When the store has no workspace, the event is not flagged oversize."""
    submission = _submission()
    api = _FakeApi(None)  # get_workspace() will return None
    event = UploadFiles(creator=submission.creator, files=[])

    event.execute(api, submission)

    assert event.oversize is False


def test_per_archive_limit_used_in_event():
    """Custom limits passed through the API are the ones the event applies."""
    submission = _submission(category="astro-ph.GA")
    generous = {"default": 100 * MB}
    custom_limits = SizeLimits(
        max_uncompressed_total={"default": 100 * MB, "astro-ph": 5 * MB},
        max_uncompressed_per_file=generous,
        max_compressed={"default": 100 * MB},
    )
    workspace = _ws(10 * MB, {"f": 10 * MB})
    api = _FakeApi(workspace, limits=custom_limits)
    event = UploadFiles(creator=submission.creator, files=[])

    event.execute(api, submission)

    # 10 MB exceeds the 5 MB astro-ph total limit
    assert event.oversize is True
Loading
Loading