From 8fed368e13536f2886d04121efbd7f47779d1c5e Mon Sep 17 00:00:00 2001 From: Taksh Date: Wed, 10 Jun 2026 16:30:18 +0530 Subject: [PATCH] fix: shorten dataset cache filenames to avoid NAME_MAX errors url_to_filename appended the full URL tail (e.g. tfidf_vectors_sparse.npz) after the two SHA-256 hex digests, producing file names longer than eCryptfs's NAME_MAX and raising OSError on cache writes. Append only the tail's file extension instead (fixes #539). Co-authored-by: Cursor --- scispacy/file_cache.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scispacy/file_cache.py b/scispacy/file_cache.py index 9ff99180..ac900ed5 100644 --- a/scispacy/file_cache.py +++ b/scispacy/file_cache.py @@ -67,7 +67,9 @@ def url_to_filename(url: str, etag: Optional[str] = None) -> str: etag_hash = sha256(etag_bytes) filename += "." + etag_hash.hexdigest() - filename += "." + last_part + _, ext = os.path.splitext(last_part) + if ext: + filename += ext return filename