|
22 | 22 | DEFAULT_DIR_PATH = "funread/legado/snapshot/lasted" |
23 | 23 | EXPORT_BATCH_SIZE = 500 |
24 | 24 | INITIAL_COUNTER = 1000 |
| 25 | +MIN_UPLOAD_BATCH_SIZE = 20 |
| 26 | + |
25 | 27 |
|
26 | 28 | # 组织仓库列表 |
27 | 29 | ORG_REPOS = [ |
@@ -257,24 +259,46 @@ def _export_and_upload(self, path: str) -> None: |
257 | 259 | counter = INITIAL_COUNTER |
258 | 260 | for data in runner.export_sources(size=EXPORT_BATCH_SIZE): |
259 | 261 | if data: |
260 | | - self._upload_batch(data, counter) |
261 | | - counter += 1 |
| 262 | + counter = self._upload_batch(data, counter) |
262 | 263 | except Exception as e: |
263 | 264 | logger.error(f"Failed to export and upload: {e}") |
264 | 265 | raise |
265 | 266 |
|
266 | | - def _upload_batch(self, data: List[Dict[str, Any]], counter: int) -> None: |
267 | | - """上传数据批次""" |
| 267 | + @staticmethod |
| 268 | + def _is_file_too_large_error(error: Exception) -> bool: |
| 269 | + """判断是否为远端文件过大错误""" |
| 270 | + message = str(error).lower() |
| 271 | + return "too large" in message or "422" in message |
| 272 | + |
| 273 | + def _upload_single_batch(self, data: List[Dict[str, Any]], counter: int) -> None: |
| 274 | + """上传单个数据批次""" |
| 275 | + git_path = f"{self.dir_path}/progress-{counter}.json" |
| 276 | + self.drive.upload_file( |
| 277 | + content=json.dumps(data), |
| 278 | + fid=self.dir_path, |
| 279 | + filepath=None, |
| 280 | + filename=f"progress-{counter}.json", |
| 281 | + ) |
| 282 | + logger.info(f"Uploaded {len(data)} sources to {git_path}") |
| 283 | + |
| 284 | + def _upload_batch(self, data: List[Dict[str, Any]], counter: int) -> int: |
| 285 | + """上传数据批次,必要时自动拆分为更小文件""" |
268 | 286 | try: |
269 | | - git_path = f"{self.dir_path}/progress-{counter}.json" |
270 | | - self.drive.upload_file( |
271 | | - content=json.dumps(data), |
272 | | - fid=self.dir_path, |
273 | | - filepath=None, |
274 | | - filename=f"progress-{counter}.json", |
275 | | - ) |
276 | | - logger.info(f"Uploaded {len(data)} sources to {git_path}") |
| 287 | + self._upload_single_batch(data, counter) |
| 288 | + return counter + 1 |
277 | 289 | except Exception as e: |
| 290 | + if self._is_file_too_large_error(e) and len(data) > MIN_UPLOAD_BATCH_SIZE: |
| 291 | + split_size = max(len(data) // 2, MIN_UPLOAD_BATCH_SIZE) |
| 292 | + logger.warning( |
| 293 | + f"Batch {counter} too large with {len(data)} sources, split into chunks of {split_size}" |
| 294 | + ) |
| 295 | + next_counter = counter |
| 296 | + for start in range(0, len(data), split_size): |
| 297 | + next_counter = self._upload_batch( |
| 298 | + data[start : start + split_size], next_counter |
| 299 | + ) |
| 300 | + return next_counter |
| 301 | + |
278 | 302 | logger.error(f"Failed to upload batch {counter}: {e}") |
279 | 303 | raise |
280 | 304 |
|
|
0 commit comments