-
Notifications
You must be signed in to change notification settings - Fork 141
adding support for Drop frame #354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,6 +78,7 @@ | |
| just carried over when implementing positioning. | ||
| """ | ||
|
|
||
| import os | ||
| import math | ||
| import re | ||
| import textwrap | ||
|
|
@@ -367,13 +368,13 @@ def _handle_double_command(self, word): | |
| # doubled special characters and doubled extended characters | ||
| # with only one member of each pair being displayed. | ||
|
|
||
| doubled_types = (word != "94a1" and word in COMMANDS) or _is_pac_command(word) or word in SPECIAL_CHARS | ||
| doubled_types = ( | ||
| (word != "94a1" and word in COMMANDS) | ||
| or _is_pac_command(word) | ||
| or word in SPECIAL_CHARS | ||
| ) | ||
| if self.double_starter: | ||
| doubled_types = ( | ||
| doubled_types | ||
| or word in EXTENDED_CHARS | ||
| or word == "94a1" | ||
| ) | ||
| doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" | ||
|
|
||
| if word in CUE_STARTING_COMMAND and word != self.last_command: | ||
| self.double_starter = False | ||
|
|
@@ -535,8 +536,9 @@ def _pop_on(self, end=0): | |
|
|
||
|
|
||
| class SCCWriter(BaseWriter): | ||
| def __init__(self, *args, **kw): | ||
| def __init__(self, *args, drop_frame=True, **kw): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. before this PR, _format_timestamp produced non-drop-frame timecodes . |
||
| super().__init__(*args, **kw) | ||
| self.drop_frame = drop_frame | ||
|
|
||
| def write(self, caption_set): | ||
| output = HEADER + "\n\n" | ||
|
|
@@ -549,7 +551,10 @@ def write(self, caption_set): | |
| # Only support one language. | ||
| lang = list(caption_set.get_languages())[0] | ||
| captions = caption_set.get_captions(lang) | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use is instead of == for boolean comparisons. |
||
| if self.drop_frame == True: | ||
| MICROSECONDS_PER_CODEWORD = 1000.0 * 1000.0 / (30.0 * 1000.0 / 1001.0) | ||
| elif self.drop_frame == False: | ||
| MICROSECONDS_PER_CODEWORD = 1000.0 * 1000.0 / (30.0) | ||
| # PASS 1: compute codes for each caption | ||
| codes = [ | ||
| (self._text_to_code(caption), caption.start, caption.end) | ||
|
|
@@ -560,25 +565,128 @@ def write(self, caption_set): | |
| # Advance start times so as to have time to write to the pop-on | ||
| # buffer; possibly remove the previous clear-screen command | ||
| for index, (code, start, end) in enumerate(codes): | ||
| code_words = len(code) / 5 + 8 | ||
| code_time_microseconds = code_words * MICROSECONDS_PER_CODEWORD | ||
| # code_words = len(code) / 5 + 8 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove this comment |
||
| code_words = len(code.split()) + 8 | ||
|
|
||
| code_time_microseconds = (code_words + 2) * MICROSECONDS_PER_CODEWORD | ||
| code_start = start - code_time_microseconds | ||
| if index == 0: | ||
| # also back-shift first cue so load-time is accounted for | ||
| codes[index] = (code, code_start, end) | ||
| continue | ||
| previous_code, previous_start, previous_end = codes[index - 1] | ||
| if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start: | ||
| # concatenate overlapping code | ||
| if code_start <= (previous_start + MICROSECONDS_PER_CODEWORD): | ||
| prev_words = len(previous_code.split()) + 8 # ENM/RCL + overhead | ||
| code_start = max( | ||
| code_start, previous_start + prev_words * MICROSECONDS_PER_CODEWORD | ||
| ) | ||
| codes[index] = (code, code_start, end) | ||
| # Also ensure previous ends before this starts if they nearly touch | ||
| codes[index - 1] = (previous_code, previous_start, None) | ||
| codes[index] = (code, code_start, end) | ||
| else: | ||
| if ( | ||
| previous_end is not None | ||
| and previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start | ||
| ): | ||
| codes[index - 1] = (previous_code, previous_start, None) | ||
| codes[index] = (code, code_start, end) | ||
|
|
||
| # PASS 3: | ||
| # Remove empty captions due to code concatenation | ||
| codes = list(filter(None, codes)) | ||
|
|
||
| # PASS 4: | ||
| # Write captions. | ||
| def _df_frames(us: int) -> int: | ||
| # must mirror _format_timestamp_df quantization | ||
| return math.floor(us * 30 / 1_000_000 * 1000 / 1001 + 1e-9) | ||
|
|
||
| last_emitted_frames = -1 | ||
|
|
||
| for code, start, end in codes: | ||
| output += f"{self._format_timestamp(start)}\t" | ||
| # bump by one frame if this quantizes to the same DF frame as previous | ||
| cur_frames = _df_frames(start) if self.drop_frame else None | ||
| if self.drop_frame and cur_frames <= last_emitted_frames: | ||
| # add exactly one codeword's worth of time until frame advances | ||
| while _df_frames(start) <= last_emitted_frames: | ||
| start += MICROSECONDS_PER_CODEWORD | ||
| cur_frames = _df_frames(start) | ||
| if self.drop_frame: | ||
| last_emitted_frames = cur_frames | ||
|
|
||
| if not self.drop_frame: | ||
| # bump by one codeword if same HH:MM:SS:FF as last | ||
| def _ndf_frames(us: int) -> int: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please move the function outside the loop. Also, this uses int() but _format_timestamp_ndf uses math.floor() |
||
| # mirror _format_timestamp_ndf | ||
| seconds_float = us / 1_000_000.0 * 1000.0 / 1001.0 | ||
| total_frames = int(seconds_float * 30) # floor | ||
| return total_frames | ||
|
|
||
| cur_frames = _ndf_frames(start) | ||
| if cur_frames <= last_emitted_frames: | ||
| while _ndf_frames(start) <= last_emitted_frames: | ||
| start += MICROSECONDS_PER_CODEWORD | ||
| last_emitted_frames = _ndf_frames(start) | ||
|
|
||
| # ---- MINIMAL SPLIT (only if >80 tokens) ---- | ||
| _prefix = ["94ae", "94ae", "9420", "9420"] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. _prefix/_suffix recreated every iteration |
||
| _suffix = ["942f", "942f"] | ||
| _code_tokens = code.split() | ||
| _total = len(_prefix) + len(_code_tokens) + len(_suffix) | ||
| if _total > 80: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Define number 80 as a named constant with rationale for the limit. An edge case that's unlikely in practice:
I'd say it's worth a low-priority mention — something like "the split |
||
| # first chunk at 'start' | ||
| _keep = 80 - len(_prefix) - len(_suffix) # 74 | ||
| _first = _prefix + _code_tokens[:_keep] + _suffix | ||
| if self.drop_frame: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if self.drop_frame: sequence appears 6 times. def _format_timestamp(self, us): That would turn each 4-line block into just: |
||
| ts_start = self._format_timestamp_df(start) | ||
| else: | ||
| ts_start = self._format_timestamp_ndf(start) | ||
| output += f"{ts_start}\t" + " ".join(_first) + "\n\n" | ||
|
|
||
| # second chunk at start + 1 codeword (and guard again) | ||
| start2 = start + MICROSECONDS_PER_CODEWORD | ||
| cur2 = _df_frames(start2) if self.drop_frame else None | ||
| if self.drop_frame and cur2 <= last_emitted_frames: | ||
| while _df_frames(start2) <= last_emitted_frames: | ||
| start2 += MICROSECONDS_PER_CODEWORD | ||
| cur2 = _df_frames(start2) | ||
| if self.drop_frame: | ||
| last_emitted_frames = cur2 | ||
| _rest = _prefix + _code_tokens[_keep:] + _suffix | ||
| if self.drop_frame: | ||
| ts2 = self._format_timestamp_df(start2) | ||
| else: | ||
| ts2 = self._format_timestamp_ndf(start2) | ||
| output += f"{ts2}\t" + " ".join(_rest) + "\n\n" | ||
|
|
||
| if end is not None: | ||
| ts_end = ( | ||
| self._format_timestamp_df(end) | ||
| if self.drop_frame | ||
| else self._format_timestamp_ndf(end) | ||
| ) | ||
| output += f"{ts_end}\t942c 942c\n\n" | ||
| continue | ||
| # ---- /MINIMAL SPLIT ---- | ||
| # choose formatter based on flag | ||
| if self.drop_frame: | ||
| ts_start = self._format_timestamp_df(start) | ||
| else: | ||
| ts_start = self._format_timestamp_ndf(start) | ||
|
|
||
| output += f"{ts_start}\t" | ||
| output += "94ae 94ae 9420 9420 " | ||
| output += code | ||
| output += "942c 942c 942f 942f\n\n" | ||
| output += code + " " | ||
| output += "942f 942f\n\n" | ||
|
|
||
| if end is not None: | ||
| output += f"{self._format_timestamp(end)}\t942c 942c\n\n" | ||
| if self.drop_frame: | ||
| ts_end = self._format_timestamp_df(end) | ||
| else: | ||
| ts_end = self._format_timestamp_ndf(end) | ||
|
|
||
| output += f"{ts_end}\t942c 942c\n\n" | ||
|
|
||
| return output | ||
|
|
||
|
|
@@ -626,7 +734,7 @@ def _text_to_code(self, s): | |
| for row, line in enumerate(lines): | ||
| row += 16 - len(lines) | ||
| # Move cursor to column 0 of the destination row | ||
| for _ in range(2): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SCC spec requires control codes (including PACs) to be sent in pairs for error resilience. Therefore we should revert this to range(2) as it was. |
||
| for _ in range(1): | ||
| code += ( | ||
| PAC_HIGH_BYTE_BY_ROW[row] | ||
| + f"{PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]} " | ||
|
|
@@ -639,17 +747,55 @@ def _text_to_code(self, s): | |
| return code | ||
|
|
||
| @staticmethod | ||
| def _format_timestamp(microseconds): | ||
| seconds_float = microseconds / 1000.0 / 1000.0 | ||
| # Convert to non-drop-frame timecode | ||
| def _format_timestamp_df(us: int) -> str: | ||
| """ | ||
| 29.97 fps DROP-FRAME timecode (HH:MM:SS;FF) from microseconds. | ||
| Uses exact 30000/1001 math and the standard 10-minute block method. | ||
| """ | ||
| # 1) Real frames at 30000/1001 (quantize to nearest frame) | ||
| total_frames = int(round(us * 30 / 1_000_000 * 1000 / 1001 + 1e-9)) | ||
|
|
||
| # 2) Add back the dropped frames to get "timecode counting" frames | ||
| fps = 30 | ||
| FRAMES_PER_10MIN = 17982 # 10*60*30 - 2*9 | ||
| d = total_frames // FRAMES_PER_10MIN # number of full 10-min blocks | ||
| m = total_frames % FRAMES_PER_10MIN # remainder into current block | ||
| # Within each 10-min block, 2 frames are dropped at the top of minutes 1–9. | ||
| if m < 2: | ||
| tc_frames = total_frames + 18 * d | ||
| else: | ||
| tc_frames = total_frames + 18 * d + 2 * ((m - 2) // 1798) | ||
|
|
||
| # 3) Derive HH:MM:SS;FF in "timecode counting" space (30 fps) | ||
| hours = tc_frames // (fps * 60 * 60) | ||
| rem = tc_frames % (fps * 60 * 60) | ||
| minutes = rem // (fps * 60) | ||
| rem = rem % (fps * 60) | ||
| seconds = rem // fps | ||
| frames = rem % fps | ||
|
|
||
| return f"{hours:02d}:{minutes:02d}:{seconds:02d};{frames:02d}" | ||
|
|
||
| @staticmethod | ||
| def _format_timestamp_ndf(microseconds: int) -> str: | ||
| """ | ||
| Convert microseconds → 29.97fps non-drop-frame timecode (HH:MM:SS:FF). | ||
| Uses 30fps frame counting with NTSC correction factor. | ||
| """ | ||
| # Convert to elapsed seconds | ||
| seconds_float = microseconds / 1_000_000.0 | ||
| # Apply NTSC fudge factor to align to 29.97 fps | ||
| seconds_float *= 1000.0 / 1001.0 | ||
|
|
||
| hours = math.floor(seconds_float / 3600) | ||
| seconds_float -= hours * 3600 | ||
| minutes = math.floor(seconds_float / 60) | ||
| seconds_float -= minutes * 60 | ||
| seconds = math.floor(seconds_float) | ||
| seconds_float -= seconds | ||
| frames = math.floor(seconds_float * 30) | ||
|
|
||
| # Return non-drop-frame formatted timecode (colon!) | ||
| return f"{hours:02}:{minutes:02}:{seconds:02}:{frames:02}" | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The os module is imported but never used anywhere in the file. Please remove it