diff --git a/README.md b/README.md index 24583648..5a77f908 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,9 @@ These re-used Media Objects have their original media timeline, and each grain's Flow Segments can also re-use parts of a Media Object, as in Flow C in the diagram below. Notice that the `timerange` still refers to the Flow timeline (and `0:50...` etc. is used as shorthand for `0:500000000`), however a reduced number of grains have been selected, taking only part of the first Media Object and part of the last Media Object. +The Flow `timerange` and the Media Object's internal `object_timerange` have a 1:1 mapping (`ts_offset` is `0:0`). +The first Media Object has an `object_timerange` of `[0:0_1:0)`, but the Flow Segment which uses it has a `timerange` of `[0:500000000_1:0)`. +This indicates that this Flow Segment uses only the second half of the Media Object. ![Graphic showing the Flow timeline and 3 Flow Segments in Flow C, where the Media Objects have been re-used from Flow A however only half of the first and last Media Object has been used](./docs/images/Flow%20and%20Media%20Timelines-Flow%20C.drawio.png) diff --git a/docs/images/Flow and Media Timelines-Flow A.drawio.png b/docs/images/Flow and Media Timelines-Flow A.drawio.png index c7a99c28..e34f336e 100644 Binary files a/docs/images/Flow and Media Timelines-Flow A.drawio.png and b/docs/images/Flow and Media Timelines-Flow A.drawio.png differ diff --git a/docs/images/Flow and Media Timelines-Flow B.drawio.png b/docs/images/Flow and Media Timelines-Flow B.drawio.png index f8ec4fb9..6025ad14 100644 Binary files a/docs/images/Flow and Media Timelines-Flow B.drawio.png and b/docs/images/Flow and Media Timelines-Flow B.drawio.png differ diff --git a/docs/images/Flow and Media Timelines-Flow C.drawio.png b/docs/images/Flow and Media Timelines-Flow C.drawio.png index fed7a717..7df20c06 100644 Binary files a/docs/images/Flow and Media Timelines-Flow C.drawio.png and b/docs/images/Flow and Media 
Timelines-Flow C.drawio.png differ diff --git a/examples/outgest_file.py b/examples/outgest_file.py index c5e131f7..43371676 100755 --- a/examples/outgest_file.py +++ b/examples/outgest_file.py @@ -38,7 +38,9 @@ async def get_flow_segments( timerange: TimeRange ) -> AsyncGenerator[dict, None]: """Generator of Flow Segment dicts for the given Flow ID and timerange""" - segments_url = f"{tams_url}/flows/{flow['id']}/segments?timerange={timerange!s}&presigned=true" + segments_url = ( + f"{tams_url}/flows/{flow['id']}/segments?timerange={timerange!s}" + "&presigned=true&include_object_timerange=true") async with aiohttp.ClientSession(trust_env=True) as session: while True: async with get_request(session, credentials, segments_url) as resp: @@ -56,7 +58,7 @@ async def get_flow_segments( ) try: - segments_url = resp.links["next"]["url"] + segments_url = str(resp.links["next"]["url"]) except KeyError: break @@ -104,22 +106,46 @@ def normalise_and_transfer_media( else: raise NotImplementedError() - try: - discard_before_count = int(segment["sample_offset"]) - except KeyError: - discard_before_count = 0 + ts_offset = Timestamp.from_str(segment.get("ts_offset", "0:0")) + + segment_timerange = TimeRange.from_str(segment["timerange"]) + assert (segment_timerange.start is not None) + assert (segment_timerange.end is not None) try: - keep_after_count = int(segment["sample_count"]) - if keep_after_count == 0: - # Corner case - no media units are used from the segment - return TimeRange.never() + object_timerange = TimeRange.from_str(segment["object_timerange"]) except KeyError: - keep_after_count = -1 + if "sample_offset" in segment or "sample_count" in segment: + raise NotImplementedError( + "object_timerange is not set but the deprecated sample_offset or sample_count are. " + "This is conflicting data and the Segment metadata is invalid. 
" + "This script does not support pre TAMS v8.0 sample-only Segment metadata.") + object_timerange = TimeRange( + start=segment_timerange.start - ts_offset, + end=segment_timerange.end - ts_offset, + inclusivity=segment_timerange.inclusivity + ) + logger.warning( + f"Object TimeRange not found. Using Segment TimeRange offset by ts_offset ({object_timerange})") + assert (object_timerange.start is not None) + assert (object_timerange.end is not None) + + offset_object_timerange = TimeRange( + object_timerange.start + ts_offset, + object_timerange.end + ts_offset, + object_timerange.inclusivity + ) + assert (offset_object_timerange.start is not None) + assert (offset_object_timerange.end is not None) + if segment_timerange not in offset_object_timerange: + logger.warning( + f"Segment TimeRange ({segment_timerange}) is not contained by Object TimeRange " + f"({object_timerange}) + ts_offset ({ts_offset}) in Segment metadata") - ts_offset = Timestamp.from_str(segment.get("ts_offset", "0:0")) + skip_start_duration = segment_timerange.start - offset_object_timerange.start + skip_end_duration = offset_object_timerange.end - segment_timerange.end - discarding_samples = discard_before_count > 0 or keep_after_count >= 0 + discarding_samples = skip_start_duration > 0 or skip_end_duration > 0 output_timerange = TimeRange.never() first_packet = True with av.open(media_essence, mode="r", format="mpegts") as av_input: @@ -146,10 +172,10 @@ def normalise_and_transfer_media( # Don't attempt to get the media unit count if it isn't required to # process FlowSegment.sample_offset and sample_count. This avoids potential # NotImplementedError because the packet duration is not set. 
- process_media_unit_count = discard_before_count > 0 or keep_after_count >= 0 + process_media_packet_offsets = skip_start_duration > 0 or skip_end_duration > 0 # Get the number of media units (samples) in the packet - if process_media_unit_count: + if process_media_packet_offsets: if pkt.duration is not None: # We assume the packet duration is accurate enough to provide a media unit count pkt_duration = Timestamp.from_count(pkt.duration, 1/pkt.time_base) @@ -160,15 +186,13 @@ def normalise_and_transfer_media( else: raise NotImplementedError("Packet doesn't provide a duration") - media_unit_count = pkt_duration.to_count(media_rate) - # Discard media units before FlowSegment.sample_offset - if process_media_unit_count and discard_before_count > 0: - discard_before_count -= media_unit_count - if discard_before_count < 0: + if process_media_packet_offsets and skip_start_duration > 0: + skip_start_duration -= pkt_duration + if skip_start_duration < 0: logger.warning( - "Segment 'sample_offset' is not a whole number of packets. " - f"Included {-discard_before_count} samples at the start. " + "Segment TimeRange Start is not at a packet boundary. " + f"Included {Timestamp() - skip_start_duration} samples at the start. " "A transcode would be required to get the correct number of samples" ) continue @@ -204,24 +228,24 @@ def normalise_and_transfer_media( av_output.mux([pkt]) - # Discard media units >= FlowSegment.sample_offset + FlowSegment.sample_count - if process_media_unit_count and keep_after_count >= 0: - keep_after_count -= media_unit_count - if keep_after_count <= 0: - if keep_after_count < 0: - logger.warning( - "Segment 'sample_count' is not a whole number of packets. " - f"Included {-keep_after_count} samples at the end. 
" - "A transcode would be required to get the correct number of samples" - ) - break + # Discard media units after segment_timerange end + if process_media_packet_offsets and not output_timerange.ends_earlier_than_timerange(segment_timerange): + if not output_timerange.ends_inside_timerange(segment_timerange): + # If output doesn't end before or during the segment, last packet caused it to end after the segment + assert (output_timerange.end is not None) + output_end_diff = output_timerange.end - segment_timerange.end + logger.warning( + "Segment timerange end is not at a packet boundary. " + f"Included {output_end_diff} samples at the end. " + "A transcode would be required to get the correct number of samples" + ) + break if check_timing and not discarding_samples: # Warn if the normalised timerange calculated from the media pts and FlowSegment.ts_offset # does not equal the normalised FlowSegment.timerange. # Note that normalisation will hide differences that are less than 1/2 the media unit duration # and the assumption is that those differences are rounding errors - segment_timerange = TimeRange.from_str(segment["timerange"]).normalise(media_rate) norm_output_timerange = output_timerange.normalise(media_rate) norm_segment_timerange = segment_timerange.normalise(media_rate) if norm_output_timerange != norm_segment_timerange: diff --git a/examples/simple_edit.py b/examples/simple_edit.py index c9a2ea80..2a25854a 100755 --- a/examples/simple_edit.py +++ b/examples/simple_edit.py @@ -22,6 +22,7 @@ FLOW_FRAME_RATE = 50 + async def put_flow( session: aiohttp.ClientSession, credentials: Credentials, @@ -174,7 +175,8 @@ async def simple_edit( }) ) as resp: resp.raise_for_status() - print(f"Added segment from Flow {input_2_flow_id} and timerange {segment['timerange']} to {new_seg_tr!s}") + print(f"Added segment from Flow {input_2_flow_id} and timerange " + "{segment['timerange']} to {new_seg_tr!s}") print(f"Finished writing output {output_flow_id}") @@ -248,10 +250,16 
@@ async def interval_edit( # Rest of this cut fits in the current segment, so we can write a new segment new_seg_tr = TimeRange(working_time, next_switch_at, TimeRange.INCLUDE_START) else: - # We need to add all of the rest of this segment, and then some more of the next one before cutting + # We need to add all of the rest of this segment, + # and then some more of the next one before cutting new_seg_tr = TimeRange.from_start_length(working_time, segment_length_remaining, TimeRange.INCLUDE_START) + # Note that `sample_offset` and `sample_count` are deprecated but still set for backwards compatibility. + # They have been replaced by `object_timerange`. + # As this is referencing an existing Object, `object_timerange` will already be set against the Object and + # will not need setting here. + # When `sample_offset` and `sample_count` are dropped from the spec, they will be deleted here. new_segment = { "object_id": next_seg["object_id"], "timerange": new_seg_tr,