aboutsummaryrefslogtreecommitdiff
path: root/src/subtitle_extractor_aishit.rs
diff options
context:
space:
mode:
author Malte Voos <git@mal.tc> 2025-11-14 15:30:49 +0100
committer Malte Voos <git@mal.tc> 2025-11-14 15:30:49 +0100
commit a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e (patch)
tree 542b42d3316138043272faba42e0d1005f8403b6 /src/subtitle_extractor_aishit.rs
parent a42a73378b7c527a5e4600544b2d7a86d68c5aac (diff)
download lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.tar.gz
lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.zip
implement file/url open dialog
Diffstat (limited to 'src/subtitle_extractor_aishit.rs')
-rw-r--r-- src/subtitle_extractor_aishit.rs 732
1 files changed, 732 insertions, 0 deletions
diff --git a/src/subtitle_extractor_aishit.rs b/src/subtitle_extractor_aishit.rs
new file mode 100644
index 0000000..c615f6c
--- /dev/null
+++ b/src/subtitle_extractor_aishit.rs
@@ -0,0 +1,732 @@
+use std::collections::BTreeMap;
+
+use anyhow::Result;
+
+use ffmpeg::Rational;
+use log::{debug, error, info, warn};
+use relm4::{ComponentSender, SharedState, Worker};
+
+pub type StreamIndex = usize; // key into TRACKS: a container stream index, or 1000 for the whisper-generated track
+
+#[derive(Debug, Clone)]
+pub struct SubtitleCue { // one timed piece of subtitle text
+ pub start: gst::ClockTime, // time at which the cue begins
+ pub end: gst::ClockTime, // time at which the cue ends
+ pub text: String, // cue text; may contain '\n' for multi-line cues
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleTrack { // metadata and accumulated cues of one subtitle track
+ pub language: Option<isolang::Language>, // None when the stream has no (recognised) "language" tag
+ pub title: Option<String>, // stream "title" metadata, if present
+ pub cues: Vec<SubtitleCue>, // cues in the order they were extracted
+ pub is_generated: bool, // true if generated from audio (whisper) rather than read from a subtitle stream
+}
+
+pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new(); // tracks of the current input keyed by stream index; cleared at the start of every extraction
+
+pub struct SubtitleExtractor {} // stateless worker; results are published through TRACKS and the output messages
+
+#[derive(Debug)]
+pub enum SubtitleExtractorMsg { // requests accepted by the worker
+ ExtractFromUrl(String), // extract all subtitles from this path/URL (handed to ffmpeg::format::input)
+}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput { // notifications emitted while extracting
+ NewOrUpdatedTrackMetadata(StreamIndex), // a track was inserted into TRACKS under this index
+ NewCue(StreamIndex, SubtitleCue), // a cue was appended to the given track
+ ExtractionComplete, // sent once after a successful extraction; not sent when extraction fails
+}
+
+/// relm4 worker glue: receives extraction requests and runs them on the
+/// worker, reporting progress through [`SubtitleExtractorOutput`].
+impl Worker for SubtitleExtractor {
+    type Init = ();
+    type Input = SubtitleExtractorMsg;
+    type Output = SubtitleExtractorOutput;
+
+    /// Creates the worker. There is no state to initialise, so both
+    /// arguments are ignored.
+    fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+        SubtitleExtractor {}
+    }
+
+    /// Dispatches an incoming request to its handler.
+    fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+        // `ExtractFromUrl` is the only request kind, so the pattern is
+        // irrefutable; adding a variant turns this line into a compile error.
+        let SubtitleExtractorMsg::ExtractFromUrl(url) = msg;
+        self.handle_extract_from_url(url, sender);
+    }
+}
+
+impl SubtitleExtractor {
+    /// Runs a full extraction for `url`, replacing whatever is in [`TRACKS`].
+    ///
+    /// On success `ExtractionComplete` is sent through `sender`. On failure
+    /// the error is only logged and no completion message is emitted.
+    ///
+    /// # Panics
+    /// Panics if the completion message cannot be delivered.
+    fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
+        // Tracks of a previously opened input must not leak into this one.
+        TRACKS.write().clear();
+
+        // Extraction is done with ffmpeg; bail out early when it fails.
+        if let Err(e) = self.extract_subtitles_ffmpeg(&url, &sender) {
+            error!("FFmpeg extraction failed: {}", e);
+            return;
+        }
+
+        info!("Subtitle extraction completed successfully");
+        sender
+            .output(SubtitleExtractorOutput::ExtractionComplete)
+            .unwrap();
+    }
+
+    /// Demuxes `url` with FFmpeg and publishes every subtitle cue it can find.
+    ///
+    /// Two sources of cues are handled in a single pass over the packets:
+    /// * embedded subtitle streams, each decoded with its own decoder, and
+    /// * the best audio stream, which is decoded and pushed through the
+    ///   `whisper` filter graph (when `setup_whisper_filter` succeeds) to
+    ///   produce a generated track stored under stream index 1000.
+    ///
+    /// Tracks are inserted into [`TRACKS`] and announced with
+    /// `NewOrUpdatedTrackMetadata`; every cue is appended to its track and
+    /// announced with `NewCue`.
+    ///
+    /// # Errors
+    /// Fails if the input cannot be opened or if `check_whisper_output`
+    /// reports an error. Problems that affect a single stream (decoder or
+    /// filter setup) are logged and that stream is skipped.
+    ///
+    /// # Panics
+    /// Panics if an output message cannot be delivered through `sender`.
+    fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
+        // Key of the whisper-generated track in `TRACKS`; a high value is
+        // used for generated tracks so it stays clear of real stream indices.
+        const WHISPER_STREAM_INDEX: StreamIndex = 1000;
+
+        info!("Starting subtitle extraction from: {}", url);
+        let mut input = ffmpeg::format::input(&url)?;
+
+        info!(
+            "Input format: {} ({} streams)",
+            input.format().name(),
+            input.streams().count()
+        );
+
+        // Informational only; the lookup that matters happens in
+        // `setup_whisper_filter`.
+        if let Some(whisper_filter) = ffmpeg::filter::find("whisper") {
+            info!("Whisper filter found: {}", whisper_filter.name());
+        } else {
+            warn!("Whisper filter not found - audio transcription will be skipped");
+        }
+
+        let mut subtitle_decoders = BTreeMap::new();
+        let mut audio_decoder: Option<ffmpeg::decoder::Audio> = None;
+        // Never read again; presumably held so the graph outlives the
+        // source/sink contexts taken from it.
+        let mut _whisper_filter_graph: Option<ffmpeg::filter::Graph> = None;
+        let mut whisper_source: Option<ffmpeg::filter::Context> = None;
+        let mut whisper_sink: Option<ffmpeg::filter::Context> = None;
+
+        // Pick the audio stream that will be transcribed.
+        let best_audio_stream_index = match input.streams().best(ffmpeg::media::Type::Audio) {
+            Some(audio_stream) => {
+                let codec_id = audio_stream.parameters().id();
+                // A throwaway decoder is opened just to log the stream's
+                // parameters; 0 means "could not be determined".
+                let (channels, sample_rate) =
+                    ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+                        .and_then(|context| context.decoder().audio())
+                        .map(|audio| (audio.channels(), audio.rate()))
+                        .unwrap_or((0, 0));
+
+                info!(
+                    "Found best audio stream: index {} (codec: {:?}, channels: {}, sample_rate: {})",
+                    audio_stream.index(),
+                    codec_id,
+                    channels,
+                    sample_rate
+                );
+                Some(audio_stream.index())
+            }
+            None => {
+                info!("No audio stream found for whisper processing");
+                None
+            }
+        };
+
+        // Open the audio decoder and build the whisper filter graph.
+        if let Some(audio_index) = best_audio_stream_index {
+            info!("Setting up whisper filter for audio stream {}", audio_index);
+
+            let audio_stream = input
+                .stream(audio_index)
+                .expect("audio index was just taken from this input's stream list");
+            match ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())
+                .and_then(|context| context.decoder().audio())
+            {
+                Ok(decoder) => {
+                    debug!("Creating whisper filter graph...");
+                    debug!(
+                        "Audio stream time_base: {}, decoder rate: {}, format: {:?}, channel_layout: 0x{:x}",
+                        audio_stream.time_base(),
+                        decoder.rate(),
+                        decoder.format(),
+                        decoder.channel_layout().bits()
+                    );
+                    audio_decoder = Some(decoder);
+
+                    match self.setup_whisper_filter(&audio_stream) {
+                        Ok((graph, source, sink)) => {
+                            info!("Whisper filter graph created successfully");
+                            _whisper_filter_graph = Some(graph);
+                            whisper_source = Some(source);
+                            whisper_sink = Some(sink);
+                            debug!("Whisper source and sink contexts stored");
+
+                            // Publish the (still empty) generated track.
+                            let track = SubtitleTrack {
+                                language: Some(isolang::Language::from_639_1("en").unwrap_or_else(
+                                    || isolang::Language::from_639_3("eng").unwrap(),
+                                )),
+                                title: Some("Generated from Audio (Whisper)".to_string()),
+                                cues: Vec::new(),
+                                is_generated: true,
+                            };
+                            TRACKS.write().insert(WHISPER_STREAM_INDEX, track);
+
+                            sender
+                                .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+                                    WHISPER_STREAM_INDEX,
+                                ))
+                                .unwrap();
+                        }
+                        Err(e) => {
+                            error!("Failed to setup whisper filter: {}", e);
+                            debug!("Whisper filter error details: {:?}", e);
+                            warn!(
+                                "Audio transcription will be skipped due to filter setup failure"
+                            );
+                        }
+                    }
+                }
+                Err(e) => {
+                    warn!(
+                        "Failed to open audio decoder for stream {}: {} - audio transcription will be skipped",
+                        audio_index, e
+                    );
+                }
+            }
+        }
+
+        // Register a track and a decoder for every embedded subtitle stream.
+        for (stream_index, stream) in input.streams().enumerate() {
+            if stream.parameters().medium() != ffmpeg::media::Type::Subtitle {
+                continue;
+            }
+
+            let language_code = stream.metadata().get("language").map(|s| s.to_string());
+            let title = stream.metadata().get("title").map(|s| s.to_string());
+
+            let track = SubtitleTrack {
+                language: language_code.and_then(|code| isolang::Language::from_639_2b(&code)),
+                title,
+                cues: Vec::new(),
+                is_generated: false,
+            };
+
+            TRACKS.write().insert(stream_index, track);
+
+            sender
+                .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+                    stream_index,
+                ))
+                .unwrap();
+
+            // A stream that cannot be decoded is skipped instead of aborting
+            // the extraction of all the other streams.
+            let context =
+                match ffmpeg::codec::context::Context::from_parameters(stream.parameters()) {
+                    Ok(context) => context,
+                    Err(e) => {
+                        error!(
+                            "Failed to create context for subtitle stream {}: {}",
+                            stream_index, e
+                        );
+                        continue;
+                    }
+                };
+            match context.decoder().subtitle() {
+                Ok(decoder) => {
+                    subtitle_decoders.insert(stream_index, decoder);
+                    debug!("Created decoder for subtitle stream {}", stream_index);
+                }
+                Err(e) => {
+                    error!(
+                        "Failed to create decoder for subtitle stream {}: {}",
+                        stream_index, e
+                    );
+                }
+            }
+        }
+
+        // Single pass over all packets of the input.
+        for (stream, packet) in input.packets() {
+            let stream_index = stream.index();
+
+            // Embedded subtitles.
+            if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
+                let mut subtitle = ffmpeg::Subtitle::new();
+                // `decode` yields Ok(false) when the packet produced no
+                // subtitle; only Ok(true) carries data worth converting.
+                if matches!(decoder.decode(&packet, &mut subtitle), Ok(true)) {
+                    if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
+                    {
+                        if let Some(track) = TRACKS.write().get_mut(&stream_index) {
+                            track.cues.push(cue.clone());
+                        }
+
+                        sender
+                            .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
+                            .unwrap();
+                    }
+                }
+            }
+
+            // Audio for whisper.
+            if Some(stream_index) == best_audio_stream_index {
+                debug!(
+                    "Processing audio packet for whisper (stream: {}, pts: {:?}, duration: {:?})",
+                    stream_index,
+                    packet.pts(),
+                    packet.duration()
+                );
+                debug!(
+                    "Audio decoder available: {}, Whisper source available: {}",
+                    audio_decoder.is_some(),
+                    whisper_source.is_some()
+                );
+                if let (Some(decoder), Some(source)) = (&mut audio_decoder, &mut whisper_source) {
+                    debug!("Both audio decoder and whisper source are available, processing...");
+                    if let Err(e) = decoder.send_packet(&packet) {
+                        debug!("Failed to send packet to audio decoder: {}", e);
+                    }
+
+                    // SAFETY: NOTE(review) - the empty frame is only ever
+                    // handed to `receive_frame`, which fills it before it is
+                    // read; confirm against the ffmpeg crate's contract.
+                    let mut frame = unsafe { ffmpeg::Frame::empty() };
+                    let mut frame_count = 0;
+                    while decoder.receive_frame(&mut frame).is_ok() {
+                        frame_count += 1;
+                        debug!(
+                            "Decoded audio frame {} (pts: {:?})",
+                            frame_count,
+                            frame.pts()
+                        );
+
+                        if let Err(e) = source.source().add(&frame) {
+                            error!("Failed to add frame to whisper filter: {}", e);
+                        } else {
+                            debug!("Successfully added frame to whisper filter");
+                        }
+
+                        // Drain the filter after every frame so cues are
+                        // reported as soon as they are available.
+                        if let Some(sink) = &mut whisper_sink {
+                            self.check_whisper_output(sink, sender)?;
+                        }
+                    }
+                    if frame_count > 0 {
+                        debug!("Processed {} audio frames for whisper", frame_count);
+                    }
+                } else {
+                    debug!("Skipping audio packet - decoder or whisper source not available");
+                }
+            }
+        }
+
+        // End of input: flush the audio decoder, then the whisper filter.
+        if let (Some(decoder), Some(source), Some(sink)) =
+            (&mut audio_decoder, &mut whisper_source, &mut whisper_sink)
+        {
+            info!("Flushing audio decoder and whisper filter...");
+            if let Err(e) = decoder.send_eof() {
+                debug!("Failed to send EOF to decoder: {}", e);
+            }
+            // SAFETY: NOTE(review) - same as above, the frame is filled by
+            // `receive_frame` before being read.
+            let mut frame = unsafe { ffmpeg::Frame::empty() };
+            let mut final_frame_count = 0;
+            while decoder.receive_frame(&mut frame).is_ok() {
+                final_frame_count += 1;
+                // Best effort: a frame the filter rejects is dropped.
+                source.source().add(&frame).ok();
+            }
+            debug!("Flushed {} final frames from decoder", final_frame_count);
+
+            debug!("Flushing whisper filter...");
+            if let Err(e) = source.source().flush() {
+                error!("Failed to flush whisper filter: {}", e);
+            }
+
+            info!("Processing final whisper filter output...");
+            self.check_whisper_output(sink, sender)?;
+        }
+
+        Ok(())
+    }
+
+    /// Builds the `abuffer -> whisper -> abuffersink` filter graph used to
+    /// transcribe `audio_stream`.
+    ///
+    /// The whisper model is looked up at `./whisper-models/ggml-large-v3.bin`
+    /// relative to the current working directory and passed to the filter as
+    /// an absolute path when the file exists.
+    ///
+    /// Returns the graph together with its source (`"src"`) and sink
+    /// (`"sink"`) contexts.
+    ///
+    /// # Errors
+    /// Fails if a decoder cannot be opened for the stream, if one of the
+    /// `abuffer`, `whisper` or `abuffersink` filters is unavailable, or if
+    /// the graph cannot be built, connected or validated.
+    fn setup_whisper_filter(
+        &self,
+        audio_stream: &ffmpeg::Stream,
+    ) -> Result<(
+        ffmpeg::filter::Graph,
+        ffmpeg::filter::Context,
+        ffmpeg::filter::Context,
+    )> {
+        // Model location, relative to the current working directory.
+        const DEFAULT_MODEL_PATH: &str = "./whisper-models/ggml-large-v3.bin";
+
+        debug!("Setting up whisper filter graph...");
+        let mut filter_graph = ffmpeg::filter::Graph::new();
+
+        // The buffer source must be told the exact format of the frames the
+        // caller's decoder will produce, so open a decoder to read it.
+        let time_base = audio_stream.time_base();
+        let context = ffmpeg::codec::context::Context::from_parameters(audio_stream.parameters())?;
+        let audio_decoder = context.decoder().audio()?;
+        debug!("Audio decoder created successfully");
+
+        let buffer_args = format!(
+            "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+            time_base,
+            audio_decoder.rate(),
+            audio_decoder.format().name(),
+            audio_decoder.channel_layout().bits()
+        );
+        debug!("Buffer args: {}", buffer_args);
+
+        let abuffer_filter = ffmpeg::filter::find("abuffer")
+            .ok_or_else(|| anyhow::anyhow!("abuffer filter not found"))?;
+        debug!("abuffer filter found: {}", abuffer_filter.name());
+
+        filter_graph
+            .add(&abuffer_filter, "src", &buffer_args)
+            .map_err(|e| {
+                error!("Failed to add abuffer filter: {}", e);
+                anyhow::anyhow!("Failed to add abuffer filter: {}", e)
+            })?;
+        debug!("abuffer filter added successfully");
+
+        // Resolve the model to an absolute path when possible; otherwise
+        // fall back to the relative path and let the filter report the
+        // problem.
+        let model_path = std::path::Path::new(DEFAULT_MODEL_PATH);
+        let model_exists = model_path.exists();
+        let model_arg = if model_exists {
+            model_path
+                .canonicalize()
+                .map(|p| p.to_string_lossy().into_owned())
+                .unwrap_or_else(|_| DEFAULT_MODEL_PATH.to_string())
+        } else {
+            warn!("Whisper model file not found at: {:?}", model_path);
+            DEFAULT_MODEL_PATH.to_string()
+        };
+        debug!("Model path exists: {}", model_exists);
+        debug!("Using absolute path: {}", model_arg);
+
+        // The whisper filter itself is instantiated by `parse` below; this
+        // lookup only produces a clear error when FFmpeg lacks it.
+        let whisper_filter = ffmpeg::filter::find("whisper").ok_or_else(|| {
+            error!("Whisper filter not found! Make sure FFmpeg was compiled with whisper support");
+            anyhow::anyhow!("Whisper filter not available")
+        })?;
+        debug!("Whisper filter found: {}", whisper_filter.name());
+
+        // An audio sink is used because the transcription is read from the
+        // frame metadata of the filter's audio output.
+        let abuffersink_filter = ffmpeg::filter::find("abuffersink")
+            .ok_or_else(|| anyhow::anyhow!("abuffersink filter not found"))?;
+        debug!("abuffersink filter found: {}", abuffersink_filter.name());
+
+        filter_graph
+            .add(&abuffersink_filter, "sink", "")
+            .map_err(|e| {
+                error!("Failed to add abuffersink filter: {}", e);
+                anyhow::anyhow!("Failed to add abuffersink filter: {}", e)
+            })?;
+        debug!("abuffersink filter added successfully");
+
+        // NOTE(review): `model_arg` is inserted unescaped; a path containing
+        // ':' or '\'' would need filtergraph escaping.
+        debug!("Connecting filter graph with complete chain: src -> whisper -> sink");
+        let filter_chain = format!(
+            "[src]whisper=model={}:queue=30:format=json[sink]",
+            model_arg
+        );
+        debug!("Using filter chain: {}", filter_chain);
+
+        filter_graph
+            .output("src", 0)
+            .and_then(|o| o.input("sink", 0))
+            .and_then(|i| i.parse(&filter_chain))
+            .map_err(|e| {
+                error!("Failed to connect filter graph: {}", e);
+                anyhow::anyhow!("Failed to connect filter graph: {}", e)
+            })?;
+        debug!("Filter graph connected successfully");
+
+        debug!("Validating filter graph...");
+        if let Err(e) = filter_graph.validate() {
+            error!("Filter graph validation failed: {}", e);
+            debug!(
+                "Filter graph dump before validation failure:\n{}",
+                filter_graph.dump()
+            );
+            return Err(anyhow::anyhow!("Filter graph validation failed: {}", e));
+        }
+        info!("Filter graph validated successfully");
+        debug!("Filter graph dump:\n{}", filter_graph.dump());
+
+        let source_ctx = filter_graph
+            .get("src")
+            .ok_or_else(|| anyhow::anyhow!("Source context not found"))?;
+        let sink_ctx = filter_graph
+            .get("sink")
+            .ok_or_else(|| anyhow::anyhow!("Sink context not found"))?;
+        debug!("Final contexts retrieved successfully");
+
+        Ok((filter_graph, source_ctx, sink_ctx))
+    }
+
+    /// Drains every frame currently available on the whisper `sink` and
+    /// turns each transcription found in its metadata into a cue.
+    ///
+    /// The whisper filter passes audio frames through and attaches the
+    /// recognised text under the `lavfi.whisper.text` metadata key; frames
+    /// without that key are counted and otherwise ignored.
+    ///
+    /// # Errors
+    /// Propagates any error returned by `parse_whisper_text`.
+    fn check_whisper_output(
+        &self,
+        sink: &mut ffmpeg::filter::Context,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        // Frame pts is taken to be in 1/16000 s units (whisper runs at
+        // 16 kHz). NOTE(review): confirm against the sink's time base.
+        const WHISPER_SAMPLE_RATE: u64 = 16_000;
+        const NANOS_PER_SECOND: u64 = 1_000_000_000;
+
+        debug!("Attempting to read audio frames from whisper filter output...");
+
+        // SAFETY: NOTE(review) - the empty frame is filled by
+        // `sink().frame()` before anything is read from it; confirm against
+        // the ffmpeg crate's contract.
+        let mut frame = unsafe { ffmpeg::Frame::empty() };
+        let mut output_count = 0;
+
+        while sink.sink().frame(&mut frame).is_ok() {
+            output_count += 1;
+            debug!(
+                "Received audio frame {} from whisper filter (pts: {:?})",
+                output_count,
+                frame.pts()
+            );
+
+            if let Some(whisper_text) = frame.metadata().get("lavfi.whisper.text") {
+                info!("Found whisper transcription: {}", whisper_text);
+
+                // A missing or negative pts maps to zero rather than wrapping
+                // around to a huge timestamp.
+                let start_time = frame.pts().map_or(gst::ClockTime::ZERO, |pts| {
+                    let ticks = u64::try_from(pts).unwrap_or(0);
+                    gst::ClockTime::from_nseconds(
+                        ticks.saturating_mul(NANOS_PER_SECOND) / WHISPER_SAMPLE_RATE,
+                    )
+                });
+
+                // Listing the metadata keys helps debugging, but is only
+                // worth building when it will actually be logged.
+                if log::log_enabled!(log::Level::Debug) {
+                    let metadata_keys: Vec<String> = frame
+                        .metadata()
+                        .iter()
+                        .map(|(k, _)| k.to_string())
+                        .collect();
+                    if !metadata_keys.is_empty() {
+                        debug!("Frame metadata keys: {:?}", metadata_keys);
+                    }
+                }
+
+                self.parse_whisper_text(whisper_text, start_time, sender)?;
+            }
+        }
+
+        if output_count > 0 {
+            info!("Processed {} frames from whisper filter", output_count);
+        } else {
+            debug!("No frames available from whisper filter");
+        }
+
+        Ok(())
+    }
+
+    /// Publishes `whisper_text` as one cue of the generated track (stream
+    /// index 1000), starting at `base_time` and lasting a fixed 3 seconds.
+    ///
+    /// The text is stored verbatim: no attempt is made to interpret it, even
+    /// though the filter may deliver it in a structured format.
+    ///
+    /// # Panics
+    /// Panics if the `NewCue` message cannot be delivered.
+    fn parse_whisper_text(
+        &self,
+        whisper_text: &str,
+        base_time: gst::ClockTime,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        debug!("Parsing whisper text: {}", whisper_text);
+
+        let whisper_stream_index = 1000;
+        // No end time is available here, so every cue gets a default length.
+        let default_duration = gst::ClockTime::from_seconds(3);
+
+        let cue = SubtitleCue {
+            start: base_time,
+            end: base_time + default_duration,
+            text: whisper_text.to_owned(),
+        };
+
+        {
+            // Release the write lock before notifying listeners.
+            let mut tracks = TRACKS.write();
+            if let Some(track) = tracks.get_mut(&whisper_stream_index) {
+                track.cues.push(cue.clone());
+            }
+        }
+
+        sender
+            .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+            .unwrap();
+
+        Ok(())
+    }
+
+    /// Parses SRT-formatted `subtitle_data` and publishes every cue with
+    /// non-empty text on the generated track (stream index 1000).
+    ///
+    /// Each entry is an optional numeric counter line, a timestamp line and
+    /// the text lines up to the next blank line. Lines that are not a valid
+    /// timestamp where one is expected are skipped.
+    ///
+    /// # Panics
+    /// Panics if a `NewCue` message cannot be delivered.
+    fn parse_whisper_subtitle_data(
+        &self,
+        subtitle_data: &str,
+        sender: &ComponentSender<Self>,
+    ) -> Result<()> {
+        info!(
+            "Parsing whisper subtitle data ({} characters)",
+            subtitle_data.len()
+        );
+        debug!("Subtitle data content:\n{}", subtitle_data);
+
+        let mut remaining = subtitle_data.lines();
+        while let Some(first) = remaining.next() {
+            // A purely numeric line is the entry counter; the timestamp is
+            // then expected on the line after it.
+            let timestamp_line = if first.trim().parse::<i32>().is_ok() {
+                match remaining.next() {
+                    Some(next) => next,
+                    None => break,
+                }
+            } else {
+                first
+            };
+
+            let (start, end) = match self.parse_srt_timestamp(timestamp_line) {
+                Some(range) => range,
+                None => continue,
+            };
+
+            // Everything up to the next blank line is cue text; the blank
+            // line itself is consumed by `take_while`.
+            let text_lines: Vec<&str> = remaining
+                .by_ref()
+                .take_while(|line| !line.trim().is_empty())
+                .collect();
+            if text_lines.is_empty() {
+                continue;
+            }
+
+            let cue = SubtitleCue {
+                start,
+                end,
+                text: text_lines.join("\n"),
+            };
+
+            let whisper_stream_index = 1000;
+            {
+                let mut tracks = TRACKS.write();
+                if let Some(track) = tracks.get_mut(&whisper_stream_index) {
+                    track.cues.push(cue.clone());
+                }
+            }
+
+            sender
+                .output(SubtitleExtractorOutput::NewCue(whisper_stream_index, cue))
+                .unwrap();
+        }
+
+        Ok(())
+    }
+
+    /// Parses an SRT timestamp line such as `00:00:01,234 --> 00:00:05,678`
+    /// into its `(start, end)` pair.
+    ///
+    /// Returns `None` unless the line consists of exactly two parts separated
+    /// by `" --> "` that both parse with `parse_srt_time`.
+    fn parse_srt_timestamp(&self, line: &str) -> Option<(gst::ClockTime, gst::ClockTime)> {
+        let mut halves = line.split(" --> ");
+        let begin = halves.next()?;
+        let finish = halves.next()?;
+        if halves.next().is_some() {
+            // More than one arrow: not a timestamp line.
+            return None;
+        }
+
+        let start = self.parse_srt_time(begin)?;
+        let end = self.parse_srt_time(finish)?;
+        Some((start, end))
+    }
+
+    /// Parses a single SRT time of the form `HH:MM:SS,mmm` (for example
+    /// `00:00:01,234`) into a clock time.
+    ///
+    /// Leading and trailing whitespace is ignored. Returns `None` when the
+    /// string does not have exactly one `,` and three `:`-separated numeric
+    /// fields before it, or when the value does not fit into 64-bit
+    /// nanoseconds.
+    fn parse_srt_time(&self, time_str: &str) -> Option<gst::ClockTime> {
+        let (clock, millis) = time_str.trim().split_once(',')?;
+        // A second comma ends up in `millis` and makes this parse fail.
+        let millis: u64 = millis.parse().ok()?;
+
+        let mut fields = clock.split(':').map(|field| field.parse::<u64>().ok());
+        let hours = fields.next()??;
+        let minutes = fields.next()??;
+        let seconds = fields.next()??;
+        if fields.next().is_some() {
+            return None;
+        }
+
+        // Checked 64-bit arithmetic: large hour values must not overflow.
+        let total_millis = hours
+            .checked_mul(3_600_000)?
+            .checked_add(minutes.checked_mul(60_000)?)?
+            .checked_add(seconds.checked_mul(1_000)?)?
+            .checked_add(millis)?;
+        let nanoseconds = total_millis.checked_mul(1_000_000)?;
+
+        Some(gst::ClockTime::from_nseconds(nanoseconds))
+    }
+
+    /// Converts a decoded `subtitle` and the `packet` it came from into a
+    /// cue.
+    ///
+    /// Timing comes from the packet: the cue starts at its pts and ends at
+    /// pts + duration, both converted from `time_base` units to nanoseconds.
+    /// The text of all text/ASS rects is joined with `"\n— "`; rects of
+    /// other kinds and rects without text are ignored.
+    ///
+    /// Returns `None` when the packet has no pts, when `time_base` has a
+    /// zero denominator, or when the subtitle contains no text at all.
+    fn subtitle_to_cue(
+        subtitle: &ffmpeg::Subtitle,
+        packet: &ffmpeg::Packet,
+        time_base: Rational,
+    ) -> Option<SubtitleCue> {
+        let start_ticks = packet.pts()?;
+        let end_ticks = start_ticks.saturating_add(packet.duration());
+
+        let numerator = i128::from(time_base.numerator());
+        let denominator = i128::from(time_base.denominator());
+        if denominator == 0 {
+            return None;
+        }
+
+        let time_to_clock_time = |time: i64| {
+            // 128-bit intermediate: `time * numerator * 1e9` overflows i64
+            // after only a few hours with fine-grained time bases.
+            let nseconds = i128::from(time) * numerator * 1_000_000_000 / denominator;
+            // Clamp into the range below u64::MAX; negative times become 0.
+            let clamped = nseconds.clamp(0, i128::from(u64::MAX - 1));
+            gst::ClockTime::from_nseconds(clamped as u64)
+        };
+
+        let text = subtitle
+            .rects()
+            .map(|rect| match rect {
+                ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+                ffmpeg::subtitle::Rect::Ass(ass) => {
+                    Self::extract_dialogue_text(ass.get()).unwrap_or_default()
+                }
+                _ => String::new(),
+            })
+            // Rects without text would otherwise leave dangling separators.
+            .filter(|rect_text| !rect_text.is_empty())
+            .collect::<Vec<String>>()
+            .join("\n— ");
+        if text.is_empty() {
+            return None;
+        }
+
+        Some(SubtitleCue {
+            start: time_to_clock_time(start_ticks),
+            end: time_to_clock_time(end_ticks),
+            text,
+        })
+    }
+
+    /// Extracts the displayable text from an ASS dialogue line as delivered
+    /// by FFmpeg's subtitle decoder.
+    ///
+    /// The line has the form
+    /// `ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text`;
+    /// the text is the 9th field and may itself contain commas. If the line
+    /// has fewer fields, the last one present is used. See also
+    /// https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433
+    ///
+    /// Override blocks such as `{\b1}` or `{\c&Hffffff&}` are removed, `\N`
+    /// becomes a newline and `\n` / `\h` become a space. Braces that do not
+    /// belong to an override block are kept as they are.
+    fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
+        // Split on at most 9 fields so commas inside the text survive.
+        let text = dialogue_line.splitn(9, ',').last()?;
+
+        let mut result = String::with_capacity(text.len());
+        let mut in_tag = false;
+        let mut char_iter = text.chars().peekable();
+
+        while let Some(c) = char_iter.next() {
+            match c {
+                // An override block starts with `{\`.
+                '{' if char_iter.peek() == Some(&'\\') => in_tag = true,
+                // Only a `}` that closes an override block is swallowed; a
+                // stray one is ordinary text.
+                '}' if in_tag => in_tag = false,
+                _ if in_tag => {}
+                // Line breaks and hard spaces.
+                '\\' => match char_iter.peek() {
+                    Some(&'N') => {
+                        char_iter.next();
+                        result.push('\n');
+                    }
+                    Some(&'n') | Some(&'h') => {
+                        char_iter.next();
+                        result.push(' ');
+                    }
+                    _ => result.push(c),
+                },
+                _ => result.push(c),
+            }
+        }
+
+        Some(result)
+    }
+}