aboutsummaryrefslogtreecommitdiff
path: root/src/subtitle_extractor.rs
diff options
context:
space:
mode:
authorMalte Voos <git@mal.tc>2025-10-01 00:20:10 +0200
committerMalte Voos <git@mal.tc>2025-10-01 00:20:10 +0200
commit338babaad2189f7ff1ee088994c8c20a0646ff4d (patch)
tree29fb2620f748d32a42c1d1eb3346771600a8d75b /src/subtitle_extractor.rs
downloadlleap-338babaad2189f7ff1ee088994c8c20a0646ff4d.tar.gz
lleap-338babaad2189f7ff1ee088994c8c20a0646ff4d.zip
init
Diffstat (limited to 'src/subtitle_extractor.rs')
-rw-r--r--src/subtitle_extractor.rs209
1 files changed, 209 insertions, 0 deletions
diff --git a/src/subtitle_extractor.rs b/src/subtitle_extractor.rs
new file mode 100644
index 0000000..53655a0
--- /dev/null
+++ b/src/subtitle_extractor.rs
@@ -0,0 +1,209 @@
+use std::collections::BTreeMap;
+
+use anyhow::Result;
+
+use ffmpeg::Rational;
+use log::{debug, error, info};
+use relm4::{ComponentSender, SharedState, Worker};
+
/// Index of a stream inside the opened media container; used as the key of
/// [`TRACKS`] and carried in the worker's output messages.
pub type StreamIndex = usize;
+
+#[derive(Debug, Clone)]
+pub struct SubtitleCue {
+ pub start: gst::ClockTime,
+ pub end: gst::ClockTime,
+ pub text: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct SubtitleTrack {
+ pub language_code: Option<String>,
+ pub title: Option<String>,
+ pub cues: Vec<SubtitleCue>,
+}
+
+pub static TRACKS: SharedState<BTreeMap<StreamIndex, SubtitleTrack>> = SharedState::new();
+
/// Worker that extracts subtitle tracks and cues from a media URL.
///
/// It holds no state of its own; results are published through [`TRACKS`]
/// and the worker's output messages.
pub struct SubtitleExtractor {}
+
/// Requests understood by [`SubtitleExtractor`].
#[derive(Debug)]
pub enum SubtitleExtractorMsg {
    /// Extract all subtitle streams from the given URL. The string is handed
    /// unchanged to ffmpeg's input opener.
    ExtractFromUrl(String),
}
+
+#[derive(Debug)]
+pub enum SubtitleExtractorOutput {
+ NewOrUpdatedTrackMetadata(StreamIndex),
+ NewCue(StreamIndex, SubtitleCue),
+ ExtractionComplete,
+}
+
+impl Worker for SubtitleExtractor {
+ type Init = ();
+ type Input = SubtitleExtractorMsg;
+ type Output = SubtitleExtractorOutput;
+
+ fn init(_init: Self::Init, _sender: ComponentSender<Self>) -> Self {
+ Self {}
+ }
+
+ fn update(&mut self, msg: SubtitleExtractorMsg, sender: ComponentSender<Self>) {
+ match msg {
+ SubtitleExtractorMsg::ExtractFromUrl(url) => {
+ self.handle_extract_from_url(url, sender);
+ }
+ }
+ }
+}
+
+impl SubtitleExtractor {
+ fn handle_extract_from_url(&mut self, url: String, sender: ComponentSender<Self>) {
+ // Clear existing tracks
+ TRACKS.write().clear();
+
+ // Try to extract subtitles using ffmpeg
+ match self.extract_subtitles_ffmpeg(&url, &sender) {
+ Ok(_) => {
+ info!("Subtitle extraction completed successfully");
+ sender
+ .output(SubtitleExtractorOutput::ExtractionComplete)
+ .unwrap();
+ }
+ Err(e) => {
+ error!("FFmpeg extraction failed: {}", e);
+ }
+ }
+ }
+
+ fn extract_subtitles_ffmpeg(&self, url: &str, sender: &ComponentSender<Self>) -> Result<()> {
+ let mut input = ffmpeg::format::input(&url)?;
+
+ let mut subtitle_decoders = BTreeMap::new();
+
+ // create decoder for each subtitle stream
+ for (stream_index, stream) in input.streams().enumerate() {
+ if stream.parameters().medium() == ffmpeg::media::Type::Subtitle {
+ let language_code = stream.metadata().get("language").map(|s| s.to_string());
+ let title = stream.metadata().get("title").map(|s| s.to_string());
+
+ let track = SubtitleTrack {
+ language_code,
+ title,
+ cues: Vec::new(),
+ };
+
+ TRACKS.write().insert(stream_index, track);
+
+ sender
+ .output(SubtitleExtractorOutput::NewOrUpdatedTrackMetadata(
+ stream_index,
+ ))
+ .unwrap();
+
+ let context =
+ ffmpeg::codec::context::Context::from_parameters(stream.parameters())?;
+ if let Ok(decoder) = context.decoder().subtitle() {
+ subtitle_decoders.insert(stream_index, decoder);
+ debug!("Created decoder for subtitle stream {}", stream_index);
+ } else {
+ error!(
+ "Failed to create decoder for subtitle stream {}",
+ stream_index
+ );
+ }
+ }
+ }
+
+ // process packets
+ for (stream, packet) in input.packets() {
+ let stream_index = stream.index();
+
+ if let Some(decoder) = subtitle_decoders.get_mut(&stream_index) {
+ let mut subtitle = ffmpeg::Subtitle::new();
+ if decoder.decode(&packet, &mut subtitle).is_ok() {
+ if let Some(cue) = Self::subtitle_to_cue(&subtitle, &packet, stream.time_base())
+ {
+ if let Some(track) = TRACKS.write().get_mut(&stream_index) {
+ track.cues.push(cue.clone());
+ }
+
+ sender
+ .output(SubtitleExtractorOutput::NewCue(stream_index, cue))
+ .unwrap();
+ }
+ }
+ }
+ }
+
+ Ok(())
+ }
+
+ fn subtitle_to_cue(
+ subtitle: &ffmpeg::Subtitle,
+ packet: &ffmpeg::Packet,
+ time_base: Rational,
+ ) -> Option<SubtitleCue> {
+ let time_to_clock_time = |time: i64| {
+ let nseconds: i64 = (time * time_base.numerator() as i64 * 1_000_000_000)
+ / time_base.denominator() as i64;
+ gst::ClockTime::from_nseconds(nseconds as u64)
+ };
+
+ let text = subtitle
+ .rects()
+ .into_iter()
+ .map(|rect| match rect {
+ ffmpeg::subtitle::Rect::Text(text) => text.get().to_string(),
+ ffmpeg::subtitle::Rect::Ass(ass) => {
+ Self::extract_dialogue_text(ass.get()).unwrap_or(String::new())
+ }
+ _ => String::new(),
+ })
+ .collect::<Vec<String>>()
+ .join("\n— ");
+
+ let start = time_to_clock_time(packet.pts()?);
+ let end = time_to_clock_time(packet.pts()? + packet.duration());
+
+ Some(SubtitleCue { start, end, text })
+ }
+
/// Extracts the plain text from an ASS dialogue line as produced by ffmpeg.
///
/// The line has the form
/// `ReadOrder,Layer,Style,Name,MarginL,MarginR,MarginV,Effect,Text`; only the
/// ninth field (`Text`, which may itself contain commas) is used. Override
/// blocks such as `{\b1}` or `{\c&Hffffff&}` are removed, `\N` becomes a
/// newline and `\n` / `\h` become a space. Braces that do not open an
/// override block (no backslash after `{`) are kept as ordinary text.
///
/// Returns `None` when the line has fewer than nine fields.
///
/// See also <https://github.com/FFmpeg/FFmpeg/blob/a700f0f72d1f073e5adcfbb16f4633850b0ef51c/libavcodec/ass_split.c#L433>
fn extract_dialogue_text(dialogue_line: &str) -> Option<String> {
    // Split on at most 9 fields so commas inside the text are preserved, and
    // insist that the ninth field really exists.
    let text = dialogue_line.splitn(9, ',').nth(8)?;

    let mut result = String::with_capacity(text.len());
    let mut in_tag = false;
    let mut char_iter = text.chars().peekable();

    while let Some(c) = char_iter.next() {
        match c {
            // `{\` opens an override block
            '{' if !in_tag && char_iter.peek() == Some(&'\\') => in_tag = true,
            // `}` closes it, but only if one is open; a stray `}` is text
            '}' if in_tag => in_tag = false,
            // everything inside an override block is dropped
            _ if in_tag => {}
            // line breaks and hard spaces
            '\\' => match char_iter.peek().copied() {
                Some('N') => {
                    char_iter.next();
                    result.push('\n');
                }
                Some('n') | Some('h') => {
                    char_iter.next();
                    result.push(' ');
                }
                // unknown escape or trailing backslash: keep it verbatim
                _ => result.push(c),
            },
            _ => result.push(c),
        }
    }

    Some(result)
}
+}