aboutsummaryrefslogtreecommitdiff
path: root/src/subtitle_extraction/whisper.rs
diff options
context:
space:
mode:
author: Malte Voos <git@mal.tc> 2025-11-14 15:30:49 +0100
committer: Malte Voos <git@mal.tc> 2025-11-14 15:30:49 +0100
commit: a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e (patch)
tree: 542b42d3316138043272faba42e0d1005f8403b6 /src/subtitle_extraction/whisper.rs
parent: a42a73378b7c527a5e4600544b2d7a86d68c5aac (diff)
download: lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.tar.gz
download: lleap-a8457a25ccb9b1ef47f5ce9d7ac1a84c47600c9e.zip
implement file/url open dialog
Diffstat (limited to 'src/subtitle_extraction/whisper.rs')
-rw-r--r--src/subtitle_extraction/whisper.rs75
1 files changed, 75 insertions, 0 deletions
diff --git a/src/subtitle_extraction/whisper.rs b/src/subtitle_extraction/whisper.rs
new file mode 100644
index 0000000..5622d6f
--- /dev/null
+++ b/src/subtitle_extraction/whisper.rs
@@ -0,0 +1,75 @@
+use std::sync::mpsc;
+
+use anyhow::Context;
+use ffmpeg::filter;
+
+use crate::{subtitle_extraction::*, tracks::StreamIndex};
+
+/// Transcribes an audio stream by running it through FFmpeg's `whisper`
+/// filter.
+///
+/// Builds an `abuffer -> whisper -> abuffersink` filter graph configured from
+/// the decoder's parameters, then feeds every packet received on `packet_rx`
+/// through the decoder and the graph. The function returns once the sending
+/// side of `packet_rx` has been dropped. A failure while handling a single
+/// packet is logged and does not stop processing.
+///
+/// `time_base` and `sender` are accepted but not used by this function yet.
+///
+/// # Errors
+///
+/// Returns an error if the audio decoder cannot be created, if the `abuffer`
+/// or `abuffersink` filter cannot be found, or if the filter graph cannot be
+/// built, parsed or validated.
+pub fn generate_whisper_subtitles(
+    // stream index to use when storing generated subtitles, this index
+    // already has to be in TRACKS when this function is called!
+    stream_ix: StreamIndex,
+    context: ffmpeg::codec::Context,
+    time_base: ffmpeg::Rational,
+    packet_rx: mpsc::Receiver<ffmpeg::Packet>,
+    sender: ComponentSender<SubtitleExtractor>,
+) -> anyhow::Result<()> {
+    // NOTE(review): this is an absolute path on one developer's machine; it
+    // should eventually come from configuration.
+    const WHISPER_MODEL_PATH: &str = "/Users/malte/repos/lleap/whisper-models/ggml-large-v3.bin";
+    // Value of the whisper filter's `queue` option.
+    // NOTE(review): presumably seconds of audio buffered per transcription
+    // run; confirm against the FFmpeg filter documentation.
+    const WHISPER_QUEUE: u32 = 30;
+
+    let mut decoder = context
+        .decoder()
+        .audio()
+        .with_context(|| format!("error creating audio decoder for stream {}", stream_ix))?;
+
+    let mut filter = filter::Graph::new();
+
+    // The buffer source has to be told the exact format of the frames the
+    // decoder is going to produce.
+    let abuffer_args = format!(
+        "time_base={}:sample_rate={}:sample_fmt={}:channel_layout=0x{:x}",
+        decoder.time_base(),
+        decoder.rate(),
+        decoder.format().name(),
+        decoder.channel_layout().bits()
+    );
+    let whisper_args = format!(
+        "model={}:queue={}:format=json",
+        WHISPER_MODEL_PATH, WHISPER_QUEUE
+    );
+    let filter_spec = format!("[src] whisper={} [sink]", whisper_args);
+
+    // A missing filter is reported as an error instead of panicking.
+    let abuffer = filter::find("abuffer").context("ffmpeg filter 'abuffer' not found")?;
+    let abuffersink =
+        filter::find("abuffersink").context("ffmpeg filter 'abuffersink' not found")?;
+
+    filter.add(&abuffer, "src", &abuffer_args)?;
+    filter.add(&abuffersink, "sink", "")?;
+    filter
+        .output("src", 0)?
+        .input("sink", 0)?
+        .parse(&filter_spec)?;
+    filter.validate()?;
+
+    let mut source_ctx = filter
+        .get("src")
+        .context("filter graph has no 'src' filter")?;
+    let mut sink_ctx = filter
+        .get("sink")
+        .context("filter graph has no 'sink' filter")?;
+
+    // Iterating the receiver blocks for each packet and ends once every
+    // sender has been dropped.
+    // NOTE(review): decoder and filter graph are not flushed when the channel
+    // closes, so audio still buffered at that point may never be transcribed.
+    for packet in packet_rx {
+        if let Err(e) = handle_packet(&mut decoder, source_ctx.source(), sink_ctx.sink(), packet) {
+            log::error!("error handling audio packet: {}", e);
+        }
+    }
+
+    Ok(())
+}
+
+/// Decodes one audio packet and runs the resulting frames through the filter
+/// graph.
+///
+/// Every frame the decoder produces for `packet` is pushed into `source`.
+/// After each push, all frames currently available from `sink` are read, and
+/// the value of their `lavfi.whisper.text` metadata entry, if present, is
+/// printed to stdout.
+///
+/// # Errors
+///
+/// Returns an error if the decoder rejects the packet, if a frame cannot be
+/// added to `source`, or if the decoder or `sink` fails with anything other
+/// than "no frame available right now" (`EAGAIN`) or end of stream.
+fn handle_packet(
+    decoder: &mut ffmpeg::decoder::Audio,
+    mut source: filter::Source,
+    mut sink: filter::Sink,
+    packet: ffmpeg::Packet,
+) -> anyhow::Result<()> {
+    decoder.send_packet(&packet)?;
+
+    // SAFETY: NOTE(review): `Frame::empty` is an `unsafe fn`; the frame is
+    // only handed to `receive_frame`, which fills it before it is read here.
+    // Confirm this matches the ffmpeg crate's documented contract.
+    let mut in_frame = unsafe { ffmpeg::Frame::empty() };
+
+    // A single packet may decode to zero, one or several frames, so keep
+    // receiving until the decoder asks for more input.
+    loop {
+        match decoder.receive_frame(&mut in_frame) {
+            Ok(()) => {}
+            Err(ffmpeg::Error::Eof) => break,
+            Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::util::error::EAGAIN => break,
+            Err(e) => return Err(e.into()),
+        }
+
+        source.add(&in_frame)?;
+
+        // Drain whatever the filter graph has ready.
+        loop {
+            // A fresh frame per read, so the sink never writes over a frame
+            // that still holds data from the previous iteration.
+            // SAFETY: see the note on `in_frame` above; `sink.frame` fills the
+            // frame before it is read.
+            let mut out_frame = unsafe { ffmpeg::Frame::empty() };
+            match sink.frame(&mut out_frame) {
+                Ok(()) => {}
+                Err(ffmpeg::Error::Eof) => break,
+                Err(ffmpeg::Error::Other { errno }) if errno == ffmpeg::util::error::EAGAIN => {
+                    break
+                }
+                Err(e) => return Err(e.into()),
+            }
+
+            if let Some(text) = out_frame.metadata().get("lavfi.whisper.text") {
+                println!("{}", text);
+            }
+        }
+    }
+
+    Ok(())
+}