Alan Tseng

A faster way to learn from videos

  1. yt-dlp: download the YouTube video
  2. Whisper: video audio -> text
  3. video -> key frames (frame-change threshold > 0.2)
  • learn-fast/
import sys
import os
import subprocess
import json

import cv2
import numpy as np
from yt_dlp import YoutubeDL
from mlx_whisper import transcribe

# mp.json caches yt-dlp metadata per URL; create it on first run.
if not os.path.exists("mp.json"):
    with open("mp.json", "w") as f:
        json.dump({}, f)


def download_youtube_video(url, output_path="./videos"):
    """Step 1: download the video (and subtitles, if available) with yt-dlp."""
    ydl_opts = {
        "verbose": True,
        "format": "bestvideo+bestaudio/best",
        "outtmpl": os.path.join(output_path, "%(id)s.%(ext)s"),
        "merge_output_format": "mp4",
        "quiet": False,
        "writesubtitles": True,
        "writeautomaticsub": True,
    }
    with YoutubeDL(ydl_opts) as ydl:
        # sanitize_info() strips non-JSON-serializable fields from the info dict.
        info = ydl.sanitize_info(ydl.extract_info(url, download=True))

    # Cache the metadata in mp.json, keyed by URL.
    mp = {}
    try:
        with open("mp.json", "r") as f:
            mp = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mp.json: {e}")
        print(f"Error position: {e.pos}")
        with open("mp.json", "r") as f:
            json_data = f.read()
        print(f"JSON content up to error position: {json_data[:e.pos]}")
    mp[url] = info
    with open("mp.json", "w") as f:
        json.dump(mp, f)

    return os.path.join(output_path, f"{info['id']}.mp4")


def video_to_text(audio_file):
    """Step 2: transcribe the audio with mlx-whisper and save the result as JSON."""
    print("==", audio_file)
    output = transcribe(audio_file, word_timestamps=True)
    with open(f"{audio_file}.transcription.json", "w") as f:
        # Word timestamps come back as numpy scalars, which json cannot serialize directly.
        json.dump(output, f, default=lambda x: x.item() if isinstance(x, np.generic) else str(x))
    return output


def extract_key_frames(video_path, output_folder, threshold=0.2):
    """Step 3: save a frame whenever more than `threshold` of the pixels change."""
    basename = os.path.splitext(os.path.basename(video_path))[0]
    frame_dir = os.path.join(output_folder, basename)
    os.makedirs(frame_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    success, prev_frame = cap.read()
    if success:
        cv2.imwrite(os.path.join(frame_dir, "frame_0.jpg"), prev_frame)

    keyframes = []
    while success:
        success, curr_frame = cap.read()
        if not success:
            break
        # Ratio of pixels whose grayscale value changed between consecutive frames.
        diff = cv2.absdiff(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY),
                           cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY))
        non_zero_ratio = np.count_nonzero(diff) / diff.size
        if non_zero_ratio > threshold:
            frame_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            frame_name = os.path.join(frame_dir, f"frame_{int(frame_time)}.jpg")
            cv2.imwrite(frame_name, curr_frame)
            print(f"Saved frame at {frame_time:.2f} seconds as {frame_name}")
            keyframes.append({"frame": frame_name, "time": frame_time})
        prev_frame = curr_frame
    cap.release()

    with open(os.path.join(frame_dir, "keyframes.json"), "w") as f:
        json.dump(keyframes, f)
    print(f"Extracted frames saved in {output_folder}")


def video_to_audio(video_file):
    """Extract the audio track with ffmpeg so Whisper can transcribe it."""
    audio_file = f"{video_file}.wav"
    # -y: overwrite an existing .wav on re-runs
    subprocess.run(["ffmpeg", "-y", "-i", video_file, audio_file], check=True)
    return audio_file


def main():
    if len(sys.argv) != 2:
        print("Usage: python pt.py <YouTube URL>")
        return
    youtube_url = sys.argv[1]
    video_output_path = "./videos"
    frames_output_folder = "./frames"
    threshold = 0.2  # threshold for frame-to-frame change

    video_file = download_youtube_video(youtube_url, video_output_path)  # step 1
    audio_file = video_to_audio(video_file)
    video_to_text(audio_file)                                            # step 2
    extract_key_frames(video_file, frames_output_folder, threshold)      # step 3


if __name__ == "__main__":
    main()
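Running python pt.py <YouTube URL> (pt.py is the file name assumed by the script's own usage string) leaves everything on disk: the video as ./videos/<id>.mp4, its metadata cached in mp.json, the audio as <id>.mp4.wav, the transcript as <id>.mp4.wav.transcription.json, and the key frames plus keyframes.json under ./frames/<id>/.

The transcript is the part that makes skimming fast. A minimal sketch of reading it back, assuming mlx-whisper returns the same structure as OpenAI Whisper (a top-level "segments" list with "start" and "text" fields); the file path is a hypothetical example of the naming convention used by video_to_text() above:

import json

# Hypothetical path: replace VIDEO_ID with the downloaded video's id.
with open("./videos/VIDEO_ID.mp4.wav.transcription.json") as f:
    result = json.load(f)

# Print a timestamped outline so the talk can be skimmed without watching it.
for seg in result.get("segments", []):
    print(f"[{seg['start']:7.1f}s] {seg['text'].strip()}")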

@alanhc