title: A Faster Way to Learn from Videos
date: 2024-08-14
tags:
  - 學習
updated: 2024-08-14
up:
  - "[[學習]]"
- yt-dlp to download the YouTube video
- whisper: video -> text
- video -> key frames (change threshold > 0.2)
`learn-fast/`
```python
import sys
import os
import subprocess
from yt_dlp import YoutubeDL
from mlx_whisper import transcribe
import cv2
import numpy as np
import json
from slugify import slugify

# create mp.json (the download-info cache) if it does not exist yet
if not os.path.exists("mp.json"):
    with open("mp.json", "w") as f:
        json.dump({}, f)


def download_youtube_video(url, output_path='./videos'):
    mp = {}
    ydl_opts = {
        'verbose': True,
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(output_path, '%(id)s.%(ext)s'),
        'merge_output_format': 'mp4',
        'quiet': False,
        'writesubtitles': True,
        'writeautomaticsub': True,
    }
    info_save = None
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        video_title = info['title']
        info_save = info

    video_file = os.path.join(output_path, f"{info['id']}.mp4")

    # cache the extracted info in mp.json, keyed by URL
    try:
        with open("mp.json", "r") as f:
            mp = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mp.json: {e}")
        print(f"Error position: {e.pos}")
        with open("mp.json", "r") as f:
            json_data = f.read()
        print(f"JSON content up to error position: {json_data[:e.pos]}")
        # mp = {}

    mp[url] = info_save
    with open("mp.json", "w") as f:
        json.dump(mp, f)

    return f"{output_path}/{mp[url]['id']}.mp4"


def video_to_text(output_folder, audio_file):
    print("==", audio_file)
    output = transcribe(audio_file, word_timestamps=True)
    with open(f"{audio_file}.transcription.json", "w") as f:
        # numpy floats in the output (e.g. word timestamps) are not JSON serializable; cast them
        json.dump(output, f, default=lambda x: x.item() if isinstance(x, np.floating) else str(x))
    return output


def extract_key_frames(video_path, output_folder, threshold=0.2):
    cap = cv2.VideoCapture(video_path)
    path = os.path.join(output_folder, os.path.splitext(os.path.basename(video_path))[0])
    basename = os.path.basename(path)
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(f"{output_folder}/{basename}", exist_ok=True)

    success, prev_frame = cap.read()
    if success:
        cv2.imwrite(f"{output_folder}/{basename}/frame_0.jpg", prev_frame)

    count = 0
    ans = []
    while success:
        success, curr_frame = cap.read()
        if not success:
            break
        # fraction of pixels that changed between consecutive grayscale frames
        diff = cv2.absdiff(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY),
                           cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY))
        non_zero_count = np.count_nonzero(diff)
        non_zero_ratio = non_zero_count / diff.size
        if non_zero_ratio > threshold:
            # the scene changed enough: save the frame, named by its timestamp
            frame_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            frame_name = f"{output_folder}/{basename}/frame_{int(frame_time)}.jpg"
            cv2.imwrite(frame_name, curr_frame)
            print(f"Saved frame at {frame_time:.2f} seconds as {frame_name}")
            ans.append(
                {
                    "frame": frame_name,
                    "time": frame_time
                }
            )
        prev_frame = curr_frame
        count += 1

    cap.release()
    with open(f"{output_folder}/{basename}/keyframes.json", "w") as f:
        json.dump(ans, f)
    print(f"Extracted frames saved in {output_folder}")


def video_to_audio(video_file=""):
    audio_file = f"{video_file}.wav"
    subprocess.run(["ffmpeg", "-i", video_file, audio_file], check=True)
    return audio_file


def main():
    if len(sys.argv) != 2:
        print("Usage: python pt.py <YouTube URL>")
        return

    youtube_url = sys.argv[1]
    video_output_path = './videos'
    frames_output_folder = './frames'
    threshold = 0.2  # threshold for frame-to-frame change

    video_file = download_youtube_video(youtube_url, video_output_path)
    audio_file = video_to_audio(video_file)
    transcription = video_to_text("videos", audio_file)
    # extract_key_frames(video_file, frames_output_folder, threshold)


if __name__ == "__main__":
    main()
```
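
Run the script as `python pt.py <YouTube URL>` (the usage string in `main`). Once it has finished, and with `extract_key_frames` re-enabled in `main`, the transcript and the key frames can be lined up by their timestamps. The sketch below is not part of the script: it assumes the Whisper-style `segments` list in the transcription JSON and the `{"frame", "time"}` records the script writes to `keyframes.json`; `segments_near`, `VIDEO_ID`, and the 10-second window are illustrative choices, not names from the original.

```python
# Minimal sketch: pair each key frame with the transcript segments spoken around it.
# Paths follow the layout the script above uses; VIDEO_ID is a placeholder.
import json

def segments_near(transcription_path, keyframes_path, window=10.0):
    with open(transcription_path) as f:
        segments = json.load(f)["segments"]   # Whisper-style segment list
    with open(keyframes_path) as f:
        keyframes = json.load(f)              # [{"frame": ..., "time": ...}, ...]
    notes = []
    for kf in keyframes:
        # keep segments whose start time falls within `window` seconds of the key frame
        text = " ".join(s["text"].strip() for s in segments
                        if abs(s["start"] - kf["time"]) <= window)
        notes.append({"frame": kf["frame"], "time": kf["time"], "text": text})
    return notes

if __name__ == "__main__":
    video_id = "VIDEO_ID"  # placeholder: the YouTube video id used in the file names
    notes = segments_near(f"videos/{video_id}.mp4.wav.transcription.json",
                          f"frames/{video_id}/keyframes.json")
    for note in notes:
        print(f'{note["time"]:.1f}s  {note["frame"]}\n  {note["text"]}\n')
```

A fixed window is the simplest alignment; matching each frame to the segments between it and the next key frame would work just as well.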