title: A Faster Way to Learn from Videos
date: 2024-08-14
tags:
  - 學習
updated: 2024-08-14
up:
  - "[[學習]]"
- yt-dlp to download the YouTube video
- whisper: video -> text
- video -> key frames (change threshold > 0.2)
`learn-fast/`
```python
import sys
import os
import subprocess
from yt_dlp import YoutubeDL
from mlx_whisper import transcribe
import cv2
import numpy as np
import json
from slugify import slugify

# create mp.json (the download-info cache) if it does not exist yet
if not os.path.exists("mp.json"):
    with open("mp.json", "w") as f:
        json.dump({}, f)


def download_youtube_video(url, output_path='./videos'):
    mp = {}
    ydl_opts = {
        'verbose': True,
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(output_path, '%(id)s.%(ext)s'),
        'merge_output_format': 'mp4',
        'quiet': False,
        'writesubtitles': True,
        'writeautomaticsub': True,
    }
    info_save = None
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        video_title = info['title']
        info_save = info

    video_file = os.path.join(output_path, f"{info['id']}.mp4")

    # cache the extracted info in mp.json, keyed by URL
    try:
        with open("mp.json", "r") as f:
            mp = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mp.json: {e}")
        print(f"Error position: {e.pos}")
        with open("mp.json", "r") as f:
            json_data = f.read()
        print(f"JSON content up to error position: {json_data[:e.pos]}")
        # mp = {}

    mp[url] = info_save
    with open("mp.json", "w") as f:
        json.dump(mp, f)

    return f"{output_path}/{mp[url]['id']}.mp4"


def video_to_text(output_folder, audio_file):
    print("==", audio_file)
    output = transcribe(audio_file, word_timestamps=True)
    with open(f"{audio_file}.transcription.json", "w") as f:
        # numpy floats in the output (e.g. word timestamps) are not JSON serializable; cast them
        json.dump(output, f, default=lambda x: x.item() if isinstance(x, np.floating) else str(x))
    return output


def extract_key_frames(video_path, output_folder, threshold=0.2):
    cap = cv2.VideoCapture(video_path)
    path = os.path.join(output_folder, os.path.splitext(os.path.basename(video_path))[0])
    basename = os.path.basename(path)
    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(f"{output_folder}/{basename}", exist_ok=True)

    success, prev_frame = cap.read()
    if success:
        cv2.imwrite(f"{output_folder}/{basename}/frame_0.jpg", prev_frame)

    count = 0
    ans = []
    while success:
        success, curr_frame = cap.read()
        if not success:
            break
        # fraction of pixels that changed between consecutive grayscale frames
        diff = cv2.absdiff(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY),
                           cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY))
        non_zero_count = np.count_nonzero(diff)
        non_zero_ratio = non_zero_count / diff.size
        if non_zero_ratio > threshold:
            # the scene changed enough: save the frame, named by its timestamp
            frame_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            frame_name = f"{output_folder}/{basename}/frame_{int(frame_time)}.jpg"
            cv2.imwrite(frame_name, curr_frame)
            print(f"Saved frame at {frame_time:.2f} seconds as {frame_name}")
            ans.append(
                {
                    "frame": frame_name,
                    "time": frame_time
                }
            )
        prev_frame = curr_frame
        count += 1

    cap.release()
    with open(f"{output_folder}/{basename}/keyframes.json", "w") as f:
        json.dump(ans, f)
    print(f"Extracted frames saved in {output_folder}")


def video_to_audio(video_file=""):
    audio_file = f"{video_file}.wav"
    subprocess.run(["ffmpeg", "-i", video_file, audio_file], check=True)
    return audio_file


def main():
    if len(sys.argv) != 2:
        print("Usage: python pt.py <YouTube URL>")
        return

    youtube_url = sys.argv[1]
    video_output_path = './videos'
    frames_output_folder = './frames'
    threshold = 0.2  # threshold for frame-to-frame change

    video_file = download_youtube_video(youtube_url, video_output_path)
    audio_file = video_to_audio(video_file)
    transcription = video_to_text("videos", audio_file)
    # extract_key_frames(video_file, frames_output_folder, threshold)


if __name__ == "__main__":
    main()
```
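
Run the script as `python pt.py <YouTube URL>` (the usage string in `main`). Once it has finished, and with `extract_key_frames` re-enabled in `main`, the transcript and the key frames can be lined up by their timestamps. The sketch below is not part of the script: it assumes the Whisper-style `segments` list in the transcription JSON and the `{"frame", "time"}` records the script writes to `keyframes.json`; `segments_near`, `VIDEO_ID`, and the 10-second window are illustrative choices, not names from the original.

```python
# Minimal sketch: pair each key frame with the transcript segments spoken around it.
# Paths follow the layout the script above uses; VIDEO_ID is a placeholder.
import json

def segments_near(transcription_path, keyframes_path, window=10.0):
    with open(transcription_path) as f:
        segments = json.load(f)["segments"]   # Whisper-style segment list
    with open(keyframes_path) as f:
        keyframes = json.load(f)              # [{"frame": ..., "time": ...}, ...]
    notes = []
    for kf in keyframes:
        # keep segments whose start time falls within `window` seconds of the key frame
        text = " ".join(s["text"].strip() for s in segments
                        if abs(s["start"] - kf["time"]) <= window)
        notes.append({"frame": kf["frame"], "time": kf["time"], "text": text})
    return notes

if __name__ == "__main__":
    video_id = "VIDEO_ID"  # placeholder: the YouTube video id used in the file names
    notes = segments_near(f"videos/{video_id}.mp4.wav.transcription.json",
                          f"frames/{video_id}/keyframes.json")
    for note in notes:
        print(f'{note["time"]:.1f}s  {note["frame"]}\n  {note["text"]}\n')
```

A fixed window is the simplest alignment; matching each frame to the segments between it and the next key frame would work just as well.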