A Faster Way to Learn from Videos

  1. yt-dlp: download the YouTube video
  2. Whisper: transcribe video -> text
  3. video -> key frames (frame-change threshold > 0.2)
  • learn-fast/
import sys
import os
import subprocess
import json

import cv2
import numpy as np
from yt_dlp import YoutubeDL
from mlx_whisper import transcribe
from slugify import slugify

# create mp.json if it does not exist
if not os.path.exists("mp.json"):
    with open("mp.json", "w") as f:
        json.dump({}, f)

def download_youtube_video(url, output_path='./videos'):
    mp = {}
    ydl_opts = {
        'verbose': True,
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(output_path, '%(id)s.%(ext)s'),
        'merge_output_format': 'mp4',
        'quiet': False,
        'writesubtitles': True,
        'writeautomaticsub': True,
    }
    info_save = None
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        video_title = info['title']
        info_save = info
        video_file = os.path.join(output_path, f"{info['id']}.mp4")

    # cache the video metadata in mp.json, keyed by URL
    try:
        with open("mp.json", "r") as f:
            mp = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mp.json: {e}")
        print(f"Error position: {e.pos}")
        with open("mp.json", "r") as f:
            json_data = f.read()
        print(f"JSON content up to error position: {json_data[:e.pos]}")

    mp[url] = info_save
    with open("mp.json", "w") as f:
        json.dump(mp, f)
    return f"{output_path}/{mp[url]['id']}.mp4"


def video_to_text(output_folder, audio_file):
    print("==", audio_file)
    output = transcribe(audio_file, word_timestamps=True)
    with open(f"{audio_file}.transcription.json", "w") as f:
        # numpy scalars (e.g. float16 word timestamps) are not JSON serializable
        json.dump(output, f, default=lambda x: x.item() if isinstance(x, np.generic) else str(x))
    return output


def extract_key_frames(video_path, output_folder, threshold=0.2):
    cap = cv2.VideoCapture(video_path)
    path = os.path.join(output_folder, os.path.splitext(os.path.basename(video_path))[0])
    basename = os.path.basename(path)

    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(f"{output_folder}/{basename}", exist_ok=True)

    success, prev_frame = cap.read()
    if success:
        cv2.imwrite(f"{output_folder}/{basename}/frame_0.jpg", prev_frame)

    count = 0
    ans = []
    while success:
        success, curr_frame = cap.read()
        if not success:
            break

        # fraction of pixels that changed between consecutive grayscale frames
        diff = cv2.absdiff(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY),
                           cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY))
        non_zero_count = np.count_nonzero(diff)
        non_zero_ratio = non_zero_count / diff.size
        if non_zero_ratio > threshold:
            frame_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            frame_name = f"{output_folder}/{basename}/frame_{int(frame_time)}.jpg"
            cv2.imwrite(frame_name, curr_frame)
            print(f"Saved frame at {frame_time:.2f} seconds as {frame_name}")
            ans.append({"frame": frame_name, "time": frame_time})
        prev_frame = curr_frame
        count += 1

    cap.release()
    with open(f"{output_folder}/{basename}/keyframes.json", "w") as f:
        json.dump(ans, f)
    print(f"Extracted frames saved in {output_folder}")


def video_to_audio(video_file=""):
    # requires ffmpeg on the PATH
    audio_file = f"{video_file}.wav"
    subprocess.run(["ffmpeg", "-i", video_file, audio_file], check=True)
    return audio_file


def main():
    if len(sys.argv) != 2:
        print("Usage: python pt.py <YouTube URL>")
        return

    youtube_url = sys.argv[1]
    video_output_path = './videos'
    frames_output_folder = './frames'
    threshold = 0.2  # frame-change threshold

    video_file = download_youtube_video(youtube_url, video_output_path)
    audio_file = video_to_audio(video_file)
    transcription = video_to_text("videos", audio_file)
    # extract_key_frames(video_file, frames_output_folder, threshold)


if __name__ == "__main__":
    main()
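
To run it, the imports map to roughly these PyPI packages (names assumed from convention), plus ffmpeg on the PATH for the audio step:

  • pip install yt-dlp mlx-whisper opencv-python numpy python-slugify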

Fastapi-K8s

![[Pasted image 20240717010914.png]]

  • Dockerfile

FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9
COPY ./main.py /app/main.py
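
The base image looks for /app/main.py and serves the `app` object defined there via Gunicorn/Uvicorn, so copying in a single file is the entire build.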
  • fastapi-deployment.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: fastapi-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fastapi
  template:
    metadata:
      labels:
        app: fastapi
    spec:
      containers:
        - name: fastapi-container
          image: alanhc/test:latest
          ports:
            - containerPort: 80
  • fastapi-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: fastapi-service
spec:
  selector:
    app: fastapi
  ports:
    - protocol: TCP
      port: 80
      targetPort: 80
  type: NodePort
  • main.py

from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def read_root():
    return {"Hello": "World"}

kubectl apply -f fastapi-deployment.yaml
kubectl apply -f fastapi-service.yaml
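
The Service is type NodePort, so Kubernetes exposes it on a port in the 30000-32767 range on every node; `kubectl get service fastapi-service` shows the assigned port, and the app then answers at http://<node-ip>:<node-port>/.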

Building Your Own Database Agent

https://learn.deeplearning.ai/courses/building-your-own-database-agent/lesson/1/introduction ![[Database Agent 1.png]] ![[Artificial Intelligence.png]] ![[Fine tuning.png]] ![[Database Agents.png]]

from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

agent = create_pandas_dataframe_agent(llm=model, df=df, verbose=True)
agent.invoke("how many rows are there?")

> Entering new AgentExecutor chain…

Thought: To find out the number of rows in a pandas DataFrame, I can use the shape attribute which returns a tuple with the number of rows and columns. The first element of the tuple will give me the number of rows.
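
Under the hood the agent is just running pandas in a Python tool, so the step it reasons about is equivalent to (df being whatever DataFrame was passed in):

df.shape     # (rows, columns) tuple
df.shape[0]  # number of rows; len(df) gives the same count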

Hugging Face LLM

Make sure you have a Hugging Face token, and that it has read permission.

  • run.py
access_token = "hf_..."  # put your Hugging Face token here

# pip install accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=access_token,
)

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))
<bos>Write me a poem about Machine Learning.

Machines, they weave and they learn,
From
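
The poem stops short because generate() uses a small default token budget; passing max_new_tokens (a standard transformers argument) produces longer output:

outputs = model.generate(**input_ids, max_new_tokens=200)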

Ref

https://huggingface.co/google/gemma-2b/discussions/28
https://huggingface.co/google/gemma-2b-it

Inferencing-Llm

## mac

Install

  • pip install mlx-lm

Use the CLI on Mac

  • python -m mlx_lm.generate --model mlx-community/Phi-3-mini-4k-instruct-4bit --prompt "how are you"
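
The same model can also be called from Python; a minimal sketch using the mlx_lm API (load/generate, as documented by the package):

from mlx_lm import load, generate

model, tokenizer = load("mlx-community/Phi-3-mini-4k-instruct-4bit")
print(generate(model, tokenizer, prompt="how are you", verbose=True))

The rest of this section builds a RAG pipeline over a local PDF: LangChain splits the pages, embeds them into Chroma, and an MLX-hosted Gemma model answers from the retrieved chunks.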
import bs4
import getpass
import os

from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

os.environ["OPENAI_API_KEY"] = "sk-proj-"  # or getpass.getpass()
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("files/替代役管理作業規定.pdf")
pages = loader.load_and_split()
from langchain_community.llms.mlx_pipeline import MLXPipeline

llm = MLXPipeline.from_model_id(
    "mlx-community/quantized-gemma-2b-it",
    # pipeline_kwargs={"max_tokens": 512, "temp": 0.1},
    pipeline_kwargs={"temp": 0.1},
)
from huggingface_hub import login

login(token="")

docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the PDF.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# "What law is the substitute-service management regulation based on?"
question = "替代役管理作業規定是依據甚麼法律?"
rag_chain.invoke(f"{question} ")
# ' 根據上述法律,替代役管理作業規定主要依據兵役法第 26條。'
# i.e. "Per the above, the regulation is mainly based on Article 26 of the Military Service Act."
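
To sanity-check what the retriever feeds the prompt, pull the top chunks directly (standard LangChain retriever call; retriever.invoke(question) in newer releases):

docs = retriever.get_relevant_documents(question)
print(docs[0].page_content[:200])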

Ollama
