A Faster Way to Learn from Videos

  1. yt-dlp: download the YouTube video
  2. Whisper: transcribe video -> text
  3. video -> key frames (frame-change threshold > 0.2)
  • learn-fast/
import sys
import os
import subprocess
import json

import cv2
import numpy as np
from yt_dlp import YoutubeDL
from mlx_whisper import transcribe
from slugify import slugify

# create mp.json if it does not exist
if not os.path.exists("mp.json"):
    with open("mp.json", "w") as f:
        json.dump({}, f)

def download_youtube_video(url, output_path='./videos'):
    mp = {}
    ydl_opts = {
        'verbose': True,
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(output_path, '%(id)s.%(ext)s'),
        'merge_output_format': 'mp4',
        'quiet': False,
        'writesubtitles': True,
        'writeautomaticsub': True,
    }
    info_save = None
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        video_title = info['title']
        info_save = info
        video_file = os.path.join(output_path, f"{info['id']}.mp4")

    # cache the video metadata in mp.json, keyed by URL
    try:
        with open("mp.json", "r") as f:
            mp = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mp.json: {e}")
        print(f"Error position: {e.pos}")
        with open("mp.json", "r") as f:
            json_data = f.read()
        print(f"JSON content up to error position: {json_data[:e.pos]}")

    mp[url] = info_save
    with open("mp.json", "w") as f:
        json.dump(mp, f)
    return f"{output_path}/{mp[url]['id']}.mp4"


def video_to_text(output_folder, audio_file):
    print("==", audio_file)
    output = transcribe(audio_file, word_timestamps=True)
    with open(f"{audio_file}.transcription.json", "w") as f:
        # numpy scalars (e.g. float16 word timestamps) are not JSON serializable
        json.dump(output, f, default=lambda x: x.item() if isinstance(x, np.generic) else str(x))
    return output


def extract_key_frames(video_path, output_folder, threshold=0.2):
    cap = cv2.VideoCapture(video_path)
    path = os.path.join(output_folder, os.path.splitext(os.path.basename(video_path))[0])
    basename = os.path.basename(path)

    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(f"{output_folder}/{basename}", exist_ok=True)

    success, prev_frame = cap.read()
    if success:
        cv2.imwrite(f"{output_folder}/{basename}/frame_0.jpg", prev_frame)

    count = 0
    ans = []
    while success:
        success, curr_frame = cap.read()
        if not success:
            break

        # fraction of pixels that changed between consecutive grayscale frames
        diff = cv2.absdiff(cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY),
                           cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY))
        non_zero_count = np.count_nonzero(diff)
        non_zero_ratio = non_zero_count / diff.size
        if non_zero_ratio > threshold:
            frame_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            frame_name = f"{output_folder}/{basename}/frame_{int(frame_time)}.jpg"
            cv2.imwrite(frame_name, curr_frame)
            print(f"Saved frame at {frame_time:.2f} seconds as {frame_name}")
            ans.append({"frame": frame_name, "time": frame_time})
        prev_frame = curr_frame
        count += 1

    cap.release()
    with open(f"{output_folder}/{basename}/keyframes.json", "w") as f:
        json.dump(ans, f)
    print(f"Extracted frames saved in {output_folder}")


def video_to_audio(video_file=""):
    # requires ffmpeg on the PATH
    audio_file = f"{video_file}.wav"
    subprocess.run(["ffmpeg", "-i", video_file, audio_file], check=True)
    return audio_file


def main():
    if len(sys.argv) != 2:
        print("Usage: python pt.py <YouTube URL>")
        return

    youtube_url = sys.argv[1]
    video_output_path = './videos'
    frames_output_folder = './frames'
    threshold = 0.2  # frame-change threshold

    video_file = download_youtube_video(youtube_url, video_output_path)
    audio_file = video_to_audio(video_file)
    transcription = video_to_text("videos", audio_file)
    # extract_key_frames(video_file, frames_output_folder, threshold)


if __name__ == "__main__":
    main()
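
To run it, the imports map to roughly these PyPI packages (names assumed from convention), plus ffmpeg on the PATH for the audio step:

  • pip install yt-dlp mlx-whisper opencv-python numpy python-slugify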

Fastapi-K8s

![[Pasted image 20240717010914.png]]

  • Dockerfile

FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9
COPY ./main.py /app/main.py
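
The base image looks for /app/main.py and serves the `app` object defined there via Gunicorn/Uvicorn, so copying in a single file is the entire build.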
  • fastapi-deployment.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: fastapi-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fastapi
  template:
    metadata:
      labels:
        app: fastapi
    spec:
      containers:
        - name: fastapi-container
          image: alanhc/test:latest
          ports:
            - containerPort: 80
  • fastapi-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: fastapi-service
spec:
  selector:
    app: fastapi
  ports:
    - protocol: TCP
      port: 80
      targetPort: 80
  type: NodePort
  • main.py

from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def read_root():
    return {"Hello": "World"}

kubectl apply -f fastapi-deployment.yaml
kubectl apply -f fastapi-service.yaml
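
The Service is type NodePort, so Kubernetes exposes it on a port in the 30000-32767 range on every node; `kubectl get service fastapi-service` shows the assigned port, and the app then answers at http://<node-ip>:<node-port>/.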

Building Your Own Database Agent

https://learn.deeplearning.ai/courses/building-your-own-database-agent/lesson/1/introduction ![[Database Agent 1.png]] ![[Artificial Intelligence.png]] ![[Fine tuning.png]] ![[Database Agents.png]]

from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

agent = create_pandas_dataframe_agent(llm=model, df=df, verbose=True)
agent.invoke("how many rows are there?")

> Entering new AgentExecutor chain…

Thought: To find out the number of rows in a pandas DataFrame, I can use the shape attribute which returns a tuple with the number of rows and columns. The first element of the tuple will give me the number of rows.
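
Under the hood the agent is just running pandas in a Python tool, so the step it reasons about is equivalent to (df being whatever DataFrame was passed in):

df.shape     # (rows, columns) tuple
df.shape[0]  # number of rows; len(df) gives the same count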

Hugging Face LLM

Make sure you have a Hugging Face token, and that it has read permission.

  • run.py
access_token = "hf_..."  # put your Hugging Face token here

# pip install accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=access_token,
)

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))
<bos>Write me a poem about Machine Learning.

Machines, they weave and they learn,
From
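
The poem stops short because generate() uses a small default token budget; passing max_new_tokens (a standard transformers argument) produces longer output:

outputs = model.generate(**input_ids, max_new_tokens=200)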

Ref

https://huggingface.co/google/gemma-2b/discussions/28
https://huggingface.co/google/gemma-2b-it

Inferencing-Llm

## mac

Install

  • pip install mlx-lm

Use the CLI on Mac

  • python -m mlx_lm.generate --model mlx-community/Phi-3-mini-4k-instruct-4bit --prompt "how are you"
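
The same model can also be called from Python; a minimal sketch using the mlx_lm API (load/generate, as documented by the package):

from mlx_lm import load, generate

model, tokenizer = load("mlx-community/Phi-3-mini-4k-instruct-4bit")
print(generate(model, tokenizer, prompt="how are you", verbose=True))

The rest of this section builds a RAG pipeline over a local PDF: LangChain splits the pages, embeds them into Chroma, and an MLX-hosted Gemma model answers from the retrieved chunks.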
import bs4
import getpass
import os

from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

os.environ["OPENAI_API_KEY"] = "sk-proj-"  # or getpass.getpass()
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("files/替代役管理作業規定.pdf")
pages = loader.load_and_split()
from langchain_community.llms.mlx_pipeline import MLXPipeline

llm = MLXPipeline.from_model_id(
    "mlx-community/quantized-gemma-2b-it",
    # pipeline_kwargs={"max_tokens": 512, "temp": 0.1},
    pipeline_kwargs={"temp": 0.1},
)
from huggingface_hub import login

login(token="")

docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the PDF.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# "What law is the substitute-service management regulation based on?"
question = "替代役管理作業規定是依據甚麼法律?"
rag_chain.invoke(f"{question} ")
# ' 根據上述法律,替代役管理作業規定主要依據兵役法第 26條。'
# i.e. "Per the above, the regulation is mainly based on Article 26 of the Military Service Act."
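
To sanity-check what the retriever feeds the prompt, pull the top chunks directly (standard LangChain retriever call; retriever.invoke(question) in newer releases):

docs = retriever.get_relevant_documents(question)
print(docs[0].page_content[:200])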

Ollama
