[add] youtube plugin, [improve] initial prompt (JSON)
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
ca8c306534
commit
ebfea97af7
16
README.md
16
README.md
@ -13,7 +13,7 @@
|
||||
|
||||
---
|
||||
|
||||
AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup)
|
||||
AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup, YouTube)
|
||||
|
||||
[![Build Status](https://drone.va.reichard.io/api/badges/evan/minyma/status.svg)](https://drone.va.reichard.io/evan/minyma)
|
||||
|
||||
@ -37,6 +37,20 @@ Assistant: Some common symptoms of COVID-19 mentioned in the context are
|
||||
"Normalizing & Loading Data" section. We include a PubMed data normalizer as an
|
||||
example.
|
||||
|
||||
### YouTube
|
||||
|
||||
This utilizes `yt-dlp` to download a video's subtitles. Ask questions about YouTube videos!
|
||||
|
||||
```
|
||||
User: Tell me about this youtube video: https://www.youtube.com/watch?v=ZWgr7qP6yhY
|
||||
Assistant: The YouTube video you provided is a review of the new MacBook Pro by
|
||||
Apple. The host discusses the laptop's features, including its new
|
||||
color and chip. They mention that the laptop still retains its ports,
|
||||
HDMI, and high-quality display, but also notes some shortcomings like
|
||||
the notch and lack of face unlock. The host shares their impressions
|
||||
of the new black color [...]
|
||||
```
|
||||
|
||||
### DuckDuckGo
|
||||
|
||||
This utilizes DuckDuckGo Search by scraping the top 5 results.
|
||||
|
@ -1,18 +1,20 @@
|
||||
import json
|
||||
from textwrap import indent
|
||||
from dataclasses import dataclass
|
||||
from textwrap import indent
|
||||
from typing import Any, List
|
||||
import openai
|
||||
import json
|
||||
import minyma
|
||||
import openai
|
||||
|
||||
INITIAL_PROMPT_TEMPLATE = """
|
||||
You are a helpful assistant. You are connected to various external functions that can provide you with more personalized and up-to-date information and have already been granted the permissions to execute these functions at will. DO NOT say you don't have access to real time information, instead attempt to call one or more of the listed functions:
|
||||
You are connected to various functions that can be used to answer the users questions. Your options are only "functions". Functions should be an array of strings containing the desired function calls (e.g. "function_name()").
|
||||
|
||||
Available Functions:
|
||||
|
||||
{functions}
|
||||
|
||||
The user will not see your response. You must only respond with a comma separated list of function calls: "FUNCTION_CALLS: function(), function(), etc". It must be prepended by "FUNCTION_CALLS:".
|
||||
You must respond in JSON only with no other fluff or bad things will happen. The JSON keys must ONLY be "functions". Be sure to call the functions with the right arguments.
|
||||
|
||||
User Message: {question}
|
||||
User Message: {message}
|
||||
"""
|
||||
|
||||
FOLLOW_UP_PROMPT_TEMPLATE = """
|
||||
@ -20,7 +22,7 @@ You are a helpful assistant. This is a follow up message to provide you with mor
|
||||
|
||||
{response}
|
||||
|
||||
User Message: {question}
|
||||
User Message: {message}
|
||||
"""
|
||||
|
||||
@dataclass
|
||||
@ -32,13 +34,15 @@ class ChatCompletion:
|
||||
choices: List[dict]
|
||||
usage: dict
|
||||
|
||||
|
||||
class OpenAIConnector:
|
||||
def __init__(self, api_key: str):
|
||||
self.model = "gpt-3.5-turbo"
|
||||
self.word_cap = 1000
|
||||
openai.api_key = api_key
|
||||
|
||||
def query(self, question: str) -> Any:
|
||||
|
||||
def query(self, message: str) -> Any:
|
||||
# Track Usage
|
||||
prompt_tokens = 0
|
||||
completion_tokens = 0
|
||||
@ -48,7 +52,7 @@ class OpenAIConnector:
|
||||
functions = "\n".join(list(map(lambda x: "- %s" % x["def"], minyma.plugins.plugin_defs().values())))
|
||||
|
||||
# Create Initial Prompt
|
||||
prompt = INITIAL_PROMPT_TEMPLATE.format(question = question, functions = functions)
|
||||
prompt = INITIAL_PROMPT_TEMPLATE.format(message = message, functions = indent(functions, ' ' * 2))
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
|
||||
print("[OpenAIConnector] Running Initial OAI Query")
|
||||
@ -63,14 +67,7 @@ class OpenAIConnector:
|
||||
print("[OpenAIConnector] No Results -> TODO", response)
|
||||
|
||||
content = response.choices[0]["message"]["content"]
|
||||
|
||||
# Get Called Functions (TODO - Better Validation -> Failback Prompt?)
|
||||
all_funcs = list(
|
||||
map(
|
||||
lambda x: x.strip() if x.endswith(")") else x.strip() + ")",
|
||||
content.split("FUNCTION_CALLS:")[1].strip().split("),")
|
||||
)
|
||||
)
|
||||
all_funcs = json.loads(content).get("functions")
|
||||
|
||||
# Update Usage
|
||||
prompt_tokens += response.usage.get("prompt_tokens", 0)
|
||||
@ -105,7 +102,7 @@ class OpenAIConnector:
|
||||
func_response = "\n".join(func_response)
|
||||
|
||||
# Create Follow Up Prompt
|
||||
prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(question = question, response = func_response)
|
||||
prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(message = message, response = func_response)
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
|
||||
print("[OpenAIConnector] Running Follup Up OAI Query")
|
||||
|
@ -14,9 +14,9 @@ class DuckDuckGoPlugin(MinymaPlugin):
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.name = "duck_duck_go"
|
||||
self.functions = [self.duck_duck_go_search]
|
||||
self.functions = [self.search_duck_duck_go]
|
||||
|
||||
def duck_duck_go_search(self, query: str):
|
||||
def search_duck_duck_go(self, query: str):
|
||||
"""Search DuckDuckGo"""
|
||||
resp = requests.get("https://html.duckduckgo.com/html/?q=%s" % query, headers=HEADERS)
|
||||
soup = BeautifulSoup(resp.text, features="html.parser")
|
||||
|
53
minyma/plugins/youtube.py
Normal file
53
minyma/plugins/youtube.py
Normal file
@ -0,0 +1,53 @@
|
||||
import os
|
||||
from yt_dlp import YoutubeDL
|
||||
import xml.etree.ElementTree as ET
|
||||
from minyma.plugin import MinymaPlugin
|
||||
|
||||
class YouTubePlugin(MinymaPlugin):
    """Transcribe YouTube Video"""

    def __init__(self, config):
        self.config = config
        self.name = "youtube"
        self.functions = [self.transcribe_youtube]

    # Fetch the English subtitles (manual or auto-generated) of a video with
    # yt-dlp and return them as plain text.
    #
    # Parameters:
    #   youtube_video_id: handed to yt-dlp unchanged as the single download
    #                     target.
    #
    # Returns a dict with the keys:
    #   "content":  the transcript text, or None when it could not be produced
    #   "metadata": a one-element list holding the requested video
    #   "error":    None on success, otherwise a short error description
    #
    # NOTE(review): documented with comments rather than a docstring so that
    # the function's __doc__ stays exactly as before; the plugin loader may
    # read it -- confirm before adding a docstring.
    def transcribe_youtube(self, youtube_video_id: str):
        # Local import: keeps this block self-contained without touching the
        # module-level import section.
        import tempfile

        urls = [youtube_video_id]

        # A private scratch directory per call: parallel requests cannot
        # overwrite each other's subtitle file, and the file is removed even
        # when the download or the conversion raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            ttml_path = os.path.join(tmp_dir, "transcript.en.ttml")

            options = {
                "skip_download": True,
                "writesubtitles": True,
                "writeautomaticsub": True,
                "subtitleslangs": ["en"],
                "subtitlesformat": "ttml",
                # "%" introduces a field in yt-dlp output templates, so any
                # literal "%" in the directory path has to be doubled.
                "outtmpl": os.path.join(tmp_dir, "transcript").replace("%", "%%"),
            }

            with YoutubeDL(options) as ydl:
                ydl.download(urls)

            # yt-dlp writes nothing when the video has no English subtitles;
            # report that instead of failing while opening a missing file.
            if not os.path.isfile(ttml_path):
                return {
                    "content": None,
                    "metadata": urls,
                    "error": "No English Subtitles Found",
                }

            content = self.convert_ttml_to_plain_text(ttml_path)

        return {
            "content": content,
            "metadata": urls,
            "error": "TTML Conversion Error" if content is None else None,
        }

    def convert_ttml_to_plain_text(self, ttml_file_path):
        """Return the text of a TTML file as one whitespace-normalized string.

        Both element text and tail text are collected, so caption text that
        follows an inline child element (for example a line break) is kept.
        Runs of whitespace, including indentation between elements, collapse
        to single spaces.

        Returns None when the file cannot be read or is not well-formed XML.
        """
        try:
            root = ET.parse(ttml_file_path).getroot()
        except (ET.ParseError, OSError) as e:
            print("[YouTubePlugin] TTML Conversion Error:", e)
            return None

        # itertext() walks the tree in document order and yields text and
        # tails; splitting each chunk drops whitespace-only nodes.
        return " ".join(
            word for chunk in root.itertext() for word in chunk.split()
        )
|
@ -16,7 +16,8 @@ dependencies = [
|
||||
"chromadb",
|
||||
"sqlite-utils",
|
||||
"click",
|
||||
"beautifulsoup4"
|
||||
"beautifulsoup4",
|
||||
"yt-dlp"
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
Loading…
Reference in New Issue
Block a user