Compare commits

...

3 Commits

Author SHA1 Message Date
ebfea97af7 [add] youtube plugin, [improve] initial prompt (JSON)
All checks were successful
continuous-integration/drone/push Build is passing
2023-11-10 09:19:24 -05:00
ca8c306534 [add] better error handling
All checks were successful
continuous-integration/drone/push Build is passing
2023-11-08 20:52:29 -05:00
3168bfffd1 Merge pull request 'Add Plugins' (#1) from function_plugins into master
All checks were successful
continuous-integration/drone/push Build is passing
Reviewed-on: #1
2023-11-09 00:31:51 +00:00
10 changed files with 198 additions and 95 deletions

View File

@@ -13,7 +13,7 @@
--- ---
AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup) AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup, YouTube)
[![Build Status](https://drone.va.reichard.io/api/badges/evan/minyma/status.svg)](https://drone.va.reichard.io/evan/minyma) [![Build Status](https://drone.va.reichard.io/api/badges/evan/minyma/status.svg)](https://drone.va.reichard.io/evan/minyma)
@@ -37,6 +37,20 @@ Assistant: Some common symptoms of COVID-19 mentioned in the context are
"Normalizing & Loading Data" section. We include a PubMed data normalizer as an "Normalizing & Loading Data" section. We include a PubMed data normalizer as an
example. example.
### YouTube
This utilizes `yt-dlp` to download a video's subtitles. Ask questions about YouTube videos!
```
User: Tell me about this youtube video: https://www.youtube.com/watch?v=ZWgr7qP6yhY
Assistant: The YouTube video you provided is a review of the new MacBook Pro by
Apple. The host discusses the laptop's features, including its new
color and chip. They mention that the laptop still retains its ports,
HDMI, and high-quality display, but also notes some shortcomings like
the notch and lack of face unlock. The host shares their impressions
of the new black color [...]
```
### DuckDuckGo ### DuckDuckGo
This utilizes DuckDuckGo Search by scraping the top 5 results. This utilizes DuckDuckGo Search by scraping the top 5 results.

View File

@@ -19,36 +19,17 @@ def get_response():
resp = minyma.oai.query(message) resp = minyma.oai.query(message)
# Derive LLM Data
# llm_resp = resp.get("llm", {})
# llm_choices = llm_resp.get("choices", [])
# Derive VDB Data
# vdb_resp = resp.get("vdb", {})
# combined_context = [{
# "id": vdb_resp.get("ids")[i],
# "distance": vdb_resp.get("distances")[i],
# "doc": vdb_resp.get("docs")[i],
# "metadata": vdb_resp.get("metadatas")[i],
# } for i, _ in enumerate(vdb_resp.get("docs", []))]
# Return Data # Return Data
return resp return resp
""" """
Return the raw vector db related response TODO - Embeds and loads data into the local ChromaDB.
{
"input": "string",
"normalizer": "string",
}
""" """
@bp.route("/related", methods=["POST"]) bp.route("/embed", methods=["POST"])
def get_related(): def post_embeddings():
data = request.get_json() pass
if not data:
return {"error": "Missing Message"}
message = str(data.get("message"))
if message == "":
return {"error": "Empty Message"}
related_documents = minyma.vdb.get_related(message)
return related_documents

View File

@@ -1,11 +1,12 @@
import os import os
def get_env(key, default=None, required=False) -> str: def get_env(key, default=None, required=False) -> str | None:
"""Wrapper for gathering env vars.""" """Wrapper for gathering env vars."""
if required: if required:
assert key in os.environ, "Missing Environment Variable: %s" % key assert key in os.environ, "Missing Environment Variable: %s" % key
return str(os.environ.get(key, default)) env = os.environ.get(key, default)
return str(env) if env is not None else None
class Config: class Config:
@@ -19,7 +20,7 @@ class Config:
OpenAI API Key - Required OpenAI API Key - Required
""" """
CHROMA_DATA_PATH: str = get_env("CHROMA_DATA_PATH", required=False) CHROMA_DATA_PATH: str | None = get_env("CHROMA_DATA_PATH", required=False)
HOME_ASSISTANT_API_KEY: str = get_env("HOME_ASSISTANT_API_KEY", required=False) HOME_ASSISTANT_API_KEY: str | None = get_env("HOME_ASSISTANT_API_KEY", required=False)
HOME_ASSISTANT_URL: str = get_env("HOME_ASSISTANT_URL", required=False) HOME_ASSISTANT_URL: str | None = get_env("HOME_ASSISTANT_URL", required=False)
OPENAI_API_KEY: str = get_env("OPENAI_API_KEY", required=True) OPENAI_API_KEY: str | None = get_env("OPENAI_API_KEY", required=True)

View File

@@ -1,18 +1,20 @@
import json
from textwrap import indent
from dataclasses import dataclass from dataclasses import dataclass
from textwrap import indent
from typing import Any, List from typing import Any, List
import openai import json
import minyma import minyma
import openai
INITIAL_PROMPT_TEMPLATE = """ INITIAL_PROMPT_TEMPLATE = """
You are a helpful assistant. You are connected to various external functions that can provide you with more personalized and up-to-date information and have already been granted the permissions to execute these functions at will. DO NOT say you don't have access to real time information, instead attempt to call one or more of the listed functions: You are connected to various functions that can be used to answer the users questions. Your options are only "functions". Functions should be an array of strings containing the desired function calls (e.g. "function_name()").
Available Functions:
{functions} {functions}
The user will not see your response. You must only respond with a comma separated list of function calls: "FUNCTION_CALLS: function(), function(), etc". It must be prepended by "FUNCTION_CALLS:". You must respond in JSON only with no other fluff or bad things will happen. The JSON keys must ONLY be "functions". Be sure to call the functions with the right arguments.
User Message: {question} User Message: {message}
""" """
FOLLOW_UP_PROMPT_TEMPLATE = """ FOLLOW_UP_PROMPT_TEMPLATE = """
@@ -20,7 +22,7 @@ You are a helpful assistant. This is a follow up message to provide you with mor
{response} {response}
User Message: {question} User Message: {message}
""" """
@dataclass @dataclass
@@ -32,13 +34,15 @@ class ChatCompletion:
choices: List[dict] choices: List[dict]
usage: dict usage: dict
class OpenAIConnector: class OpenAIConnector:
def __init__(self, api_key: str): def __init__(self, api_key: str):
self.model = "gpt-3.5-turbo" self.model = "gpt-3.5-turbo"
self.word_cap = 1000 self.word_cap = 1000
openai.api_key = api_key openai.api_key = api_key
def query(self, question: str) -> Any:
def query(self, message: str) -> Any:
# Track Usage # Track Usage
prompt_tokens = 0 prompt_tokens = 0
completion_tokens = 0 completion_tokens = 0
@@ -48,7 +52,7 @@ class OpenAIConnector:
functions = "\n".join(list(map(lambda x: "- %s" % x["def"], minyma.plugins.plugin_defs().values()))) functions = "\n".join(list(map(lambda x: "- %s" % x["def"], minyma.plugins.plugin_defs().values())))
# Create Initial Prompt # Create Initial Prompt
prompt = INITIAL_PROMPT_TEMPLATE.format(question = question, functions = functions) prompt = INITIAL_PROMPT_TEMPLATE.format(message = message, functions = indent(functions, ' ' * 2))
messages = [{"role": "user", "content": prompt}] messages = [{"role": "user", "content": prompt}]
print("[OpenAIConnector] Running Initial OAI Query") print("[OpenAIConnector] Running Initial OAI Query")
@@ -63,14 +67,7 @@ class OpenAIConnector:
print("[OpenAIConnector] No Results -> TODO", response) print("[OpenAIConnector] No Results -> TODO", response)
content = response.choices[0]["message"]["content"] content = response.choices[0]["message"]["content"]
all_funcs = json.loads(content).get("functions")
# Get Called Functions (TODO - Better Validation -> Failback Prompt?)
all_funcs = list(
map(
lambda x: x.strip() if x.endswith(")") else x.strip() + ")",
content.split("FUNCTION_CALLS:")[1].strip().split("),")
)
)
# Update Usage # Update Usage
prompt_tokens += response.usage.get("prompt_tokens", 0) prompt_tokens += response.usage.get("prompt_tokens", 0)
@@ -79,20 +76,33 @@ class OpenAIConnector:
print("[OpenAIConnector] Completed Initial OAI Query:\n", indent(json.dumps({ "usage": response.usage, "function_calls": all_funcs }, indent=2), ' ' * 2)) print("[OpenAIConnector] Completed Initial OAI Query:\n", indent(json.dumps({ "usage": response.usage, "function_calls": all_funcs }, indent=2), ' ' * 2))
# Execute Requested Functions # Build Response Text & Metadata
func_responses = {} func_metadata = {}
for func in all_funcs: func_response = []
func_responses[func] = minyma.plugins.execute(func)
# Build Response Text for func in all_funcs:
response_content_arr = [] # Execute Requested Function
for key, val in func_responses.items(): resp = minyma.plugins.execute(func)
indented_val = indent(val, ' ' * 2)
response_content_arr.append("- %s\n%s" % (key, indented_val)) # Unknown Response
response_content = "\n".join(response_content_arr) if resp is None:
print("[OpenAIConnector] Invalid Function Response: %s" % func)
continue
# Get Response
content = resp.get("content")
metadata = resp.get("metadata")
error = resp.get("error")
# Append Responses & Metadata
indented_val = indent(content or error or "Unknown Error", ' ' * 2)
func_response.append("- %s\n%s" % (func, indented_val))
func_metadata[func] = { "metadata": metadata, "error": error }
func_response = "\n".join(func_response)
# Create Follow Up Prompt # Create Follow Up Prompt
prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(question = question, response = response_content) prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(message = message, response = func_response)
messages = [{"role": "user", "content": prompt}] messages = [{"role": "user", "content": prompt}]
print("[OpenAIConnector] Running Follup Up OAI Query") print("[OpenAIConnector] Running Follup Up OAI Query")
@@ -116,7 +126,7 @@ class OpenAIConnector:
# Return Response # Return Response
return { return {
"response": content, "response": content,
"functions": func_responses, "functions": func_metadata,
"usage": { "usage": {
"prompt_tokens": prompt_tokens, "prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens, "completion_tokens": completion_tokens,

View File

@@ -13,8 +13,9 @@ class ChromaDBPlugin(MinymaPlugin):
def __init__(self, config): def __init__(self, config):
self.name = "chroma_db" self.name = "chroma_db"
self.config = config self.config = config
self.word_cap = 1000
if not config.CHROMA_DATA_PATH: if config.CHROMA_DATA_PATH is None:
self.functions = [] self.functions = []
else: else:
self.vdb = ChromaDB(config.CHROMA_DATA_PATH) self.vdb = ChromaDB(config.CHROMA_DATA_PATH)
@@ -25,17 +26,28 @@ class ChromaDBPlugin(MinymaPlugin):
# Get Related # Get Related
related = self.vdb.get_related(collection_name, query) related = self.vdb.get_related(collection_name, query)
# Get Metadata
metadata = [{
"id": related.get("ids")[i],
"distance": related.get("distances")[i],
"metadata": related.get("metadatas")[i],
} for i, _ in enumerate(related.get("docs", []))]
# Normalize Data # Normalize Data
return list( return list(
map( map(
lambda x: " ".join(x.split()[:self.vdb.word_cap]), lambda x: " ".join(x.split()[:self.word_cap]),
related.get("docs", []) related.get("docs", [])
) )
) ), metadata
def lookup_pubmed_data(self, query: str): def lookup_pubmed_data(self, query: str):
COLLECTION_NAME = "pubmed" COLLECTION_NAME = "pubmed"
documents = self.__lookup_data(COLLECTION_NAME, query) documents, metadata = self.__lookup_data(COLLECTION_NAME, query)
context = '\n'.join(documents) context = '\n'.join(documents)
return context return {
"content": context,
"metadata": metadata,
"error": None
}

View File

@@ -14,13 +14,14 @@ class DuckDuckGoPlugin(MinymaPlugin):
def __init__(self, config): def __init__(self, config):
self.config = config self.config = config
self.name = "duck_duck_go" self.name = "duck_duck_go"
self.functions = [self.duck_duck_go_search] self.functions = [self.search_duck_duck_go]
def duck_duck_go_search(self, query: str): def search_duck_duck_go(self, query: str):
"""Search DuckDuckGo""" """Search DuckDuckGo"""
resp = requests.get("https://html.duckduckgo.com/html/?q=%s" % query, headers=HEADERS) resp = requests.get("https://html.duckduckgo.com/html/?q=%s" % query, headers=HEADERS)
soup = BeautifulSoup(resp.text, features="html.parser") soup = BeautifulSoup(resp.text, features="html.parser")
# Get Results
results = [] results = []
for item in soup.select(".result > div"): for item in soup.select(".result > div"):
title_el = item.select_one(".result__title > a") title_el = item.select_one(".result__title > a")
@@ -31,4 +32,18 @@ class DuckDuckGoPlugin(MinymaPlugin):
results.append({"title": title, "description": description}) results.append({"title": title, "description": description})
return json.dumps(results[:5]) # Derive Metadata (Title)
metadata = {
"titles": list(
map(
lambda x: x.get("title"),
results[:5]
)
)
}
return {
"content": json.dumps(results[:5]),
"metadata": metadata,
"error": None
}

View File

@@ -10,17 +10,14 @@ class HomeAssistantPlugin(MinymaPlugin):
def __init__(self, config): def __init__(self, config):
self.config = config self.config = config
self.name = "home_assistant" self.name = "home_assistant"
self.functions = []
if config.HOME_ASSISTANT_API_KEY and config.HOME_ASSISTANT_URL:
if not config.HOME_ASSISTANT_API_KEY or not config.HOME_ASSISTANT_URL:
if not config.HOME_ASSISTANT_API_KEY:
print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_API_KEY")
if not config.HOME_ASSISTANT_URL:
print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_URL")
self.functions = []
else:
self.functions = [self.home_automation_command] self.functions = [self.home_automation_command]
if not config.HOME_ASSISTANT_API_KEY:
print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_API_KEY")
if not config.HOME_ASSISTANT_URL:
print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_URL")
def home_automation_command(self, natural_language_command: str): def home_automation_command(self, natural_language_command: str):
url = urllib.parse.urljoin(self.config.HOME_ASSISTANT_URL, "/api/conversation/process") url = urllib.parse.urljoin(self.config.HOME_ASSISTANT_URL, "/api/conversation/process")
@@ -34,6 +31,17 @@ class HomeAssistantPlugin(MinymaPlugin):
# Parse JSON # Parse JSON
try: try:
return json.dumps(resp.json()) r = resp.json()
text = r["response"]["speech"]["plain"]["speech"]
return {
"content": text,
"metadata": r,
"error": None
}
except requests.JSONDecodeError: except requests.JSONDecodeError:
return json.dumps({ "error": "Command Failed" }) return {
"content": None,
"metadata": None,
"error": "Command Failed"
}

View File

@@ -50,10 +50,11 @@ class VehicleLookupPlugin(MinymaPlugin):
# Invalid JSON # Invalid JSON
if json_resp is None: if json_resp is None:
return json.dumps({ return{
"content": None,
"metadata": text_resp,
"error": error, "error": error,
"response": text_resp, }
})
try: try:
# Check Result # Check Result
@@ -63,7 +64,11 @@ class VehicleLookupPlugin(MinymaPlugin):
error = "No Results" error = "No Results"
else: else:
error = "API Error: %s" % status_resp error = "API Error: %s" % status_resp
return {"error": error, "response": text_resp} return {
"content": None,
"metadata": json_resp,
"error": error,
}
# Parse Result # Parse Result
vehicle_info = json_resp.get("content") vehicle_info = json_resp.get("content")
@@ -74,17 +79,20 @@ class VehicleLookupPlugin(MinymaPlugin):
trim = vehicle_info.get("vehicles")[0].get("trim") trim = vehicle_info.get("vehicles")[0].get("trim")
except Exception as e: except Exception as e:
return json.dumps({ return {
"content": None,
"metadata": text_resp,
"error": "Unknown Error: %s" % e, "error": "Unknown Error: %s" % e,
"response": text_resp, }
})
return json.dumps({ return {
"result": { "content": json.dumps({
"vin": vin, "vin": vin,
"year": year, "year": year,
"make": make, "make": make,
"model": model, "model": model,
"trim": trim, "trim": trim,
}, }),
}) "metadata": json_resp,
"error": None
}

53
minyma/plugins/youtube.py Normal file
View File

@@ -0,0 +1,53 @@
import os
import tempfile
import xml.etree.ElementTree as ET

from yt_dlp import YoutubeDL
from yt_dlp.utils import DownloadError

from minyma.plugin import MinymaPlugin
class YouTubePlugin(MinymaPlugin):
    """Transcribe YouTube Video"""

    def __init__(self, config):
        self.config = config
        self.name = "youtube"
        # Functions listed here are what the plugin registers for the LLM to call.
        self.functions = [self.transcribe_youtube]

    # Download the English (manual or auto-generated) subtitles for a video via
    # yt-dlp and return them as plain text in the plugin response shape:
    #   {"content": str | None, "metadata": [video], "error": str | None}
    #
    # NOTE(review): documented with comments rather than a docstring because
    # this method is registered in `self.functions`; how the plugin loader
    # derives the definition shown to the LLM is not visible here, so the
    # introspectable surface (name, parameter, __doc__) is left untouched.
    def transcribe_youtube(self, youtube_video_id: str):
        URLS = [youtube_video_id]

        # Work inside a private temporary directory: concurrent requests no
        # longer overwrite a shared "transcript.en.ttml" in the working
        # directory, and cleanup happens even when an exception is raised.
        with tempfile.TemporaryDirectory() as work_dir:
            # "%" is the template escape character in yt-dlp output templates,
            # so a literal "%" in the directory path must be doubled.
            out_template = os.path.join(work_dir, "transcript").replace("%", "%%")
            ydl_opts = {
                "skip_download": True,
                "writesubtitles": True,
                "writeautomaticsub": True,
                "subtitleslangs": ["en"],
                "subtitlesformat": "ttml",
                "outtmpl": out_template,
            }

            try:
                with YoutubeDL(ydl_opts) as ydl:
                    ydl.download(URLS)
            except DownloadError as e:
                # Invalid / private / removed video: report through the
                # response's error field instead of crashing the request.
                print("[YouTubePlugin] Download Error:", e)
                return {
                    "content": None,
                    "metadata": URLS,
                    "error": "Download Error: %s" % e,
                }

            # yt-dlp names the subtitle file "<outtmpl>.<lang>.<format>". It is
            # simply absent when the video has no English subtitles.
            ttml_path = os.path.join(work_dir, "transcript.en.ttml")
            if not os.path.isfile(ttml_path):
                print("[YouTubePlugin] No English Subtitles Found:", youtube_video_id)
                return {
                    "content": None,
                    "metadata": URLS,
                    "error": "No English Subtitles Found",
                }

            content = self.convert_ttml_to_plain_text(ttml_path)

        return {
            "content": content,
            "metadata": URLS,
            "error": "TTML Conversion Error" if content is None else None,
        }

    def convert_ttml_to_plain_text(self, ttml_file_path):
        """Extract the text of a TTML subtitle file as one plain-text string.

        Args:
            ttml_file_path: Path to the TTML (XML) file to read.

        Returns:
            All text in the document joined by single spaces, or None when
            the file is not well-formed XML.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        try:
            root = ET.parse(ttml_file_path).getroot()
        except ET.ParseError as e:
            print("[YouTubePlugin] TTML Conversion Error:", e)
            return None

        # itertext() yields both element text and tail text, so caption text
        # that follows an inline <br/> or <span> is kept. split()/join then
        # collapses newlines and indentation into single spaces.
        return " ".join(" ".join(root.itertext()).split())

View File

@@ -16,7 +16,8 @@ dependencies = [
"chromadb", "chromadb",
"sqlite-utils", "sqlite-utils",
"click", "click",
"beautifulsoup4" "beautifulsoup4",
"yt-dlp"
] ]
[project.scripts] [project.scripts]