[add] youtube plugin, [improve] initial prompt (JSON)

[add] better error handling
Merge pull request 'Add Plugins' (#1 ) from function_plugins into master
2023-11-10 09:19:24 -05:00 · 2023-11-08 20:52:29 -05:00 · 2023-11-09 00:31:51 +00:00 · 2023-11-08 18:35:56 -05:00
11 changed files with 199 additions and 96 deletions
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@

 ---

-AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup)
+AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup, YouTube)

 [![Build Status](https://drone.va.reichard.io/api/badges/evan/minyma/status.svg)](https://drone.va.reichard.io/evan/minyma)

@@ -37,6 +37,20 @@ Assistant: Some common symptoms of COVID-19 mentioned in the context are
 "Normalizing & Loading Data" section. We include a PubMed data normalizer as an
 example.

+### YouTube
+
+This utilizes `yt-dlp` to download a video's subtitles. Ask questions about YouTube videos!
+
+```
+User:      Tell me about this youtube video: https://www.youtube.com/watch?v=ZWgr7qP6yhY
+Assistant: The YouTube video you provided is a review of the new MacBook Pro by
+           Apple. The host discusses the laptop's features, including its new
+           color and chip. They mention that the laptop still retains its ports,
+           HDMI, and high-quality display, but also notes some shortcomings like
+           the notch and lack of face unlock. The host shares their impressions
+           of the new black color [...]
+```
+
 ### DuckDuckGo

 This utilizes DuckDuckGo Search by scraping the top 5 results.
--- a/minyma/api/v1.py
+++ b/minyma/api/v1.py
@@ -19,36 +19,17 @@ def get_response():

    resp = minyma.oai.query(message)

-    # Derive LLM Data
-    # llm_resp = resp.get("llm", {})
-    # llm_choices = llm_resp.get("choices", [])
-
-    # Derive VDB Data
-    # vdb_resp = resp.get("vdb", {})
-    # combined_context  = [{
-    #         "id": vdb_resp.get("ids")[i],
-    #         "distance": vdb_resp.get("distances")[i],
-    #         "doc": vdb_resp.get("docs")[i],
-    #         "metadata": vdb_resp.get("metadatas")[i],
-    # } for i, _ in enumerate(vdb_resp.get("docs", []))]
-
    # Return Data
    return resp

-
-
 """
-Return the raw vector db related response
+TODO - Embeds and loads data into the local ChromaDB.
+
+{
+  "input": "string",
+  "normalizer": "string",
+}
 """
-@bp.route("/related", methods=["POST"])
-def get_related():
-    data = request.get_json()
-    if not data:
-        return {"error": "Missing Message"}
-
-    message = str(data.get("message"))
-    if message == "":
-        return {"error": "Empty Message"}
-
-    related_documents = minyma.vdb.get_related(message)
-    return related_documents
+@bp.route("/embed", methods=["POST"])
+def post_embeddings():
+    # Placeholder endpoint: embedding is not implemented yet, so answer with an
+    # explicit 501 rather than returning None (which is not a valid response).
+    return {"error": "Not Implemented"}, 501
--- a/minyma/config.py
+++ b/minyma/config.py
@@ -1,11 +1,12 @@
 import os


-def get_env(key, default=None, required=False) -> str:
+def get_env(key, default=None, required=False) -> str | None:
    """Wrapper for gathering env vars."""
    if required:
        assert key in os.environ, "Missing Environment Variable: %s" % key
-    return str(os.environ.get(key, default))
+    env = os.environ.get(key, default)
+    return str(env) if env is not None else None


 class Config:
@@ -19,7 +20,7 @@ class Config:
        OpenAI API Key - Required
    """

-    CHROMA_DATA_PATH: str = get_env("CHROMA_DATA_PATH", required=False)
-    HOME_ASSISTANT_API_KEY: str = get_env("HOME_ASSISTANT_API_KEY", required=False)
-    HOME_ASSISTANT_URL: str = get_env("HOME_ASSISTANT_URL", required=False)
-    OPENAI_API_KEY: str = get_env("OPENAI_API_KEY", required=True)
+    CHROMA_DATA_PATH: str | None = get_env("CHROMA_DATA_PATH", required=False)
+    HOME_ASSISTANT_API_KEY: str | None = get_env("HOME_ASSISTANT_API_KEY", required=False)
+    HOME_ASSISTANT_URL: str | None = get_env("HOME_ASSISTANT_URL", required=False)
+    OPENAI_API_KEY: str | None = get_env("OPENAI_API_KEY", required=True)
--- a/minyma/oai.py
+++ b/minyma/oai.py
@@ -1,18 +1,20 @@
-import json
-from textwrap import indent
 from dataclasses import dataclass
+from textwrap import indent
 from typing import Any, List
-import openai
+import json
 import minyma
+import openai

 INITIAL_PROMPT_TEMPLATE = """
-You are a helpful assistant. You are connected to various external functions that can provide you with more personalized and up-to-date information and have already been granted the permissions to execute these functions at will. DO NOT say you don't have access to real time information, instead attempt to call one or more of the listed functions:
+You are connected to various functions that can be used to answer the users questions. Your options are only "functions". Functions should be an array of strings containing the desired function calls (e.g. "function_name()").
+
+Available Functions:

 {functions}

-The user will not see your response. You must only respond with a comma separated list of function calls: "FUNCTION_CALLS: function(), function(), etc". It must be prepended by "FUNCTION_CALLS:".
+You must respond in JSON only with no other fluff or bad things will happen. The JSON keys must ONLY be "functions". Be sure to call the functions with the right arguments.

-User Message: {question}
+User Message: {message}
 """

 FOLLOW_UP_PROMPT_TEMPLATE = """
@@ -20,7 +22,7 @@ You are a helpful assistant. This is a follow up message to provide you with mor

 {response}

-User Message: {question}
+User Message: {message}
 """

@dataclass
@@ -32,13 +34,15 @@ class ChatCompletion:
    choices: List[dict]
    usage: dict

+
 class OpenAIConnector:
    def __init__(self, api_key: str):
        self.model = "gpt-3.5-turbo"
        self.word_cap = 1000
        openai.api_key = api_key

-    def query(self, question: str) -> Any:
+
+    def query(self, message: str) -> Any:
        # Track Usage
        prompt_tokens = 0
        completion_tokens = 0
@@ -48,7 +52,7 @@ class OpenAIConnector:
        functions = "\n".join(list(map(lambda x: "- %s" % x["def"], minyma.plugins.plugin_defs().values())))

        # Create Initial Prompt
-        prompt = INITIAL_PROMPT_TEMPLATE.format(question = question, functions = functions)
+        prompt = INITIAL_PROMPT_TEMPLATE.format(message = message, functions = indent(functions, ' ' * 2))
        messages = [{"role": "user", "content": prompt}]

        print("[OpenAIConnector] Running Initial OAI Query")
@@ -63,14 +67,7 @@ class OpenAIConnector:
            print("[OpenAIConnector] No Results -> TODO", response)

        content = response.choices[0]["message"]["content"]
-
-        # Get Called Functions (TODO - Better Validation -> Failback Prompt?)
-        all_funcs = list(
-            map(
-                lambda x: x.strip() if x.endswith(")") else x.strip() + ")",
-                content.split("FUNCTION_CALLS:")[1].strip().split("),")
-            )
-        )
+        all_funcs = json.loads(content).get("functions")

        # Update Usage
        prompt_tokens += response.usage.get("prompt_tokens", 0)
@@ -79,20 +76,33 @@ class OpenAIConnector:

        print("[OpenAIConnector] Completed Initial OAI Query:\n", indent(json.dumps({ "usage": response.usage, "function_calls": all_funcs }, indent=2), ' ' * 2))

-        # Execute Requested Functions
-        func_responses = {}
-        for func in all_funcs:
-            func_responses[func] = minyma.plugins.execute(func)
+        # Build Response Text & Metadata
+        func_metadata = {}
+        func_response = []

-        # Build Response Text
-        response_content_arr = []
-        for key, val in func_responses.items():
-            indented_val = indent(val, ' ' * 2)
-            response_content_arr.append("- %s\n%s" % (key, indented_val))
-        response_content = "\n".join(response_content_arr)
+        for func in all_funcs:
+            # Execute Requested Function
+            resp = minyma.plugins.execute(func)
+
+            # Unknown Response
+            if resp is None:
+                print("[OpenAIConnector] Invalid Function Response: %s" % func)
+                continue
+
+            # Get Response
+            content = resp.get("content")
+            metadata = resp.get("metadata")
+            error = resp.get("error")
+
+            # Append Responses & Metadata
+            indented_val = indent(content or error or "Unknown Error", ' ' * 2)
+            func_response.append("- %s\n%s" % (func, indented_val))
+            func_metadata[func] = { "metadata": metadata, "error": error }
+
+        func_response = "\n".join(func_response)

        # Create Follow Up Prompt
-        prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(question = question, response = response_content)
+        prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(message = message, response = func_response)
        messages = [{"role": "user", "content": prompt}]

        print("[OpenAIConnector] Running Follup Up OAI Query")
@@ -116,7 +126,7 @@ class OpenAIConnector:
        # Return Response
        return {
            "response": content,
-            "functions": func_responses,
+            "functions": func_metadata,
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
--- a/minyma/plugin.py
+++ b/minyma/plugin.py
@@ -48,7 +48,7 @@ class PluginLoader:
                )

                if func_name in defs:
-                    print("[PluginLoader] Error: Duplicate Function : (%s) %s" % (plugin_name, func_name))
+                    print("[PluginLoader] Error: Duplicate Function: (%s) %s" % (plugin_name, func_name))
                    continue

                func_def = "%s(%s)" % (func_name, ", ".join(params))
--- a/minyma/plugins/chroma_db.py
+++ b/minyma/plugins/chroma_db.py
@@ -13,8 +13,9 @@ class ChromaDBPlugin(MinymaPlugin):
    def __init__(self, config):
        self.name = "chroma_db"
        self.config = config
+        self.word_cap = 1000

-        if not config.CHROMA_DATA_PATH:
+        if config.CHROMA_DATA_PATH is None:
            self.functions = []
        else:
            self.vdb = ChromaDB(config.CHROMA_DATA_PATH)
@@ -25,17 +26,28 @@ class ChromaDBPlugin(MinymaPlugin):
        # Get Related
        related = self.vdb.get_related(collection_name, query)

+        # Get Metadata
+        metadata = [{
+                "id": related.get("ids")[i],
+                "distance": related.get("distances")[i],
+                "metadata": related.get("metadatas")[i],
+        } for i, _ in enumerate(related.get("docs", []))]
+
        # Normalize Data
        return list(
            map(
-                lambda x: " ".join(x.split()[:self.vdb.word_cap]),
+                lambda x: " ".join(x.split()[:self.word_cap]),
                related.get("docs", [])
            )
-        )
+        ), metadata


    def lookup_pubmed_data(self, query: str):
        COLLECTION_NAME = "pubmed"
-        documents = self.__lookup_data(COLLECTION_NAME, query)
+        documents, metadata = self.__lookup_data(COLLECTION_NAME, query)
        context = '\n'.join(documents)
-        return context
+        return {
+            "content": context,
+            "metadata": metadata,
+            "error": None
+        }
--- a/minyma/plugins/duckduckgo.py
+++ b/minyma/plugins/duckduckgo.py
@@ -14,13 +14,14 @@ class DuckDuckGoPlugin(MinymaPlugin):
    def __init__(self, config):
        self.config = config
        self.name = "duck_duck_go"
-        self.functions = [self.duck_duck_go_search]
+        self.functions = [self.search_duck_duck_go]

-    def duck_duck_go_search(self, query: str):
+    def search_duck_duck_go(self, query: str):
        """Search DuckDuckGo"""
        resp = requests.get("https://html.duckduckgo.com/html/?q=%s" % query, headers=HEADERS)
        soup = BeautifulSoup(resp.text, features="html.parser")

+        # Get Results
        results = []
        for item in soup.select(".result > div"):
            title_el = item.select_one(".result__title > a")
@@ -31,4 +32,18 @@ class DuckDuckGoPlugin(MinymaPlugin):

            results.append({"title": title, "description": description})

-        return json.dumps(results[:5])
+        # Derive Metadata (Title)
+        metadata = {
+            "titles": list(
+                map(
+                    lambda x: x.get("title"),
+                    results[:5]
+                )
+            )
+        }
+
+        return {
+            "content": json.dumps(results[:5]),
+            "metadata": metadata,
+            "error": None
+        }
--- a/minyma/plugins/home_assistant.py
+++ b/minyma/plugins/home_assistant.py
@@ -10,17 +10,14 @@ class HomeAssistantPlugin(MinymaPlugin):
    def __init__(self, config):
        self.config = config
        self.name = "home_assistant"
+        self.functions = []

-
-        if not config.HOME_ASSISTANT_API_KEY or not config.HOME_ASSISTANT_URL:
-            if not config.HOME_ASSISTANT_API_KEY:
-                print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_API_KEY")
-            if not config.HOME_ASSISTANT_URL:
-                print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_URL")
-
-            self.functions = []
-        else:
+        if config.HOME_ASSISTANT_API_KEY and config.HOME_ASSISTANT_URL:
            self.functions = [self.home_automation_command]
+        if not config.HOME_ASSISTANT_API_KEY:
+            print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_API_KEY")
+        if not config.HOME_ASSISTANT_URL:
+            print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_URL")

    def home_automation_command(self, natural_language_command: str):
        url = urllib.parse.urljoin(self.config.HOME_ASSISTANT_URL, "/api/conversation/process")
@@ -34,6 +31,17 @@ class HomeAssistantPlugin(MinymaPlugin):

        # Parse JSON
        try:
-            return json.dumps(resp.json())
+            r = resp.json()
+            text = r["response"]["speech"]["plain"]["speech"]
+
+            return {
+                "content": text,
+                "metadata": r,
+                "error": None
+            }
        except requests.JSONDecodeError:
-            return json.dumps({ "error": "Command Failed" })
+            return {
+                "content": None,
+                "metadata": None,
+                "error": "Command Failed"
+            }
--- a/minyma/plugins/vehicle_lookup.py
+++ b/minyma/plugins/vehicle_lookup.py
@@ -50,10 +50,11 @@ class VehicleLookupPlugin(MinymaPlugin):

        # Invalid JSON
        if json_resp is None:
-            return json.dumps({
+            return{
+                "content": None,
+                "metadata": text_resp,
                "error": error,
-                "response": text_resp,
-            })
+            }

        try:
            # Check Result
@@ -63,7 +64,11 @@ class VehicleLookupPlugin(MinymaPlugin):
                    error = "No Results"
                else:
                    error = "API Error: %s" % status_resp
-                return {"error": error, "response": text_resp}
+                return {
+                    "content": None,
+                    "metadata": json_resp,
+                    "error": error,
+                }

            # Parse Result
            vehicle_info = json_resp.get("content")
@@ -74,17 +79,20 @@ class VehicleLookupPlugin(MinymaPlugin):
            trim = vehicle_info.get("vehicles")[0].get("trim")

        except Exception as e:
-            return json.dumps({
+            return {
+                "content": None,
+                "metadata": text_resp,
                "error": "Unknown Error: %s" % e,
-                "response": text_resp,
-            })
+            }

-        return json.dumps({
-            "result": {
+        return {
+            "content": json.dumps({
                "vin": vin,
                "year": year,
                "make": make,
                "model": model,
                "trim": trim,
-            },
-        })
+            }),
+            "metadata": json_resp,
+            "error": None
+        }
--- a/minyma/plugins/youtube.py
+++ b/minyma/plugins/youtube.py
@@ -0,0 +1,53 @@
+import os
+from yt_dlp import YoutubeDL
+import xml.etree.ElementTree as ET
+from minyma.plugin import MinymaPlugin
+
+class YouTubePlugin(MinymaPlugin):
+    """Transcribe YouTube Video"""
+
+    def __init__(self, config):
+        self.config = config
+        self.name = "youtube"
+        self.functions = [self.transcribe_youtube]
+
+
+    def transcribe_youtube(self, youtube_video_id: str):
+        """Fetch a video's English subtitles and return them as plain text.
+
+        `youtube_video_id` is handed to yt-dlp unchanged as the download target.
+        Returns a dict with "content" (the transcript, or None on failure),
+        "metadata" (the list of requested targets) and "error" (a message, or
+        None on success).
+        """
+        # Local import: only this method needs it.
+        import tempfile
+
+        URLS = [youtube_video_id]
+        content = None
+        error = None
+
+        # Work in a per-call temporary directory so concurrent requests cannot
+        # overwrite each other's subtitle file and cleanup always happens.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            ttml_path = os.path.join(tmp_dir, "transcript.en.ttml")
+            vid = YoutubeDL({
+                "skip_download": True,
+                "writesubtitles": True,
+                "writeautomaticsub": True,
+                "subtitleslangs": ["en"],
+                "subtitlesformat": "ttml",
+                "outtmpl": os.path.join(tmp_dir, "transcript")
+            })
+
+            try:
+                vid.download(URLS)
+            except Exception as e:
+                # Plugin boundary: report the failure instead of crashing the request.
+                print("[YouTubePlugin] Download Error:", e)
+                error = "Download Error: %s" % e
+            else:
+                if not os.path.isfile(ttml_path):
+                    # Nothing was written at the expected path (e.g. the video
+                    # has no English subtitles available in TTML).
+                    error = "No Subtitles Found"
+                else:
+                    content = self.convert_ttml_to_plain_text(ttml_path)
+                    if content is None:
+                        error = "TTML Conversion Error"
+
+        return {
+            "content": content,
+            "metadata": URLS,
+            "error": error
+        }
+
+
+    def convert_ttml_to_plain_text(self, ttml_file_path):
+        """Return the text of a TTML file with whitespace collapsed.
+
+        Returns None when the file cannot be read or is not well-formed XML.
+        """
+        try:
+            root = ET.parse(ttml_file_path).getroot()
+        except (ET.ParseError, OSError) as e:
+            print("[YouTubePlugin] TTML Conversion Error:", e)
+            return None
+
+        # itertext() also yields text that follows inline tags such as <br/>,
+        # which `elem.text` alone misses; split/join collapses runs of whitespace.
+        return " ".join(" ".join(root.itertext()).split())
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,8 @@ dependencies = [
  "chromadb",
  "sqlite-utils",
  "click",
-  "beautifulsoup4"
+  "beautifulsoup4",
+  "yt-dlp"
 ]

 [project.scripts]
Author	SHA1	Message	Date
Evan Reichard	ebfea97af7	[add] youtube plugin, [improve] initial prompt (JSON) All checks were successful continuous-integration/drone/push Build is passing Details	2023-11-10 09:19:24 -05:00
Evan Reichard	ca8c306534	[add] better error handling All checks were successful continuous-integration/drone/push Build is passing Details	2023-11-08 20:52:29 -05:00
evan	3168bfffd1	Merge pull request 'Add Plugins' (#1 ) from function_plugins into master All checks were successful continuous-integration/drone/push Build is passing Details Reviewed-on: #1	2023-11-09 00:31:51 +00:00
Evan Reichard	7f0d74458d	[add] migrate chromadb to plugin All checks were successful continuous-integration/drone/pr Build is passing Details continuous-integration/drone/push Build is passing Details	2023-11-08 18:35:56 -05:00