Compare commits
4 Commits
5afd2bb498
...
3168bfffd1
Author | SHA1 | Date | |
---|---|---|---|
3168bfffd1 | |||
7f0d74458d | |||
cf8e023b82 | |||
b82e086cbb |
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,6 +2,9 @@ __pycache__
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
.direnv
|
.direnv
|
||||||
data
|
data
|
||||||
|
datasets
|
||||||
venv
|
venv
|
||||||
openai_key
|
openai_key
|
||||||
|
ha_key
|
||||||
minyma.egg-info/
|
minyma.egg-info/
|
||||||
|
NOTES.md
|
||||||
|
102
README.md
102
README.md
@ -13,22 +13,82 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
AI Chat Bot with Vector / Embedding DB Context
|
AI Chat Bot with Plugins (RAG VectorDB - ChromaDB, DuckDuckGo Search, Home Assistant, Vehicle Lookup)
|
||||||
|
|
||||||
[![Build Status](https://drone.va.reichard.io/api/badges/evan/minyma/status.svg)](https://drone.va.reichard.io/evan/minyma)
|
[![Build Status](https://drone.va.reichard.io/api/badges/evan/minyma/status.svg)](https://drone.va.reichard.io/evan/minyma)
|
||||||
|
|
||||||
|
## Plugins
|
||||||
|
|
||||||
|
### ChromaDB Embeddings / Vectors
|
||||||
|
|
||||||
|
This utilizes a local embeddings DB. This allows you to ask the assistant
|
||||||
|
about local information. [Utilizes Retrieval-Augmented Generation (RAG)](https://arxiv.org/abs/2005.11401).
|
||||||
|
|
||||||
|
```
|
||||||
|
User: What are some common symptoms of COVID-19?
|
||||||
|
Assistant: Some common symptoms of COVID-19 mentioned in the context are
|
||||||
|
fatigue, headache, dyspnea (shortness of breath), anosmia (loss of
|
||||||
|
sense of smell), lower respiratory symptoms, cardiac symptoms,
|
||||||
|
concentration or memory issues, tinnitus and earache, and peripheral
|
||||||
|
neuropathy symptoms.
|
||||||
|
```
|
||||||
|
|
||||||
|
**NOTE:** Instructions on how to load this with your own information are in the
|
||||||
|
"Normalizing & Loading Data" section. We include a PubMed data normalizer as an
|
||||||
|
example.
|
||||||
|
|
||||||
|
### DuckDuckGo
|
||||||
|
|
||||||
|
This utilizes DuckDuckGo Search by scraping the top 5 results.
|
||||||
|
|
||||||
|
```
|
||||||
|
User: Tell me about Evan Reichard
|
||||||
|
Assistant: Evan Reichard is a Principal Detection and Response Engineer based
|
||||||
|
in the Washington DC-Baltimore Area. He has been in this role since
|
||||||
|
August 2022. Evan has created a browser extension that helps SOC
|
||||||
|
analysts and saves them over 300 hours per month. Additionally,
|
||||||
|
there are three professionals named Evan Reichard on LinkedIn and
|
||||||
|
there are also profiles of people named Evan Reichard on Facebook.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Vehicle Lookup API
|
||||||
|
|
||||||
|
This utilizes Carvana's undocumented API to lookup details on a vehicle.
|
||||||
|
|
||||||
|
```
|
||||||
|
User: What vehicle is NY plate HELLO?
|
||||||
|
Assistant: The vehicle corresponding to NY plate HELLO is a 2016 MAZDA CX-5
|
||||||
|
Grand Touring Sport Utility 4D with VIN JM3KE4DY6G0672552.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Home Assistant API
|
||||||
|
|
||||||
|
This utilizes Home Assistant's [Conversational API](https://developers.home-assistant.io/docs/intent_conversation_api/).
|
||||||
|
|
||||||
|
```
|
||||||
|
User: Turn off the living room lights
|
||||||
|
Assistant: The living room lights have been turned off. Is there anything else I can assist you with?
|
||||||
|
|
||||||
|
User: Turn on the living room lights
|
||||||
|
Assistant: The living room lights have been turned on successfully.
|
||||||
|
```
|
||||||
|
|
||||||
## Running Server
|
## Running Server
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Locally (See "Development" Section)
|
# Locally (See "Development" Section)
|
||||||
export OPENAI_API_KEY=`cat openai_key`
|
export OPENAI_API_KEY=`cat openai_key`
|
||||||
|
export CHROMA_DATA_PATH=/data
|
||||||
|
export HOME_ASSISTANT_API_KEY=`cat ha_key`
|
||||||
|
export HOME_ASSISTANT_URL=https://some-url.com
|
||||||
|
|
||||||
minyma server run
|
minyma server run
|
||||||
|
|
||||||
# Docker Quick Start
|
# Docker Quick Start
|
||||||
docker run \
|
docker run \
|
||||||
-p 5000:5000 \
|
-p 5000:5000 \
|
||||||
-e OPENAI_API_KEY=`cat openai_key` \
|
-e OPENAI_API_KEY=`cat openai_key` \
|
||||||
-e DATA_PATH=/data \
|
-e CHROMA_DATA_PATH=/data \
|
||||||
-v ./data:/data \
|
-v ./data:/data \
|
||||||
gitea.va.reichard.io/evan/minyma:latest
|
gitea.va.reichard.io/evan/minyma:latest
|
||||||
```
|
```
|
||||||
@ -46,10 +106,10 @@ To normalize data, you can use Minyma's `normalize` CLI command:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
minyma normalize \
|
minyma normalize \
|
||||||
--filename ./pubmed_manuscripts.jsonl \
|
|
||||||
--normalizer pubmed \
|
--normalizer pubmed \
|
||||||
--database chroma \
|
--database chroma \
|
||||||
--datapath ./chroma
|
--datapath ./data \
|
||||||
|
--filename ./datasets/pubmed_manuscripts.jsonl
|
||||||
```
|
```
|
||||||
|
|
||||||
The above example does the following:
|
The above example does the following:
|
||||||
@ -64,10 +124,12 @@ The above example does the following:
|
|||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
| Environment Variable | Default Value | Description |
|
| Environment Variable | Default Value | Description |
|
||||||
| -------------------- | ------------- | ---------------------------------------------------------------------------------- |
|
| ---------------------- | ------------- | ----------------------------------- |
|
||||||
| OPENAI_API_KEY | NONE | Required OpenAI API Key for ChatGPT access. |
|
| OPENAI_API_KEY | NONE | Required OpenAI API Key for ChatGPT |
|
||||||
| DATA_PATH | ./data | The path to the data directory. Chroma will store its data in the `chroma` subdir. |
|
| CHROMA_DATA_PATH | NONE | ChromaDB Persistent Data Directory |
|
||||||
|
| HOME_ASSISTANT_API_KEY | NONE | Home Assistant API Key |
|
||||||
|
| HOME_ASSISTANT_URL | NONE | Home Assistant Instance URL |
|
||||||
|
|
||||||
# Development
|
# Development
|
||||||
|
|
||||||
@ -79,31 +141,9 @@ python3 -m venv venv
|
|||||||
# Local Development
|
# Local Development
|
||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
# Creds
|
# Creds & Other Environment Variables
|
||||||
export OPENAI_API_KEY=`cat openai_key`
|
export OPENAI_API_KEY=`cat openai_key`
|
||||||
|
|
||||||
# Docker
|
# Docker
|
||||||
make docker_build_local
|
make docker_build_local
|
||||||
```
|
```
|
||||||
|
|
||||||
# Notes
|
|
||||||
|
|
||||||
This is the first time I'm doing anything LLM related, so it was an adventure.
|
|
||||||
Initially I was entertaining OpenAI's Embedding API with plans to load embeddings
|
|
||||||
into Pinecone, however initial calculations with `tiktoken` showed that generating
|
|
||||||
embeddings would cost roughly $250 USD.
|
|
||||||
|
|
||||||
Fortunately I found [Chroma](https://www.trychroma.com/), which basically solved
|
|
||||||
both of those issues. It allowed me to load in the normalized data and automatically
|
|
||||||
generated embeddings for me.
|
|
||||||
|
|
||||||
In order to fit into OpenAI ChatGPT's token limit, I limited each document to roughly
|
|
||||||
1000 words. I wanted to make sure I could add the top two matches as context while
|
|
||||||
still having enough headroom for the actual question from the user.
|
|
||||||
|
|
||||||
A few notes:
|
|
||||||
|
|
||||||
- Context is not carried over from previous messages
|
|
||||||
- I "stole" the prompt that is used in LangChain (See `oai.py`). I tried some variations without much (subjective) improvement.
|
|
||||||
- A generalized normalizer format. This should make it fairly easy to use completely different data. Just add a new normalizer that implements the super class.
|
|
||||||
- Basic web front end with TailwindCSS
|
|
||||||
|
@ -3,6 +3,7 @@ import click
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
from importlib.metadata import version
|
from importlib.metadata import version
|
||||||
|
from minyma.plugin import PluginLoader
|
||||||
from minyma.oai import OpenAIConnector
|
from minyma.oai import OpenAIConnector
|
||||||
from minyma.vdb import ChromaDB
|
from minyma.vdb import ChromaDB
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
@ -15,15 +16,15 @@ def signal_handler(sig, frame):
|
|||||||
|
|
||||||
|
|
||||||
def create_app():
|
def create_app():
|
||||||
global oai, vdb
|
global oai, plugins
|
||||||
|
|
||||||
from minyma.config import Config
|
from minyma.config import Config
|
||||||
import minyma.api.common as api_common
|
import minyma.api.common as api_common
|
||||||
import minyma.api.v1 as api_v1
|
import minyma.api.v1 as api_v1
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
vdb = ChromaDB(path.join(Config.DATA_PATH, "chroma"))
|
oai = OpenAIConnector(Config.OPENAI_API_KEY)
|
||||||
oai = OpenAIConnector(Config.OPENAI_API_KEY, vdb)
|
plugins = PluginLoader(Config)
|
||||||
|
|
||||||
app.register_blueprint(api_common.bp)
|
app.register_blueprint(api_common.bp)
|
||||||
app.register_blueprint(api_v1.bp)
|
app.register_blueprint(api_v1.bp)
|
||||||
@ -68,7 +69,7 @@ def normalize(filename, normalizer, database, datapath):
|
|||||||
return print("INVALID NORMALIZER:", normalizer)
|
return print("INVALID NORMALIZER:", normalizer)
|
||||||
|
|
||||||
# Process Data
|
# Process Data
|
||||||
vdb.load_documents(norm)
|
vdb.load_documents(norm.name, norm)
|
||||||
|
|
||||||
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
@ -20,24 +20,20 @@ def get_response():
|
|||||||
resp = minyma.oai.query(message)
|
resp = minyma.oai.query(message)
|
||||||
|
|
||||||
# Derive LLM Data
|
# Derive LLM Data
|
||||||
llm_resp = resp.get("llm", {})
|
# llm_resp = resp.get("llm", {})
|
||||||
llm_choices = llm_resp.get("choices", [])
|
# llm_choices = llm_resp.get("choices", [])
|
||||||
|
|
||||||
# Derive VDB Data
|
# Derive VDB Data
|
||||||
vdb_resp = resp.get("vdb", {})
|
# vdb_resp = resp.get("vdb", {})
|
||||||
combined_context = [{
|
# combined_context = [{
|
||||||
"id": vdb_resp.get("ids")[i],
|
# "id": vdb_resp.get("ids")[i],
|
||||||
"distance": vdb_resp.get("distances")[i],
|
# "distance": vdb_resp.get("distances")[i],
|
||||||
"doc": vdb_resp.get("docs")[i],
|
# "doc": vdb_resp.get("docs")[i],
|
||||||
"metadata": vdb_resp.get("metadatas")[i],
|
# "metadata": vdb_resp.get("metadatas")[i],
|
||||||
} for i, _ in enumerate(vdb_resp.get("docs", []))]
|
# } for i, _ in enumerate(vdb_resp.get("docs", []))]
|
||||||
|
|
||||||
# Return Data
|
# Return Data
|
||||||
return {
|
return resp
|
||||||
"response": None if len(llm_choices) == 0 else llm_choices[0].get("message", {}).get("content"),
|
|
||||||
"context": combined_context,
|
|
||||||
"usage": llm_resp.get("usage"),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,5 +19,7 @@ class Config:
|
|||||||
OpenAI API Key - Required
|
OpenAI API Key - Required
|
||||||
"""
|
"""
|
||||||
|
|
||||||
DATA_PATH: str = get_env("DATA_PATH", default="./data")
|
CHROMA_DATA_PATH: str = get_env("CHROMA_DATA_PATH", required=False)
|
||||||
|
HOME_ASSISTANT_API_KEY: str = get_env("HOME_ASSISTANT_API_KEY", required=False)
|
||||||
|
HOME_ASSISTANT_URL: str = get_env("HOME_ASSISTANT_URL", required=False)
|
||||||
OPENAI_API_KEY: str = get_env("OPENAI_API_KEY", required=True)
|
OPENAI_API_KEY: str = get_env("OPENAI_API_KEY", required=True)
|
||||||
|
@ -18,6 +18,7 @@ class PubMedNormalizer(DataNormalizer):
|
|||||||
normalized inside the iterator.
|
normalized inside the iterator.
|
||||||
"""
|
"""
|
||||||
def __init__(self, file: TextIOWrapper):
|
def __init__(self, file: TextIOWrapper):
|
||||||
|
self.name = "pubmed"
|
||||||
self.file = file
|
self.file = file
|
||||||
self.length = 0
|
self.length = 0
|
||||||
|
|
||||||
|
131
minyma/oai.py
131
minyma/oai.py
@ -1,46 +1,125 @@
|
|||||||
from typing import Any
|
import json
|
||||||
|
from textwrap import indent
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, List
|
||||||
import openai
|
import openai
|
||||||
|
import minyma
|
||||||
|
|
||||||
from minyma.vdb import VectorDB
|
INITIAL_PROMPT_TEMPLATE = """
|
||||||
|
You are a helpful assistant. You are connected to various external functions that can provide you with more personalized and up-to-date information and have already been granted the permissions to execute these functions at will. DO NOT say you don't have access to real time information, instead attempt to call one or more of the listed functions:
|
||||||
|
|
||||||
# Stolen LangChain Prompt
|
{functions}
|
||||||
PROMPT_TEMPLATE = """
|
|
||||||
Use the following pieces of context to answer the question at the end.
|
|
||||||
If you don't know the answer, just say that you don't know, don't try to
|
|
||||||
make up an answer.
|
|
||||||
|
|
||||||
{context}
|
The user will not see your response. You must only respond with a comma separated list of function calls: "FUNCTION_CALLS: function(), function(), etc". It must be prepended by "FUNCTION_CALLS:".
|
||||||
|
|
||||||
Question: {question}
|
User Message: {question}
|
||||||
Helpful Answer:
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
FOLLOW_UP_PROMPT_TEMPLATE = """
|
||||||
|
You are a helpful assistant. This is a follow up message to provide you with more context on a previous user request. Only respond to the user using the following information:
|
||||||
|
|
||||||
|
{response}
|
||||||
|
|
||||||
|
User Message: {question}
|
||||||
|
"""
|
||||||
|
|
||||||
|
@dataclass
class ChatCompletion:
    """Typed view of the fields this module reads from a chat completion.

    Used only as a type annotation for the value returned by
    ``openai.ChatCompletion.create``; the field names mirror the keys of
    that response.
    """

    # Response identifier.
    id: str
    # Response object type.
    object: str
    # Creation timestamp.
    created: int
    # Model that produced the completion.
    model: str
    # Candidate completions; each carries a "message" dict with "content".
    choices: List[dict]
    # Token accounting ("prompt_tokens", "completion_tokens", "total_tokens").
    usage: dict
|
||||||
|
|
||||||
class OpenAIConnector:
    """Two-step OpenAI chat flow that lets the model call plugin functions.

    The first request asks the model which plugin functions to call; the
    requested functions are executed through ``minyma.plugins`` and their
    output is handed back to the model in a follow-up request whose answer
    is returned to the caller.
    """

    def __init__(self, api_key: str):
        self.model = "gpt-3.5-turbo"
        # NOTE: not read anywhere inside this class.
        self.word_cap = 1000
        openai.api_key = api_key

    def query(self, question: str) -> Any:
        """Answer ``question`` using plugin functions chosen by the model.

        Returns a dict with:
          - "response": the final assistant message (``None`` on failure)
          - "functions": mapping of each executed call to its text result
          - "usage": token totals summed over both OpenAI requests
          - "error": only present when OpenAI returned no choices
        """
        # Track Usage (summed across the initial and follow up request)
        usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

        # Get Available Functions
        functions = "\n".join(
            "- %s" % definition["def"]
            for definition in minyma.plugins.plugin_defs().values()
        )

        # Create Initial Prompt
        prompt = INITIAL_PROMPT_TEMPLATE.format(question=question, functions=functions)

        print("[OpenAIConnector] Running Initial OAI Query")

        # Run Initial
        response = self._chat(prompt)
        self._add_usage(usage, response.usage)

        if len(response.choices) == 0:
            # Nothing to parse; report instead of failing on choices[0].
            print("[OpenAIConnector] No Results -> TODO", response)
            return self._no_response(usage, {})

        content = response.choices[0]["message"]["content"]

        # Get Called Functions (TODO - Better Validation -> Failback Prompt?)
        all_funcs = self._parse_function_calls(content)

        print("[OpenAIConnector] Completed Initial OAI Query:\n", indent(json.dumps({ "usage": response.usage, "function_calls": all_funcs }, indent=2), ' ' * 2))

        # Execute Requested Functions
        func_responses = {}
        for func in all_funcs:
            func_responses[func] = self._as_text(minyma.plugins.execute(func))

        # Build Response Text
        response_content = "\n".join(
            "- %s\n%s" % (key, indent(val, ' ' * 2))
            for key, val in func_responses.items()
        )

        # Create Follow Up Prompt
        prompt = FOLLOW_UP_PROMPT_TEMPLATE.format(question=question, response=response_content)

        print("[OpenAIConnector] Running Follup Up OAI Query")

        # Run Follow Up
        response = self._chat(prompt)
        self._add_usage(usage, response.usage)

        print("[OpenAIConnector] Completed Follup Up OAI Query:\n", indent(json.dumps({ "usage": response.usage }, indent=2), ' ' * 2))

        if len(response.choices) == 0:
            return self._no_response(usage, func_responses)

        # Get Content
        content = response.choices[0]["message"]["content"]

        # Return Response
        return {
            "response": content,
            "functions": func_responses,
            "usage": usage,
        }

    def _chat(self, prompt: str) -> "ChatCompletion":
        """Send ``prompt`` as a single user message and return the completion."""
        return openai.ChatCompletion.create(  # type: ignore
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )

    @staticmethod
    def _no_response(usage, func_responses):
        """Build the result returned when OpenAI yields no choices."""
        return {
            "error": "No Response From OpenAI",
            "response": None,
            "functions": func_responses,
            "usage": usage,
        }

    @staticmethod
    def _add_usage(totals, usage):
        """Add one response's token counts to ``totals`` in place.

        Each counter is read from its own key; the total is taken from
        "total_tokens" rather than re-adding the prompt count.
        """
        for key in totals:
            totals[key] += usage.get(key, 0)

    @staticmethod
    def _parse_function_calls(content):
        """Extract the call strings listed after "FUNCTION_CALLS:".

        Calls are separated on ")," and the closing parenthesis removed by
        the split is restored. Returns an empty list when the marker is
        missing or nothing follows it.
        """
        parts = content.split("FUNCTION_CALLS:")
        if len(parts) < 2:
            return []

        calls = []
        for piece in parts[1].strip().split("),"):
            piece = piece.strip()
            if not piece:
                continue
            calls.append(piece if piece.endswith(")") else piece + ")")
        return calls

    @staticmethod
    def _as_text(value):
        """Coerce a plugin result to the string that ``indent`` requires."""
        if isinstance(value, str):
            return value
        if value is None:
            # execute() returns None for unknown or malformed calls.
            return json.dumps({"error": "Function Not Available"})
        return json.dumps(value, default=str)
|
||||||
|
98
minyma/plugin.py
Normal file
98
minyma/plugin.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
import re
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
|
import importlib.util
|
||||||
|
|
||||||
|
class MinymaPlugin:
    """Marker base class for Minyma plugins.

    PluginLoader instantiates every subclass it discovers with the
    application config and reads the instance's ``name`` and ``functions``
    attributes.
    """
|
||||||
|
|
||||||
|
class PluginLoader:
    """Discover Minyma plugins and dispatch function calls by name.

    Plugins are the MinymaPlugin subclasses found in the ``plugins`` folder
    next to this file. Every callable listed in a plugin's ``functions`` is
    registered under its ``__name__`` together with a text definition
    derived from its signature.
    """

    def __init__(self, config):
        self.config = config
        self.plugins = self.get_plugins()
        self.definitions = self.plugin_defs()

    def execute(self, func_cmd):
        """Run one call such as ``search(query="abc")``.

        The command is parsed (not evaluated) as a Python call expression.
        Only registered function names are dispatched, and every argument
        must be a literal, so arbitrary expressions in ``func_cmd`` are never
        executed.

        Returns the function's result, or ``None`` when the command is not a
        well-formed call, names an unknown function, or carries non-literal
        arguments.
        """
        import ast  # local import keeps this block self-contained

        print("[PluginLoader] Execute Function:", func_cmd)

        try:
            call = ast.parse(func_cmd.strip(), mode="eval").body
        except (SyntaxError, ValueError):
            print("[PluginLoader] Error: Invalid Function Call:", func_cmd)
            return None

        if not isinstance(call, ast.Call) or not isinstance(call.func, ast.Name):
            return None

        definition = self.definitions.get(call.func.id)
        if definition is None:
            return None

        # `**mapping` keywords have no name and cannot be bound safely.
        if any(keyword.arg is None for keyword in call.keywords):
            return None

        try:
            args = [ast.literal_eval(arg) for arg in call.args]
            kwargs = {
                keyword.arg: ast.literal_eval(keyword.value)
                for keyword in call.keywords
            }
        except (ValueError, TypeError):
            print("[PluginLoader] Error: Non-Literal Arguments:", func_cmd)
            return None

        return definition["func"](*args, **kwargs)

    @staticmethod
    def _describe_param(param):
        """Render a parameter as ``name: type``, or ``name`` if unannotated."""
        annotation = param.annotation
        if annotation is inspect.Parameter.empty:
            return param.name
        # Annotations without __name__ (e.g. strings) fall back to str().
        return "%s: %s" % (param.name, getattr(annotation, "__name__", str(annotation)))

    def plugin_defs(self):
        """Build ``{function_name: {"func": callable, "def": signature_text}}``.

        The first function registered under a name wins; later duplicates
        are reported and skipped.
        """
        defs = {}
        for plugin in self.plugins:
            plugin_name = plugin.name

            for func_obj in plugin.functions:
                func_name = func_obj.__name__

                if func_name in defs:
                    print("[PluginLoader] Error: Duplicate Function: (%s) %s" % (plugin_name, func_name))
                    continue

                params = [
                    self._describe_param(param)
                    for param in inspect.signature(func_obj).parameters.values()
                ]
                func_def = "%s(%s)" % (func_name, ", ".join(params))
                defs[func_name] = { "func": func_obj, "def": func_def }

        return defs

    def get_plugins(self):
        """Dynamically load plugins"""
        # Derive Plugin Folder
        loader_dir = os.path.dirname(os.path.abspath(__file__))
        plugin_folder = os.path.join(loader_dir, "plugins")

        # Find Minyma Plugins (sorted so the load order is deterministic)
        plugin_classes = []
        for filename in sorted(os.listdir(plugin_folder)):

            # Exclude Files
            if not filename.endswith(".py") or filename == "__init__.py":
                continue

            # Derive Module Path
            module_name = os.path.splitext(filename)[0]
            module_path = os.path.join(plugin_folder, filename)

            # Load Module Dynamically
            spec = importlib.util.spec_from_file_location(module_name, module_path)
            if spec is None or spec.loader is None:
                raise ImportError("Unable to dynamically load plugin - %s" % filename)

            # Load & Exec Module
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            # Only Process MinymaPlugin SubClasses
            for _, member in inspect.getmembers(module):
                if inspect.isclass(member) and issubclass(member, MinymaPlugin) and member != MinymaPlugin:
                    plugin_classes.append(member)

        # Instantiate Plugins
        plugins = []
        for cls in plugin_classes:
            instance = cls(self.config)
            print("[PluginLoader] %s - Loaded: %d Feature(s)" % (cls.__name__, len(instance.functions)))
            plugins.append(instance)
        return plugins
|
3
minyma/plugins/README.md
Normal file
3
minyma/plugins/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Plugins
|
||||||
|
|
||||||
|
These are plugins that provide OpenAI with functions. Each plugin can define multiple functions. The plugin loader will automatically derive the function definition. Each function will have the plugin name prepended.
|
41
minyma/plugins/chroma_db.py
Normal file
41
minyma/plugins/chroma_db.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from textwrap import indent
|
||||||
|
from minyma.plugin import MinymaPlugin
|
||||||
|
from minyma.vdb import ChromaDB
|
||||||
|
|
||||||
|
|
||||||
|
class ChromaDBPlugin(MinymaPlugin):
    """Perform Local VectorDB Lookup

    ChromaDB can access multiple "collections". You can add additional
    functions here that just access a different collection (i.e. different
    data)
    """

    def __init__(self, config):
        self.name = "chroma_db"
        self.config = config

        # No functions are exposed unless a data path is configured.
        self.functions = []
        if config.CHROMA_DATA_PATH:
            self.vdb = ChromaDB(config.CHROMA_DATA_PATH)
            self.functions = [self.lookup_pubmed_data]

    def __lookup_data(self, collection_name: str, query: str):
        """Return the related documents, each truncated to the word cap."""
        # Get Related
        related = self.vdb.get_related(collection_name, query)

        # Normalize Data
        trimmed = []
        for doc in related.get("docs", []):
            words = doc.split()[:self.vdb.word_cap]
            trimmed.append(" ".join(words))
        return trimmed

    def lookup_pubmed_data(self, query: str):
        """Return newline-joined documents related to ``query`` from "pubmed"."""
        COLLECTION_NAME = "pubmed"
        return '\n'.join(self.__lookup_data(COLLECTION_NAME, query))
|
34
minyma/plugins/duckduckgo.py
Normal file
34
minyma/plugins/duckduckgo.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from minyma.plugin import MinymaPlugin
|
||||||
|
|
||||||
|
# Browser-like User-Agent sent with the outgoing requests in this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:105.0) "
        "Gecko/20100101 Firefox/105.0"
    ),
}
|
||||||
|
|
||||||
|
class DuckDuckGoPlugin(MinymaPlugin):
    """Search DuckDuckGo"""

    def __init__(self, config):
        self.config = config
        self.name = "duck_duck_go"
        self.functions = [self.duck_duck_go_search]

    def duck_duck_go_search(self, query: str):
        """Search DuckDuckGo and return the first five results as JSON text.

        Returns a JSON array of ``{"title", "description"}`` objects scraped
        from the HTML results page, or a JSON ``{"error": ...}`` object when
        the request itself fails.
        """
        try:
            # `params` URL-encodes the query, so spaces, '&', '#', etc. in
            # the search text cannot corrupt the request URL.
            resp = requests.get(
                "https://html.duckduckgo.com/html/",
                params={"q": query},
                headers=HEADERS,
                timeout=10,
            )
        except requests.RequestException as e:
            return json.dumps({"error": "Search Failed: %s" % e})

        soup = BeautifulSoup(resp.text, features="html.parser")

        results = []
        for item in soup.select(".result > div")[:5]:
            title_el = item.select_one(".result__title > a")
            title = title_el.text.strip() if title_el and title_el.text is not None else ""

            description_el = item.select_one(".result__snippet")
            description = description_el.text.strip() if description_el and description_el.text is not None else ""

            results.append({"title": title, "description": description})

        return json.dumps(results)
|
39
minyma/plugins/home_assistant.py
Normal file
39
minyma/plugins/home_assistant.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import json
|
||||||
|
import urllib.parse
|
||||||
|
import requests
|
||||||
|
from minyma.plugin import MinymaPlugin
|
||||||
|
|
||||||
|
|
||||||
|
class HomeAssistantPlugin(MinymaPlugin):
    """Perform Home Assistant Command"""

    def __init__(self, config):
        self.config = config
        self.name = "home_assistant"

        # Both settings are required; without them no function is exposed.
        if not config.HOME_ASSISTANT_API_KEY or not config.HOME_ASSISTANT_URL:
            if not config.HOME_ASSISTANT_API_KEY:
                print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_API_KEY")
            if not config.HOME_ASSISTANT_URL:
                print("[HomeAssistantPlugin] Missing HOME_ASSISTANT_URL")

            self.functions = []
        else:
            self.functions = [self.home_automation_command]

    def home_automation_command(self, natural_language_command: str):
        """POST the command to ``/api/conversation/process``.

        Returns the JSON response re-serialized as text, or a JSON
        ``{"error": ...}`` object when the request fails or the body is not
        valid JSON.
        """
        url = urllib.parse.urljoin(self.config.HOME_ASSISTANT_URL, "/api/conversation/process")
        headers = {
            "Authorization": "Bearer %s" % self.config.HOME_ASSISTANT_API_KEY,
            "Content-Type": "application/json",
        }

        data = {"text": natural_language_command, "language": "en"}

        try:
            # Bounded wait so an unreachable instance cannot hang the caller.
            resp = requests.post(url, json=data, headers=headers, timeout=30)
        except requests.RequestException as e:
            return json.dumps({ "error": "Request Failed: %s" % e })

        # Parse JSON
        try:
            return json.dumps(resp.json())
        except requests.JSONDecodeError:
            return json.dumps({ "error": "Command Failed" })
|
90
minyma/plugins/vehicle_lookup.py
Normal file
90
minyma/plugins/vehicle_lookup.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from minyma.plugin import MinymaPlugin
|
||||||
|
|
||||||
|
# Browser-like User-Agent sent with the outgoing requests in this module.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:105.0) "
        "Gecko/20100101 Firefox/105.0"
    ),
}
|
||||||
|
|
||||||
|
class VehicleLookupPlugin(MinymaPlugin):
    """Search Vehicle Information"""

    def __init__(self, config):
        self.config = config
        self.name = "vehicle_state_plate"
        self.functions = [self.lookup_vehicle_by_state_plate]

    def __query_api(self, url, json=None, headers=None):
        """Request ``url`` and return ``(parsed_json, text, error)``.

        A POST is sent when a ``json`` payload is given, otherwise a GET.
        ``parsed_json`` is ``None`` whenever ``error`` is set; ``text`` is
        the stripped response body (empty if the request never completed).
        """
        # Perform Request
        try:
            if json is not None:
                resp = requests.post(url, json=json, headers=headers, timeout=10)
            else:
                resp = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            return None, "", "Request Failed: %s" % e

        # Parse Text
        text = resp.text.strip()

        # Parse JSON
        try:
            return resp.json(), text, None
        except requests.JSONDecodeError:
            if resp.status_code != 200:
                error = "Invalid HTTP Response: %s" % resp.status_code
            else:
                error = "Invalid JSON"
            return None, text, error

    def lookup_vehicle_by_state_plate(self, state_abbreviation: str, licence_plate: str):
        """Look up a vehicle by state and plate; always returns JSON text.

        On success the JSON holds ``{"result": {vin, year, make, model,
        trim}}``; every failure path holds ``{"error", "response"}``.
        """
        from urllib.parse import quote  # local import keeps the block self-contained

        # Quote both path segments so spaces or '/' in the supplied values
        # cannot alter the request path.
        CARVANA_URL = (
            "https://apim.carvana.io/trades/api/v5/vehicleconfiguration/plateLookup/%s/%s"
            % (quote(str(state_abbreviation), safe=""), quote(str(licence_plate), safe=""))
        )

        # Query API
        json_resp, text_resp, error = self.__query_api(CARVANA_URL, headers=HEADERS)

        # Invalid JSON
        if json_resp is None:
            return json.dumps({
                "error": error,
                "response": text_resp,
            })

        try:
            # Check Result
            status_resp = json_resp.get("status", "Unknown")
            if status_resp != "Succeeded":
                if status_resp == "MissingResource":
                    error = "No Results"
                else:
                    error = "API Error: %s" % status_resp
                # Serialized like every other return path of this method.
                return json.dumps({"error": error, "response": text_resp})

            # Parse Result
            vehicle_info = json_resp.get("content")
            vehicle = vehicle_info.get("vehicles")[0]
            result = {
                "vin": vehicle_info.get("vin"),
                "year": vehicle.get("year"),
                "make": vehicle.get("make"),
                "model": vehicle.get("model"),
                "trim": vehicle.get("trim"),
            }

        except (AttributeError, IndexError, KeyError, TypeError) as e:
            # Response JSON did not have the expected structure.
            return json.dumps({
                "error": "Unknown Error: %s" % e,
                "response": text_resp,
            })

        return json.dumps({"result": result})
|
@ -163,7 +163,8 @@
|
|||||||
let responseEl = document.createElement("p");
|
let responseEl = document.createElement("p");
|
||||||
responseEl.setAttribute(
|
responseEl.setAttribute(
|
||||||
"class",
|
"class",
|
||||||
"whitespace-break-spaces border-b pb-3 mb-3"
|
"whitespace-break-spaces"
|
||||||
|
// "whitespace-break-spaces border-b pb-3 mb-3"
|
||||||
);
|
);
|
||||||
responseEl.innerText = data.response;
|
responseEl.innerText = data.response;
|
||||||
|
|
||||||
@ -174,6 +175,7 @@
|
|||||||
<ul class="list-disc ml-6"></ul>`;
|
<ul class="list-disc ml-6"></ul>`;
|
||||||
let ulEl = contextEl.querySelector("ul");
|
let ulEl = contextEl.querySelector("ul");
|
||||||
|
|
||||||
|
/*
|
||||||
// Create Context Links
|
// Create Context Links
|
||||||
data.context
|
data.context
|
||||||
|
|
||||||
@ -209,12 +211,13 @@
|
|||||||
newEl.append(linkEl);
|
newEl.append(linkEl);
|
||||||
ulEl.append(newEl);
|
ulEl.append(newEl);
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
|
|
||||||
// Add to DOM
|
// Add to DOM
|
||||||
content.setAttribute("class", "w-full");
|
content.setAttribute("class", "w-full");
|
||||||
content.innerHTML = "";
|
content.innerHTML = "";
|
||||||
content.append(responseEl);
|
content.append(responseEl);
|
||||||
content.append(contextEl);
|
// content.append(contextEl);
|
||||||
})
|
})
|
||||||
.catch((e) => {
|
.catch((e) => {
|
||||||
console.log("ERROR:", e);
|
console.log("ERROR:", e);
|
||||||
|
@ -18,11 +18,11 @@ def chunk(iterable, chunk_size: int):
|
|||||||
VectorDB Interface
|
VectorDB Interface
|
||||||
"""
|
"""
|
||||||
class VectorDB:
    """Interface that concrete vector databases implement."""

    def load_documents(self, name: str, normalizer: DataNormalizer, chunk_size: int = 10):
        """Load the normalizer's items into the collection called ``name``."""
        raise NotImplementedError("VectorDB must implement load_documents")

    def get_related(self, name: str, question: str) -> Any:
        """Query the collection called ``name`` for data related to ``question``."""
        raise NotImplementedError("VectorDB must implement get_related")
|
||||||
|
|
||||||
"""
|
"""
|
||||||
ChromaDB VectorDB Type
|
ChromaDB VectorDB Type
|
||||||
@ -31,12 +31,13 @@ class ChromaDB(VectorDB):
|
|||||||
def __init__(self, path: str):
|
def __init__(self, path: str):
|
||||||
self.client: API = chromadb.PersistentClient(path=path)
|
self.client: API = chromadb.PersistentClient(path=path)
|
||||||
self.word_cap = 2500
|
self.word_cap = 2500
|
||||||
self.collection_name: str = "vdb"
|
|
||||||
self.collection: chromadb.Collection = self.client.create_collection(name=self.collection_name, get_or_create=True)
|
|
||||||
|
|
||||||
def get_related(self, question: str) -> Any:
|
def get_related(self, name: str, question: str) -> Any:
|
||||||
|
# Get or Create Collection
|
||||||
|
collection = chromadb.Collection = self.client.create_collection(name=name, get_or_create=True)
|
||||||
|
|
||||||
"""Returns line separated related docs"""
|
"""Returns line separated related docs"""
|
||||||
results = self.collection.query(
|
results = collection.query(
|
||||||
query_texts=[question.lower()],
|
query_texts=[question.lower()],
|
||||||
n_results=2
|
n_results=2
|
||||||
)
|
)
|
||||||
@ -53,7 +54,11 @@ class ChromaDB(VectorDB):
|
|||||||
"ids": all_ids
|
"ids": all_ids
|
||||||
}
|
}
|
||||||
|
|
||||||
def load_documents(self, normalizer: DataNormalizer, chunk_size: int = 10):
|
def load_documents(self, name: str, normalizer: DataNormalizer, chunk_size: int = 10):
|
||||||
|
# Get or Create Collection
|
||||||
|
collection = chromadb.Collection = self.client.create_collection(name=name, get_or_create=True)
|
||||||
|
|
||||||
|
# Load Items
|
||||||
length = len(normalizer) / chunk_size
|
length = len(normalizer) / chunk_size
|
||||||
for items in tqdm(chunk(normalizer, chunk_size), total=length):
|
for items in tqdm(chunk(normalizer, chunk_size), total=length):
|
||||||
ids = []
|
ids = []
|
||||||
@ -65,7 +70,7 @@ class ChromaDB(VectorDB):
|
|||||||
ids.append(item.get("id"))
|
ids.append(item.get("id"))
|
||||||
metadatas.append(item.get("metadata", {}))
|
metadatas.append(item.get("metadata", {}))
|
||||||
|
|
||||||
self.collection.add(
|
collection.add(
|
||||||
ids=ids,
|
ids=ids,
|
||||||
documents=documents,
|
documents=documents,
|
||||||
metadatas=metadatas,
|
metadatas=metadatas,
|
||||||
|
@ -15,7 +15,8 @@ dependencies = [
|
|||||||
"tqdm",
|
"tqdm",
|
||||||
"chromadb",
|
"chromadb",
|
||||||
"sqlite-utils",
|
"sqlite-utils",
|
||||||
"click"
|
"click",
|
||||||
|
"beautifulsoup4"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
Loading…
Reference in New Issue
Block a user