Initial Commit
This commit is contained in:
42
vreader/__init__.py
Normal file
42
vreader/__init__.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import click
|
||||
import signal
|
||||
import sys
|
||||
from importlib.metadata import version
|
||||
from vreader.oai import OpenAIConnector
|
||||
from vreader.video import VideoManager
|
||||
from flask import Flask
|
||||
from flask.cli import FlaskGroup
|
||||
|
||||
# Package version, looked up from the installed distribution metadata for "vreader".
__version__ = version("vreader")
|
||||
|
||||
def signal_handler(sig, frame):
    """Exit the process with status 0.

    Takes the two positional arguments a ``signal.signal`` handler is called
    with; neither is used.
    """
    sys.exit(0)
|
||||
|
||||
|
||||
def create_app():
    """Build the Flask application and the shared service objects.

    Besides returning the app, this binds two module-level globals: ``oai``
    (an ``OpenAIConnector`` built from ``Config.OPENAI_API_KEY``) and ``vman``
    (a ``VideoManager``). The v1 API reads them as ``vreader.oai`` and
    ``vreader.vman``.
    """
    global oai, vman

    # Imported inside the factory rather than at module top — presumably so the
    # config and blueprint modules load only when an app is built (TODO confirm).
    from vreader.config import Config
    import vreader.api.common as api_common
    import vreader.api.v1 as api_v1

    app = Flask(__name__)
    oai = OpenAIConnector(Config.OPENAI_API_KEY)
    vman = VideoManager()

    app.register_blueprint(api_common.bp)
    app.register_blueprint(api_v1.bp)

    return app
|
||||
|
||||
|
||||
# Root command group. The docstring doubles as the text click shows for --help,
# so it is kept short on purpose.
@click.group()
def cli():
    """VReader CLI"""
|
||||
|
||||
|
||||
# Sub-group of `cli` implemented by Flask's FlaskGroup, with `create_app`
# passed as the application factory. The docstring is the --help text.
@cli.group(cls=FlaskGroup, create_app=create_app)
def server():
    """VReader flask server"""
|
||||
|
||||
# Installed at import time: SIGINT (Ctrl-C) is routed to signal_handler, which exits with status 0.
signal.signal(signal.SIGINT, signal_handler)
|
||||
64
vreader/api/common.py
Normal file
64
vreader/api/common.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from flask import Blueprint
|
||||
from flask import make_response, render_template
|
||||
from html_sanitizer import Sanitizer
|
||||
from markdown import markdown
|
||||
from vreader.config import Config
|
||||
import os
|
||||
|
||||
bp = Blueprint("common", __name__)
|
||||
sanitizer = Sanitizer()
|
||||
|
||||
@bp.route("/", methods=["GET"])
def main_entry():
    """Render the home page listing every Markdown article in the data directory."""
    directory = str(Config.DATA_PATH)

    # os.listdir returns names in arbitrary order; sort so the page is stable.
    articles = [
        parse_filename(name)
        for name in sorted(os.listdir(directory))
        if name.endswith(".md")
    ]

    return make_response(render_template("index.html", articles=articles))
|
||||
|
||||
@bp.route("/articles/<id>", methods=["GET"])
def article_item(id):
    """Render one stored article as HTML, or the 404 page when it is unavailable.

    ``id`` is the URL segment; only 11-character values are looked up. A 404 is
    returned when the id has the wrong length, no matching file exists, or the
    file cannot be read as UTF-8 text.
    """

    def not_found():
        # Same (response, status) pair for every "no such article" outcome.
        return make_response(render_template("404.html")), 404

    if len(id) != 11:
        return not_found()

    metadata = get_article_metadata(id)
    if not metadata:
        return not_found()

    # Only a failure to read the file maps to 404. Markdown, sanitizer or
    # template errors propagate so real bugs are not disguised as missing pages.
    try:
        with open(metadata["filepath"], "r", encoding="utf-8") as file:
            article_contents = file.read()
    except (OSError, UnicodeDecodeError):
        return not_found()

    # The template emits this value with `|safe`, so it is sanitised here.
    markdown_html = sanitizer.sanitize(markdown(article_contents))

    return make_response(
        render_template("article.html", metadata=metadata, markdown_html=markdown_html)
    )
|
||||
|
||||
|
||||
def get_article_metadata(id):
    """Find the stored article whose filename starts with ``id``.

    Returns the ``parse_filename`` dict for the first matching ``.md`` file in
    the data directory, extended with a ``"filepath"`` key, or ``None`` when
    nothing matches.
    """
    # An empty prefix would match every file; treat it as "not found".
    if not id:
        return None

    directory = str(Config.DATA_PATH)
    for file_name in os.listdir(directory):
        if file_name.startswith(id) and file_name.endswith(".md"):
            metadata = parse_filename(file_name)
            metadata["filepath"] = os.path.join(directory, file_name)
            return metadata
    return None
|
||||
|
||||
|
||||
def parse_filename(filename):
    """Split an article filename of the form ``<11-char id>_<title>.md``.

    Returns a dict with ``"video_id"`` (the first 11 characters) and ``"title"``
    (everything after the separator at index 11, without the ``.md`` suffix).
    """
    video_id = filename[:11]
    # Drop the extension only when it is actually there, so a name without
    # ".md" keeps its whole title instead of losing its last three characters.
    stem = filename.removesuffix(".md")
    title = stem[12:]

    return {"video_id": video_id, "title": title}
|
||||
78
vreader/api/v1.py
Normal file
78
vreader/api/v1.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import os
|
||||
from os import path
|
||||
from flask import Blueprint, request
|
||||
from vreader.config import Config
|
||||
import vreader
|
||||
|
||||
bp = Blueprint("v1", __name__, url_prefix="/api/v1")
|
||||
|
||||
@bp.route("/articles", methods=["GET"])
def articles():
    """Return the metadata dicts of every stored Markdown article.

    The list is returned as-is and left to Flask to serialise.
    """
    directory = str(Config.DATA_PATH)

    # os.listdir returns names in arbitrary order; sort so the output is stable.
    return [
        parse_filename(name)
        for name in sorted(os.listdir(directory))
        if name.endswith(".md")
    ]
|
||||
|
||||
@bp.route("/generate", methods=["POST"])
def generate():
    """Create and store an article for the YouTube video named in the JSON body.

    Expects ``{"video": "<11-character id>"}``. Responds with ``{"video": id}``
    when an article for that id already exists, ``{"title": ...}`` after a new
    article has been written, or ``{"error": ...}`` when the request cannot be
    fulfilled.
    """
    import re  # local import: this module does not import `re` at the top

    data = request.get_json()
    if not data or not isinstance(data, dict):
        return {"error": "Missing Data"}

    # Inspect the raw value before stringifying, so an absent key is reported
    # as missing instead of turning into the literal string "None".
    raw_video = data.get("video")
    if raw_video is None or str(raw_video) == "":
        return {"error": "Missing Data"}
    video = str(raw_video)

    # The id ends up in a filename and is handed to the downloader, so accept
    # only the URL-safe alphabet rather than any 11 characters ("../../../aa").
    if re.fullmatch(r"[A-Za-z0-9_-]{11}", video) is None:
        return {"error": "Invalid VideoID"}

    if get_article_metadata(video) is not None:
        return {"video": video}

    context = vreader.vman.transcribe_video(video)
    if context is None:
        return {"error": "Unable to Extract Subtitles"}

    resp = vreader.oai.query(context)
    title = resp.get("title")
    content = resp.get("content")
    if content is None:
        return {"error": "Unable to Generate Article"}

    # The title is model output: it may be None, and it may contain characters
    # that are unsafe in a filename (path separators in particular).
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', " ", title or "").strip()
    if not safe_title:
        safe_title = "Untitled"

    # Derive the filename: "<video id>_<title>.md" inside the data directory.
    directory = str(Config.DATA_PATH)
    file_path = path.join(directory, f"{video}_{safe_title}.md")

    with open(file_path, "w", encoding="utf-8") as file:
        file.write(content)

    return {"title": title}
|
||||
|
||||
|
||||
def get_article_metadata(id):
    """Find the stored article whose filename starts with ``id``.

    Returns the ``parse_filename`` dict for the first matching ``.md`` file in
    the data directory, extended with a ``"filepath"`` key, or ``None`` when
    nothing matches. (vreader/api/common.py carries a helper of the same name.)
    """
    # An empty prefix would match every file; treat it as "not found".
    if not id:
        return None

    directory = str(Config.DATA_PATH)
    for file_name in os.listdir(directory):
        if file_name.startswith(id) and file_name.endswith(".md"):
            metadata = parse_filename(file_name)
            metadata["filepath"] = os.path.join(directory, file_name)
            return metadata
    return None
|
||||
|
||||
|
||||
def parse_filename(filename):
    """Split an article filename of the form ``<11-char id>_<title>.md``.

    Returns a dict with ``"video_id"`` (the first 11 characters) and ``"title"``
    (everything after the separator at index 11, without the ``.md`` suffix).
    """
    video_id = filename[:11]
    # Drop the extension only when it is actually there, so a name without
    # ".md" keeps its whole title instead of losing its last three characters.
    stem = filename.removesuffix(".md")
    title = stem[12:]

    return {"video_id": video_id, "title": title}
|
||||
24
vreader/config.py
Normal file
24
vreader/config.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import os
|
||||
|
||||
|
||||
def get_env(key, default=None, required=False) -> str | None:
|
||||
"""Wrapper for gathering env vars."""
|
||||
if required:
|
||||
assert key in os.environ, "Missing Environment Variable: %s" % key
|
||||
env = os.environ.get(key, default)
|
||||
return str(env) if env is not None else None
|
||||
|
||||
|
||||
class Config:
    """Wrap application configurations

    Attributes
    ----------
    DATA_PATH : str
        The path where to store any resources (default: ./)
    OPENAI_API_KEY : str
        OpenAI API Key - Required
    """

    # Falls back to the current directory, as documented above. Without a
    # default this would be None and callers doing str(Config.DATA_PATH) would
    # end up with the directory name "None".
    DATA_PATH: str | None = get_env("DATA_PATH", default="./", required=False)
    # Evaluated when the class body runs: a missing key raises from get_env.
    OPENAI_API_KEY: str | None = get_env("OPENAI_API_KEY", required=True)
|
||||
67
vreader/oai.py
Normal file
67
vreader/oai.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from dataclasses import dataclass
|
||||
from textwrap import indent
|
||||
from typing import Any, List
|
||||
import json
|
||||
import openai
|
||||
|
||||
# Prompt sent to the model by OpenAIConnector.query; `{context}` is filled with
# the video transcript via str.format.
INITIAL_PROMPT_TEMPLATE = """
The following is a video transcription. Write a fully comprehensive article in markdown appropriately utilizing subsections. Be sure to only use the following transcription to write the article:

{context}
"""

# Earlier wording of the prompt; not referenced elsewhere in this module.
INITIAL_PROMPT_TEMPLATE_OLD = """
The following is a video transcription. Write a comprehensive article in markdown utilizing the following content:

{context}
"""
|
||||
|
||||
@dataclass
class ChatCompletion:
    """Shape of a chat-completion response as this module consumes it.

    Within this module it is used only as the annotation for the value returned
    by ``openai.ChatCompletion.create``; ``choices`` and ``usage`` are the two
    fields that ``OpenAIConnector.query`` reads.
    """

    id: str
    object: str
    created: int
    model: str
    choices: List[dict]
    usage: dict
|
||||
|
||||
|
||||
class OpenAIConnector:
|
||||
def __init__(self, api_key: str | None):
|
||||
if api_key is None:
|
||||
raise RuntimeError("OPENAI_API_KEY Required")
|
||||
|
||||
# self.model = "gpt-3.5-turbo-16k"
|
||||
self.model = "gpt-3.5-turbo-1106"
|
||||
self.word_cap = 1000
|
||||
openai.api_key = api_key
|
||||
|
||||
|
||||
def query(self, context: str) -> Any:
|
||||
# Create Initial Prompt
|
||||
prompt = INITIAL_PROMPT_TEMPLATE.format(context = context)
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
|
||||
print("[OpenAIConnector] Running OAI Query")
|
||||
|
||||
# Article Call
|
||||
response: ChatCompletion = openai.ChatCompletion.create( # type: ignore
|
||||
model=self.model,
|
||||
messages=messages
|
||||
)
|
||||
|
||||
# Markdown Data
|
||||
content = response.choices[0]["message"]["content"]
|
||||
title = self.get_title(content)
|
||||
|
||||
print("[OpenAIConnector] Completed OAI Query:\n", indent(json.dumps({ "usage": response.usage }, indent=2), ' ' * 2))
|
||||
|
||||
# Return Response
|
||||
return { "title": title, "content": content }
|
||||
|
||||
def get_title(self, markdown: str):
|
||||
lines = markdown.split('\n')
|
||||
for line in lines:
|
||||
if line.startswith("# "):
|
||||
return line.strip("# ").strip()
|
||||
return None
|
||||
15
vreader/templates/404.html
Normal file
15
vreader/templates/404.html
Normal file
@@ -0,0 +1,15 @@
|
||||
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=0.9, user-scalable=no, viewport-fit=cover"
    />
    <title>VReader - Not Found</title>
    <script src="https://cdn.tailwindcss.com"></script>
  </head>
  <body class="bg-slate-200 h-[100dvh] p-5 flex flex-col justify-between">
    {# The article routes render this template without context variables, so a
       fixed message is shown unless a caller supplies markdown_html. #}
    {% if markdown_html %}
      {{ markdown_html|safe }}
    {% else %}
      <main>
        <h1 class="text-2xl font-bold">Article not found</h1>
        <p class="mt-2"><a class="underline" href="/">All Articles</a></p>
      </main>
    {% endif %}
  </body>
</html>
|
||||
48
vreader/templates/article.html
Normal file
48
vreader/templates/article.html
Normal file
@@ -0,0 +1,48 @@
|
||||
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=0.9, user-scalable=no, viewport-fit=cover"
    />
    <title>VReader - {{ metadata.title }}</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <style>
      /* Typography for the rendered article. Written as flat descendant
         selectors so it does not depend on native CSS nesting support. */
      #content h1 {
        font-size: 1.75em;
        font-weight: 400;
      }
      #content h2 {
        font-size: 1.25em;
      }
      #content p {
        margin-top: 0.25em;
        margin-bottom: 1.5em;
      }
    </style>
  </head>
  <body class="bg-slate-200">
    <header class="w-screen h-16 bg-slate-300 mb-5">
      <div
        class="flex px-2 h-16 w-11/12 md:w-5/6 mx-auto rounded bg-slate-300"
      >
        <a class="font-bold flex justify-center items-center" href="/">All Articles</a>
      </div>
    </header>
    <div
      id="content"
      class="w-11/12 md:w-5/6 mx-auto rounded px-10 py-5 bg-slate-300"
    >
      <div class="flex justify-center pb-5 w-full">
        {# Thumbnail links out to the source video in a new tab. #}
        <a target="_blank" rel="noopener noreferrer" href="https://www.youtube.com/watch?v={{ metadata.video_id }}">
          <img class="h-32 rounded" src="https://i.ytimg.com/vi_webp/{{ metadata.video_id }}/maxresdefault.webp" alt="Video thumbnail" />
        </a>
      </div>
      <hr class="border-gray-500 pb-5" />
      {# markdown_html is sanitised by the view before it is passed in. #}
      {{ markdown_html|safe }}
    </div>
  </body>
</html>
|
||||
152
vreader/templates/index.html
Normal file
152
vreader/templates/index.html
Normal file
@@ -0,0 +1,152 @@
|
||||
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=0.9, user-scalable=no, viewport-fit=cover"
    />
    <title>VReader - Home</title>
    <script src="https://cdn.tailwindcss.com"></script>
  </head>
  <body class="bg-slate-200">
    <header class="w-screen h-16 bg-slate-300 mb-5">
      <div
        class="flex px-2 h-16 w-11/12 md:w-5/6 mx-auto rounded bg-slate-300"
      >
        <span class="font-bold flex justify-center items-center">VReader</span>
      </div>
    </header>

    <main class="flex flex-col gap-4">
      {# The page script looks these up via "#submit", "input" and "button". #}
      <div id="submit"
        class="flex gap-4 items-center text-lg w-11/12 md:w-4/6 mx-auto rounded px-6 py-3 bg-slate-300"
      >
        <input type="text" placeholder="YouTube URL" aria-label="YouTube URL" class="w-full p-2 bg-gray-700 text-white">
        <button class="p-2 bg-gray-500 text-gray-800 hover:bg-gray-100" type="submit">Generate</button>
      </div>

      {% for article in articles %}
      <a
        href="/articles/{{ article.video_id }}"
        class="flex items-center text-lg w-11/12 md:w-4/6 mx-auto rounded px-6 py-3 bg-slate-300 hover:bg-slate-400 transition-all duration-200"
      >
        <img class="h-14 md:h-24 mr-6 rounded" src="https://i.ytimg.com/vi_webp/{{ article.video_id }}/maxresdefault.webp" alt="" />
        <span>{{ article.title }}</span>
      </a>
      {% endfor %}
    </main>
|
||||
<script>
|
||||
// Inline three-dot spinner; generateAction swaps it into #submit while a
// request is in flight.
const LOADING_SVG = `<svg
  class="w-full"
  width="24"
  height="24"
  viewBox="0 0 24 24"
  xmlns="http://www.w3.org/2000/svg"
  fill="currentColor"
>
  <style>
    .spinner_qM83 {
      animation: spinner_8HQG 1.05s infinite;
    }
    .spinner_oXPr {
      animation-delay: 0.1s;
    }
    .spinner_ZTLf {
      animation-delay: 0.2s;
    }
    @keyframes spinner_8HQG {
      0%,
      57.14% {
        animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
        transform: translate(0);
      }
      28.57% {
        animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
        transform: translateY(-6px);
      }
      100% {
        transform: translate(0);
      }
    }
  </style>
  <circle class="spinner_qM83" cx="4" cy="12" r="3"></circle>
  <circle class="spinner_qM83 spinner_oXPr" cx="12" cy="12" r="3"></circle>
  <circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3"></circle>
</svg>`;
|
||||
|
||||
/**
 * Wrapper API Call
 *
 * Sends a JSON request with fetch and resolves with the parsed JSON body.
 * `data.method` defaults to "GET"; `data.data` is sent as the body for POST.
 * Rejects with a readable Error when the response body is not JSON.
 *
 * @param {{url: string, method?: string, data?: object}} data
 * @returns {Promise<any>}
 **/
function apiCall(data) {
  const fetchObj = {
    method: data.method || "GET",
    headers: {
      "Content-Type": "application/json",
    },
  };

  if (fetchObj.method === "POST")
    fetchObj.body = JSON.stringify(data.data || {});

  return fetch(data.url, fetchObj).then((resp) =>
    // A non-JSON body (e.g. an HTML error page) would otherwise surface as an
    // opaque SyntaxError; report the HTTP status instead.
    resp.json().catch(() => {
      throw new Error("Request failed (" + resp.status + ")");
    })
  );
}
|
||||
|
||||
/**
 * POST the video id to /api/v1/generate.
 *
 * @param {string} videoID
 * @returns {Promise<any>} whatever apiCall resolves with
 */
function getVideoArticle(videoID) {
  return apiCall({
    url: "/api/v1/generate",
    method: "POST",
    data: { video: videoID },
  });
}
|
||||
|
||||
/**
 * Read the URL from the input, request an article for it and navigate to the
 * article page on success. On failure the form is put back and the error shown.
 */
function generateAction() {
  const inputEl = document.querySelector("input");
  const inputVal = inputEl.value;
  const videoID = getYouTubeVideoId(inputVal);
  if (!videoID) return alert("Invalid URL");

  // Loading: replace the form with the spinner, remembering its markup.
  const submitEl = document.querySelector("#submit");
  const oldHTML = submitEl.innerHTML;
  submitEl.innerHTML = LOADING_SVG;

  // Do API Call
  getVideoArticle(videoID)
    .then((resp) => {
      if ("error" in resp) throw new Error(resp.error);
      window.location.href = "/articles/" + videoID;
    })
    .catch((e) => {
      console.log(e);
      alert(e.message);
      submitEl.innerHTML = oldHTML;

      // Assigning innerHTML creates fresh elements: the typed value and the
      // event listeners are gone, so put both back.
      const restoredInput = submitEl.querySelector("input");
      if (restoredInput) restoredInput.value = inputVal;
      initListeners();
    });
}
|
||||
|
||||
/**
 * Attach the handlers that trigger generateAction: a click on the first
 * <button> and the Enter key inside the first <input>.
 */
function initListeners() {
  const buttonEl = document.querySelector("button");
  const inputEl = document.querySelector("input");

  buttonEl.addEventListener("click", generateAction);
  inputEl.addEventListener("keydown", function (event) {
    // `key` is the current API; `keyCode` is kept as a fallback.
    const isEnter = event.key === "Enter" || event.keyCode === 13;
    if (!isEnter) return;
    generateAction();
  });
}
|
||||
|
||||
/**
 * Extract the 11-character video id from a YouTube URL.
 *
 * Recognises youtu.be/<id>, /v/<id>, /u/<c>/<id>, /embed/<id> and a `v=` query
 * parameter. Only ids of exactly 11 URL-safe characters are returned, which is
 * the length the generate endpoint requires.
 *
 * @param {string} url
 * @returns {string|null} the id, or null when none is found
 */
function getYouTubeVideoId(url) {
  const regExp =
    /(?:youtu\.be\/|\/v\/|\/u\/\w\/|\/embed\/|[?&]v=)([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])/;
  const match = String(url || "").match(regExp);
  return match ? match[1] : null;
}
|
||||
|
||||
// Wire up the Generate button and the Enter key once the script runs.
initListeners();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
42
vreader/video.py
Normal file
42
vreader/video.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import os
|
||||
from yt_dlp import YoutubeDL
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
class VideoManager():
    """Transcribe Videos"""

    def transcribe_video(self, video_id: str):
        """Fetch the English subtitles for ``video_id`` and return them as plain text.

        Subtitles are requested in TTML format (uploaded or automatic) without
        downloading the video itself. Returns ``None`` when no subtitle file was
        written or the file cannot be parsed.
        """
        import tempfile  # local import: not imported at module top

        # Each call works in its own directory, so concurrent requests cannot
        # overwrite or delete each other's transcript file; the directory and
        # its contents are removed when the block exits.
        with tempfile.TemporaryDirectory() as workdir:
            options = {
                "skip_download": True,
                "writesubtitles": True,
                "writeautomaticsub": True,
                "subtitleslangs": ["en"],
                "subtitlesformat": "ttml",
                "outtmpl": os.path.join(workdir, "transcript"),
            }
            with YoutubeDL(options) as downloader:
                downloader.download([video_id])

            subtitle_path = os.path.join(workdir, "transcript.en.ttml")
            if not os.path.isfile(subtitle_path):
                print("[VideoManager] No English subtitles written for:", video_id)
                return None

            return self.convert_ttml_to_plain_text(subtitle_path)

    def convert_ttml_to_plain_text(self, ttml_file_path):
        """Return the text content of a TTML file as one space-separated string.

        Returns ``None`` (after printing the error) when the file is not
        well-formed XML.
        """
        try:
            # Parse the TTML file
            root = ET.parse(ttml_file_path).getroot()
        except ET.ParseError as e:
            print("[VideoManager] TTML Conversion Error:", e)
            return None

        # itertext() yields both element text and tail text in document order,
        # so words that follow an inline child such as <br/> are not lost.
        chunks = (chunk.strip() for chunk in root.itertext())
        return " ".join(chunk for chunk in chunks if chunk)
|
||||
Reference in New Issue
Block a user