Initial Commit
This commit is contained in:
		
						commit
						f862e1b8bb
					
				
							
								
								
									
										6
									
								
								.dockerignore
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								.dockerignore
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | |||||||
|  | __pycache__ | ||||||
|  | .direnv | ||||||
|  | data | ||||||
|  | venv | ||||||
|  | openai_key | ||||||
|  | minyma.egg-info/ | ||||||
							
								
								
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | |||||||
|  | __pycache__ | ||||||
|  | .direnv | ||||||
|  | data | ||||||
|  | venv | ||||||
|  | openai_key | ||||||
|  | minyma.egg-info/ | ||||||
							
								
								
									
										22
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | |||||||
|  | repos: | ||||||
|  |   - repo: https://github.com/psf/black | ||||||
|  |     rev: 23.9.1 | ||||||
|  |     hooks: | ||||||
|  |       - id: black | ||||||
|  |         name: black | ||||||
|  |         language_version: python3.10 | ||||||
|  |         files: "^minyma/|^setup.py|^tests/minyma/" | ||||||
|  |   - repo: https://github.com/pycqa/flake8 | ||||||
|  |     rev: 6.1.0 | ||||||
|  |     hooks: | ||||||
|  |       - id: flake8 | ||||||
|  |         name: flake8 | ||||||
|  |         args: ["--config=.flake8"] | ||||||
|  |         files: "^minyma/|^setup.py|^tests/minyma/" | ||||||
|  |   - repo: https://github.com/pycqa/isort | ||||||
|  |     rev: 5.12.0 | ||||||
|  |     hooks: | ||||||
|  |       - id: isort | ||||||
|  |         name: isort | ||||||
|  |         args: ["--profile", "black", "--filter-files"] | ||||||
|  |         files: "^minyma/|^setup.py|^tests/minyma/" | ||||||
							
								
								
									
										26
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,26 @@ | |||||||
# Build Container
FROM python:3.11-slim

WORKDIR /app

# Install Curl
# `update` and `install` share one layer so a cached, stale package index is
# never reused; the index is removed afterwards to keep the layer small.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave curl unable to verify the HTTPS download below.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Chroma Dependencies
# Pre-seed the ONNX model archive into Chroma's cache directory.
# --fail aborts the build on an HTTP error instead of saving the error body
# as onnx.tar.gz.
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/ \
    && curl --fail --silent --show-error --location \
        https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz \
        --output /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz

# Install App & Gunicorn
# The source is copied only after the dependency layers above, so editing
# application code does not invalidate the apt/model layers.
COPY . /app
RUN pip install --no-cache-dir . gunicorn

# Cleanup
# The package is installed into site-packages, so the source tree is no
# longer needed at runtime.
RUN rm -rf /app

# Start Application
ENTRYPOINT ["gunicorn"]
EXPOSE 5000
CMD ["minyma:create_app()", "--bind", "0.0.0.0:5000", "--threads=4", "--access-logfile", "-"]
							
								
								
									
										339
									
								
								LICENSE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										339
									
								
								LICENSE
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,339 @@ | |||||||
|  | GNU GENERAL PUBLIC LICENSE | ||||||
|  |                        Version 2, June 1991 | ||||||
|  | 
 | ||||||
|  |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., | ||||||
|  |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  Everyone is permitted to copy and distribute verbatim copies | ||||||
|  |  of this license document, but changing it is not allowed. | ||||||
|  | 
 | ||||||
|  |                             Preamble | ||||||
|  | 
 | ||||||
|  |   The licenses for most software are designed to take away your | ||||||
|  | freedom to share and change it.  By contrast, the GNU General Public | ||||||
|  | License is intended to guarantee your freedom to share and change free | ||||||
|  | software--to make sure the software is free for all its users.  This | ||||||
|  | General Public License applies to most of the Free Software | ||||||
|  | Foundation's software and to any other program whose authors commit to | ||||||
|  | using it.  (Some other Free Software Foundation software is covered by | ||||||
|  | the GNU Lesser General Public License instead.)  You can apply it to | ||||||
|  | your programs, too. | ||||||
|  | 
 | ||||||
|  |   When we speak of free software, we are referring to freedom, not | ||||||
|  | price.  Our General Public Licenses are designed to make sure that you | ||||||
|  | have the freedom to distribute copies of free software (and charge for | ||||||
|  | this service if you wish), that you receive source code or can get it | ||||||
|  | if you want it, that you can change the software or use pieces of it | ||||||
|  | in new free programs; and that you know you can do these things. | ||||||
|  | 
 | ||||||
|  |   To protect your rights, we need to make restrictions that forbid | ||||||
|  | anyone to deny you these rights or to ask you to surrender the rights. | ||||||
|  | These restrictions translate to certain responsibilities for you if you | ||||||
|  | distribute copies of the software, or if you modify it. | ||||||
|  | 
 | ||||||
|  |   For example, if you distribute copies of such a program, whether | ||||||
|  | gratis or for a fee, you must give the recipients all the rights that | ||||||
|  | you have.  You must make sure that they, too, receive or can get the | ||||||
|  | source code.  And you must show them these terms so they know their | ||||||
|  | rights. | ||||||
|  | 
 | ||||||
|  |   We protect your rights with two steps: (1) copyright the software, and | ||||||
|  | (2) offer you this license which gives you legal permission to copy, | ||||||
|  | distribute and/or modify the software. | ||||||
|  | 
 | ||||||
|  |   Also, for each author's protection and ours, we want to make certain | ||||||
|  | that everyone understands that there is no warranty for this free | ||||||
|  | software.  If the software is modified by someone else and passed on, we | ||||||
|  | want its recipients to know that what they have is not the original, so | ||||||
|  | that any problems introduced by others will not reflect on the original | ||||||
|  | authors' reputations. | ||||||
|  | 
 | ||||||
|  |   Finally, any free program is threatened constantly by software | ||||||
|  | patents.  We wish to avoid the danger that redistributors of a free | ||||||
|  | program will individually obtain patent licenses, in effect making the | ||||||
|  | program proprietary.  To prevent this, we have made it clear that any | ||||||
|  | patent must be licensed for everyone's free use or not licensed at all. | ||||||
|  | 
 | ||||||
|  |   The precise terms and conditions for copying, distribution and | ||||||
|  | modification follow. | ||||||
|  | 
 | ||||||
|  |                     GNU GENERAL PUBLIC LICENSE | ||||||
|  |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION | ||||||
|  | 
 | ||||||
|  |   0. This License applies to any program or other work which contains | ||||||
|  | a notice placed by the copyright holder saying it may be distributed | ||||||
|  | under the terms of this General Public License.  The "Program", below, | ||||||
|  | refers to any such program or work, and a "work based on the Program" | ||||||
|  | means either the Program or any derivative work under copyright law: | ||||||
|  | that is to say, a work containing the Program or a portion of it, | ||||||
|  | either verbatim or with modifications and/or translated into another | ||||||
|  | language.  (Hereinafter, translation is included without limitation in | ||||||
|  | the term "modification".)  Each licensee is addressed as "you". | ||||||
|  | 
 | ||||||
|  | Activities other than copying, distribution and modification are not | ||||||
|  | covered by this License; they are outside its scope.  The act of | ||||||
|  | running the Program is not restricted, and the output from the Program | ||||||
|  | is covered only if its contents constitute a work based on the | ||||||
|  | Program (independent of having been made by running the Program). | ||||||
|  | Whether that is true depends on what the Program does. | ||||||
|  | 
 | ||||||
|  |   1. You may copy and distribute verbatim copies of the Program's | ||||||
|  | source code as you receive it, in any medium, provided that you | ||||||
|  | conspicuously and appropriately publish on each copy an appropriate | ||||||
|  | copyright notice and disclaimer of warranty; keep intact all the | ||||||
|  | notices that refer to this License and to the absence of any warranty; | ||||||
|  | and give any other recipients of the Program a copy of this License | ||||||
|  | along with the Program. | ||||||
|  | 
 | ||||||
|  | You may charge a fee for the physical act of transferring a copy, and | ||||||
|  | you may at your option offer warranty protection in exchange for a fee. | ||||||
|  | 
 | ||||||
|  |   2. You may modify your copy or copies of the Program or any portion | ||||||
|  | of it, thus forming a work based on the Program, and copy and | ||||||
|  | distribute such modifications or work under the terms of Section 1 | ||||||
|  | above, provided that you also meet all of these conditions: | ||||||
|  | 
 | ||||||
|  |     a) You must cause the modified files to carry prominent notices | ||||||
|  |     stating that you changed the files and the date of any change. | ||||||
|  | 
 | ||||||
|  |     b) You must cause any work that you distribute or publish, that in | ||||||
|  |     whole or in part contains or is derived from the Program or any | ||||||
|  |     part thereof, to be licensed as a whole at no charge to all third | ||||||
|  |     parties under the terms of this License. | ||||||
|  | 
 | ||||||
|  |     c) If the modified program normally reads commands interactively | ||||||
|  |     when run, you must cause it, when started running for such | ||||||
|  |     interactive use in the most ordinary way, to print or display an | ||||||
|  |     announcement including an appropriate copyright notice and a | ||||||
|  |     notice that there is no warranty (or else, saying that you provide | ||||||
|  |     a warranty) and that users may redistribute the program under | ||||||
|  |     these conditions, and telling the user how to view a copy of this | ||||||
|  |     License.  (Exception: if the Program itself is interactive but | ||||||
|  |     does not normally print such an announcement, your work based on | ||||||
|  |     the Program is not required to print an announcement.) | ||||||
|  | 
 | ||||||
|  | These requirements apply to the modified work as a whole.  If | ||||||
|  | identifiable sections of that work are not derived from the Program, | ||||||
|  | and can be reasonably considered independent and separate works in | ||||||
|  | themselves, then this License, and its terms, do not apply to those | ||||||
|  | sections when you distribute them as separate works.  But when you | ||||||
|  | distribute the same sections as part of a whole which is a work based | ||||||
|  | on the Program, the distribution of the whole must be on the terms of | ||||||
|  | this License, whose permissions for other licensees extend to the | ||||||
|  | entire whole, and thus to each and every part regardless of who wrote it. | ||||||
|  | 
 | ||||||
|  | Thus, it is not the intent of this section to claim rights or contest | ||||||
|  | your rights to work written entirely by you; rather, the intent is to | ||||||
|  | exercise the right to control the distribution of derivative or | ||||||
|  | collective works based on the Program. | ||||||
|  | 
 | ||||||
|  | In addition, mere aggregation of another work not based on the Program | ||||||
|  | with the Program (or with a work based on the Program) on a volume of | ||||||
|  | a storage or distribution medium does not bring the other work under | ||||||
|  | the scope of this License. | ||||||
|  | 
 | ||||||
|  |   3. You may copy and distribute the Program (or a work based on it, | ||||||
|  | under Section 2) in object code or executable form under the terms of | ||||||
|  | Sections 1 and 2 above provided that you also do one of the following: | ||||||
|  | 
 | ||||||
|  |     a) Accompany it with the complete corresponding machine-readable | ||||||
|  |     source code, which must be distributed under the terms of Sections | ||||||
|  |     1 and 2 above on a medium customarily used for software interchange; or, | ||||||
|  | 
 | ||||||
|  |     b) Accompany it with a written offer, valid for at least three | ||||||
|  |     years, to give any third party, for a charge no more than your | ||||||
|  |     cost of physically performing source distribution, a complete | ||||||
|  |     machine-readable copy of the corresponding source code, to be | ||||||
|  |     distributed under the terms of Sections 1 and 2 above on a medium | ||||||
|  |     customarily used for software interchange; or, | ||||||
|  | 
 | ||||||
|  |     c) Accompany it with the information you received as to the offer | ||||||
|  |     to distribute corresponding source code.  (This alternative is | ||||||
|  |     allowed only for noncommercial distribution and only if you | ||||||
|  |     received the program in object code or executable form with such | ||||||
|  |     an offer, in accord with Subsection b above.) | ||||||
|  | 
 | ||||||
|  | The source code for a work means the preferred form of the work for | ||||||
|  | making modifications to it.  For an executable work, complete source | ||||||
|  | code means all the source code for all modules it contains, plus any | ||||||
|  | associated interface definition files, plus the scripts used to | ||||||
|  | control compilation and installation of the executable.  However, as a | ||||||
|  | special exception, the source code distributed need not include | ||||||
|  | anything that is normally distributed (in either source or binary | ||||||
|  | form) with the major components (compiler, kernel, and so on) of the | ||||||
|  | operating system on which the executable runs, unless that component | ||||||
|  | itself accompanies the executable. | ||||||
|  | 
 | ||||||
|  | If distribution of executable or object code is made by offering | ||||||
|  | access to copy from a designated place, then offering equivalent | ||||||
|  | access to copy the source code from the same place counts as | ||||||
|  | distribution of the source code, even though third parties are not | ||||||
|  | compelled to copy the source along with the object code. | ||||||
|  | 
 | ||||||
|  |   4. You may not copy, modify, sublicense, or distribute the Program | ||||||
|  | except as expressly provided under this License.  Any attempt | ||||||
|  | otherwise to copy, modify, sublicense or distribute the Program is | ||||||
|  | void, and will automatically terminate your rights under this License. | ||||||
|  | However, parties who have received copies, or rights, from you under | ||||||
|  | this License will not have their licenses terminated so long as such | ||||||
|  | parties remain in full compliance. | ||||||
|  | 
 | ||||||
|  |   5. You are not required to accept this License, since you have not | ||||||
|  | signed it.  However, nothing else grants you permission to modify or | ||||||
|  | distribute the Program or its derivative works.  These actions are | ||||||
|  | prohibited by law if you do not accept this License.  Therefore, by | ||||||
|  | modifying or distributing the Program (or any work based on the | ||||||
|  | Program), you indicate your acceptance of this License to do so, and | ||||||
|  | all its terms and conditions for copying, distributing or modifying | ||||||
|  | the Program or works based on it. | ||||||
|  | 
 | ||||||
|  |   6. Each time you redistribute the Program (or any work based on the | ||||||
|  | Program), the recipient automatically receives a license from the | ||||||
|  | original licensor to copy, distribute or modify the Program subject to | ||||||
|  | these terms and conditions.  You may not impose any further | ||||||
|  | restrictions on the recipients' exercise of the rights granted herein. | ||||||
|  | You are not responsible for enforcing compliance by third parties to | ||||||
|  | this License. | ||||||
|  | 
 | ||||||
|  |   7. If, as a consequence of a court judgment or allegation of patent | ||||||
|  | infringement or for any other reason (not limited to patent issues), | ||||||
|  | conditions are imposed on you (whether by court order, agreement or | ||||||
|  | otherwise) that contradict the conditions of this License, they do not | ||||||
|  | excuse you from the conditions of this License.  If you cannot | ||||||
|  | distribute so as to satisfy simultaneously your obligations under this | ||||||
|  | License and any other pertinent obligations, then as a consequence you | ||||||
|  | may not distribute the Program at all.  For example, if a patent | ||||||
|  | license would not permit royalty-free redistribution of the Program by | ||||||
|  | all those who receive copies directly or indirectly through you, then | ||||||
|  | the only way you could satisfy both it and this License would be to | ||||||
|  | refrain entirely from distribution of the Program. | ||||||
|  | 
 | ||||||
|  | If any portion of this section is held invalid or unenforceable under | ||||||
|  | any particular circumstance, the balance of the section is intended to | ||||||
|  | apply and the section as a whole is intended to apply in other | ||||||
|  | circumstances. | ||||||
|  | 
 | ||||||
|  | It is not the purpose of this section to induce you to infringe any | ||||||
|  | patents or other property right claims or to contest validity of any | ||||||
|  | such claims; this section has the sole purpose of protecting the | ||||||
|  | integrity of the free software distribution system, which is | ||||||
|  | implemented by public license practices.  Many people have made | ||||||
|  | generous contributions to the wide range of software distributed | ||||||
|  | through that system in reliance on consistent application of that | ||||||
|  | system; it is up to the author/donor to decide if he or she is willing | ||||||
|  | to distribute software through any other system and a licensee cannot | ||||||
|  | impose that choice. | ||||||
|  | 
 | ||||||
|  | This section is intended to make thoroughly clear what is believed to | ||||||
|  | be a consequence of the rest of this License. | ||||||
|  | 
 | ||||||
|  |   8. If the distribution and/or use of the Program is restricted in | ||||||
|  | certain countries either by patents or by copyrighted interfaces, the | ||||||
|  | original copyright holder who places the Program under this License | ||||||
|  | may add an explicit geographical distribution limitation excluding | ||||||
|  | those countries, so that distribution is permitted only in or among | ||||||
|  | countries not thus excluded.  In such case, this License incorporates | ||||||
|  | the limitation as if written in the body of this License. | ||||||
|  | 
 | ||||||
|  |   9. The Free Software Foundation may publish revised and/or new versions | ||||||
|  | of the General Public License from time to time.  Such new versions will | ||||||
|  | be similar in spirit to the present version, but may differ in detail to | ||||||
|  | address new problems or concerns. | ||||||
|  | 
 | ||||||
|  | Each version is given a distinguishing version number.  If the Program | ||||||
|  | specifies a version number of this License which applies to it and "any | ||||||
|  | later version", you have the option of following the terms and conditions | ||||||
|  | either of that version or of any later version published by the Free | ||||||
|  | Software Foundation.  If the Program does not specify a version number of | ||||||
|  | this License, you may choose any version ever published by the Free Software | ||||||
|  | Foundation. | ||||||
|  | 
 | ||||||
|  |   10. If you wish to incorporate parts of the Program into other free | ||||||
|  | programs whose distribution conditions are different, write to the author | ||||||
|  | to ask for permission.  For software which is copyrighted by the Free | ||||||
|  | Software Foundation, write to the Free Software Foundation; we sometimes | ||||||
|  | make exceptions for this.  Our decision will be guided by the two goals | ||||||
|  | of preserving the free status of all derivatives of our free software and | ||||||
|  | of promoting the sharing and reuse of software generally. | ||||||
|  | 
 | ||||||
|  |                             NO WARRANTY | ||||||
|  | 
 | ||||||
|  |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY | ||||||
|  | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN | ||||||
|  | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES | ||||||
|  | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED | ||||||
|  | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||||||
|  | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS | ||||||
|  | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE | ||||||
|  | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, | ||||||
|  | REPAIR OR CORRECTION. | ||||||
|  | 
 | ||||||
|  |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING | ||||||
|  | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR | ||||||
|  | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, | ||||||
|  | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING | ||||||
|  | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED | ||||||
|  | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY | ||||||
|  | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER | ||||||
|  | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE | ||||||
|  | POSSIBILITY OF SUCH DAMAGES. | ||||||
|  | 
 | ||||||
|  |                      END OF TERMS AND CONDITIONS | ||||||
|  | 
 | ||||||
|  |             How to Apply These Terms to Your New Programs | ||||||
|  | 
 | ||||||
|  |   If you develop a new program, and you want it to be of the greatest | ||||||
|  | possible use to the public, the best way to achieve this is to make it | ||||||
|  | free software which everyone can redistribute and change under these terms. | ||||||
|  | 
 | ||||||
|  |   To do so, attach the following notices to the program.  It is safest | ||||||
|  | to attach them to the start of each source file to most effectively | ||||||
|  | convey the exclusion of warranty; and each file should have at least | ||||||
|  | the "copyright" line and a pointer to where the full notice is found. | ||||||
|  | 
 | ||||||
|  |     {{description}} | ||||||
|  |     Copyright (C) {{year}}  {{fullname}} | ||||||
|  | 
 | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  | 
 | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  | 
 | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  | 
 | ||||||
|  | Also add information on how to contact you by electronic and paper mail. | ||||||
|  | 
 | ||||||
|  | If the program is interactive, make it output a short notice like this | ||||||
|  | when it starts in an interactive mode: | ||||||
|  | 
 | ||||||
|  |     Gnomovision version 69, Copyright (C) year name of author | ||||||
|  |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. | ||||||
|  |     This is free software, and you are welcome to redistribute it | ||||||
|  |     under certain conditions; type `show c' for details. | ||||||
|  | 
 | ||||||
|  | The hypothetical commands `show w' and `show c' should show the appropriate | ||||||
|  | parts of the General Public License.  Of course, the commands you use may | ||||||
|  | be called something other than `show w' and `show c'; they could even be | ||||||
|  | mouse-clicks or menu items--whatever suits your program. | ||||||
|  | 
 | ||||||
|  | You should also get your employer (if you work as a programmer) or your | ||||||
|  | school, if any, to sign a "copyright disclaimer" for the program, if | ||||||
|  | necessary.  Here is a sample; alter the names: | ||||||
|  | 
 | ||||||
|  |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program | ||||||
|  |   `Gnomovision' (which makes passes at compilers) written by James Hacker. | ||||||
|  | 
 | ||||||
|  |   {signature of Ty Coon}, 1 April 1989 | ||||||
|  |   Ty Coon, President of Vice | ||||||
|  | 
 | ||||||
|  | This General Public License does not permit incorporating your program into | ||||||
|  | proprietary programs.  If your program is a subroutine library, you may | ||||||
|  | consider it more useful to permit linking proprietary applications with the | ||||||
|  | library.  If this is what you want to do, use the GNU Lesser General | ||||||
|  | Public License instead of this License. | ||||||
							
								
								
									
										2
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								MANIFEST.in
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,2 @@ | |||||||
|  | recursive-include minyma/api *.py | ||||||
|  | recursive-include minyma/templates * | ||||||
							
								
								
									
										76
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,76 @@ | |||||||
|  | # Usage | ||||||
|  | 
 | ||||||
|  | ## Running Server | ||||||
|  | 
 | ||||||
|  | ```bash | ||||||
|  | # Locally | ||||||
|  | minyma server run | ||||||
|  | 
 | ||||||
|  | # Docker Quick Start | ||||||
|  | make docker_build_local | ||||||
|  | docker run \ | ||||||
|  |     -p 5000:5000 \ | ||||||
|  |     -e OPENAI_API_KEY=`cat openai_key` \ | ||||||
|  |     -e DATA_PATH=/data \ | ||||||
|  |     -v ./data:/data \ | ||||||
|  |     minyma:latest | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | The server will now be accessible at `http://localhost:5000` | ||||||
|  | 
 | ||||||
|  | ## Normalizing & Loading Data | ||||||
|  | 
 | ||||||
|  | Minyma is designed to be extensible. You can add normalizers and vector | ||||||
|  | databases by implementing the interfaces defined in `./minyma/normalizer.py` | ||||||
|  | and `./minyma/vdb.py`. At the moment, the only supported database is `chroma` | ||||||
|  | and the only supported normalizer is `pubmed`. | ||||||
|  | 
 | ||||||
|  | To normalize data, you can use Minyma's `normalize` CLI command: | ||||||
|  | 
 | ||||||
|  | ```bash | ||||||
|  | minyma normalize --filename ./pubmed_manuscripts.jsonl --normalizer pubmed --database chroma --datapath ./chroma | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | The above example does the following: | ||||||
|  | 
 | ||||||
|  | - Uses the `pubmed` normalizer | ||||||
|  | - Normalizes the `./pubmed_manuscripts.jsonl` raw dataset [0] | ||||||
|  | - Loads the output into a `chroma` database and persists the data to the `./chroma` directory | ||||||
|  | 
 | ||||||
|  | **NOTE:** The above dataset took about an hour to normalize on my MBP (M2 Max). | ||||||
|  | 
 | ||||||
|  | [0] https://huggingface.co/datasets/TaylorAI/pubmed_author_manuscripts/tree/main | ||||||
|  | 
 | ||||||
|  | # Development | ||||||
|  | 
 | ||||||
|  | ```bash | ||||||
|  | # Initiate | ||||||
|  | python3 -m venv venv | ||||||
|  | . ./venv/bin/activate | ||||||
|  | 
 | ||||||
|  | # Local Development | ||||||
|  | pip install -e . | ||||||
|  | 
 | ||||||
|  | # Creds | ||||||
|  | export OPENAI_API_KEY=`cat openai_key` | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | # Datasets | ||||||
|  | 
 | ||||||
|  | https://huggingface.co/datasets/TaylorAI/pubmed_author_manuscripts/tree/main | ||||||
|  | 
 | ||||||
|  | # Notes | ||||||
|  | 
 | ||||||
|  | - https://docs.pinecone.io/docs/openai | ||||||
|  | - https://docs.pinecone.io/docs/langchain | ||||||
|  | - https://docs.pinecone.io/docs/langchain#creating-embeddings | ||||||
|  | - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb | ||||||
|  | - https://medium.com/@abhishekranjandev/building-a-speech-recognition-app-with-deepspeech-word2vec-and-pinecone-1e5907d103e2 | ||||||
|  | - https://medium.com/@mishra.thedeepak/doc2vec-simple-implementation-example-df2afbbfbad5 | ||||||
|  | - https://cookbook.openai.com/examples/semantic_text_search_using_embeddings | ||||||
|  | 
 | ||||||
|  | TODO: | ||||||
|  | 
 | ||||||
|  | - Build this with Word2Vec / Doc2Vec: https://docs.pinecone.io/docs/openai | ||||||
|  | - https://radimrehurek.com/gensim/auto_examples/tutorials/run_doc2vec_lee.html#sphx-glr-auto-examples-tutorials-run-doc2vec-lee-py | ||||||
|  | - https://webcache.googleusercontent.com/search?q=cache:https://medium.com/@rubentak/unleashing-the-power-of-intelligent-chatbots-with-gpt-4-and-vector-databases-a-step-by-step-8027e2ce9e78 | ||||||
							
								
								
									
										73
									
								
								minyma/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								minyma/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,73 @@ | |||||||
|  | import click | ||||||
|  | import signal | ||||||
|  | import sys | ||||||
|  | from importlib.metadata import version | ||||||
|  | from minyma.config import Config | ||||||
|  | from minyma.oai import OpenAIConnector | ||||||
|  | from minyma.vdb import ChromaDB | ||||||
|  | from flask import Flask | ||||||
|  | from flask.cli import FlaskGroup | ||||||
|  | 
 | ||||||
# Package version, looked up from the "minyma" distribution's metadata.
__version__ = version("minyma")
|  | 
 | ||||||
def signal_handler(sig, frame):
    """Terminate the process with exit status 0.

    Takes the ``(signal number, frame)`` pair passed to handlers registered
    through ``signal.signal``; neither argument is inspected.
    """
    raise SystemExit(0)
|  | 
 | ||||||
|  | 
 | ||||||
def create_app():
    """Build the Flask application and its shared service objects.

    Creates the Chroma-backed vector store and the OpenAI connector, stores
    them in the module-level globals ``cdb`` and ``oai`` (the v1 blueprint
    reads them as ``minyma.cdb`` / ``minyma.oai``), and registers the
    ``common`` and ``v1`` blueprints.

    Returns:
        The configured ``Flask`` application.
    """
    global oai, cdb

    # Imported here rather than at module top: minyma.api.v1 imports this
    # package itself.
    import minyma.api.common as api_common
    import minyma.api.v1 as api_v1

    # NOTE(review): the global `oai` reuses the name of the `minyma.oai`
    # submodule imported above, so after this call `minyma.oai` refers to the
    # connector instance - confirm this shadowing is intended.
    cdb = ChromaDB(Config.DATA_PATH)
    oai = OpenAIConnector(Config.OPENAI_API_KEY, cdb)

    app = Flask(__name__)
    for blueprint in (api_common.bp, api_v1.bp):
        app.register_blueprint(blueprint)

    return app
|  | 
 | ||||||
|  | 
 | ||||||
# Root click command group; the `server` group and the `normalize` command
# are attached to it. The docstring below is what click shows as help text.
@click.group()
def cli():
    """Minyma CLI"""
|  | 
 | ||||||
|  | 
 | ||||||
# Flask CLI sub-group of `cli`; FlaskGroup is handed `create_app` as the
# factory for the application.
@cli.group(cls=FlaskGroup, create_app=create_app)
def server():
    """Minyma flask server"""
|  | 
 | ||||||
|  | 
 | ||||||
@cli.command()
@click.option('--filename', type=click.File('r'), required=True)
@click.option('--normalizer', help="pubmed", required=True)
@click.option('--database', help="chroma", required=True)
@click.option('--datapath', type=click.Path(), help="database datapath", required=False)
def normalize(filename, normalizer, database, datapath):
    """Minyma data normalizer & loader"""
    # (The docstring above is the click help text, so details live here.)
    #
    # filename:   open text file with the raw data, one record per line.
    # normalizer: name of the normalizer to apply; only "pubmed" is supported.
    # database:   name of the target vector database; only "chroma" is supported.
    # datapath:   base directory for the database; required for "chroma".
    #
    # Invalid options raise click.UsageError, so the command reports the
    # problem on stderr and exits non-zero instead of printing and exiting 0.

    database = database.lower()
    normalizer = normalizer.lower()

    # Validate every option before constructing anything, so a bad normalizer
    # name no longer leaves a freshly created database behind.
    if database != "chroma":
        raise click.UsageError(f"INVALID DATABASE: {database}")
    if datapath is None:
        raise click.UsageError("INVALID DATAPATH")
    if normalizer != "pubmed":
        raise click.UsageError(f"INVALID NORMALIZER: {normalizer}")

    # Select Normalizer
    from minyma.normalizer import PubMedNormalizer
    norm = PubMedNormalizer(filename)

    # Open Database
    vdb = ChromaDB(datapath)

    # Process Data
    vdb.load_documents(norm)
|  | 
 | ||||||
|  | 
 | ||||||
# Registered at import time: SIGINT (Ctrl-C) is handled by signal_handler.
signal.signal(signal.SIGINT, signal_handler)
							
								
								
									
										0
									
								
								minyma/api/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								minyma/api/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										7
									
								
								minyma/api/common.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								minyma/api/common.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | |||||||
|  | from flask import make_response, render_template, send_from_directory | ||||||
|  | from flask import Blueprint | ||||||
|  | bp = Blueprint("common", __name__) | ||||||
|  | 
 | ||||||
@bp.route("/", methods=["GET"])
def main_entry():
    """Serve the chat page by rendering the ``index.html`` template."""
    page = render_template("index.html")
    return make_response(page)
							
								
								
									
										38
									
								
								minyma/api/v1.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								minyma/api/v1.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,38 @@ | |||||||
|  | import minyma | ||||||
|  | 
 | ||||||
|  | from flask import Blueprint, request | ||||||
|  | bp = Blueprint("v1", __name__, url_prefix="/api/v1") | ||||||
|  | 
 | ||||||
@bp.route("/query", methods=["POST"])
def get_response():
    """Return OpenAI LLM final response with vector db embedding context.

    Expects a JSON object body carrying the question under ``"message"``.

    Returns:
        Whatever ``minyma.oai.query`` returns for the message, or
        ``{"error": ...}`` when the message is missing or blank.
    """
    data = request.get_json()

    # A JSON array/scalar has no `.get`; treat it like a missing payload
    # instead of failing with an AttributeError.
    if not isinstance(data, dict) or not data:
        return {"error": "Missing Message"}

    # Check for absence before str(): str(None) would otherwise turn a
    # missing message into the literal query "None".
    raw_message = data.get("message")
    if raw_message is None:
        return {"error": "Missing Message"}

    message = str(raw_message)
    if message.strip() == "":
        return {"error": "Empty Message"}

    # NOTE(review): errors are still returned with the default 200 status to
    # keep the response contract unchanged; consider moving to 400.
    oai_response = minyma.oai.query(message)
    return oai_response
|  | 
 | ||||||
|  | 
 | ||||||
@bp.route("/related", methods=["POST"])
def get_related():
    """Return the raw vector db related response.

    Expects a JSON object body carrying the question under ``"message"``.

    Returns:
        Whatever ``minyma.cdb.get_related`` returns for the message, or
        ``{"error": ...}`` when the message is missing or blank.
    """
    data = request.get_json()

    # A JSON array/scalar has no `.get`; treat it like a missing payload
    # instead of failing with an AttributeError.
    if not isinstance(data, dict) or not data:
        return {"error": "Missing Message"}

    # Check for absence before str(): str(None) would otherwise turn a
    # missing message into the literal query "None".
    raw_message = data.get("message")
    if raw_message is None:
        return {"error": "Missing Message"}

    message = str(raw_message)
    if message.strip() == "":
        return {"error": "Empty Message"}

    # NOTE(review): errors are still returned with the default 200 status to
    # keep the response contract unchanged; consider moving to 400.
    related_documents = minyma.cdb.get_related(message)
    return related_documents
							
								
								
									
										22
									
								
								minyma/config.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								minyma/config.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | |||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def get_env(key, default=None, required=False) -> str:
    """Wrapper for gathering env vars.

    Args:
        key: Name of the environment variable.
        default: Value used when the variable is not set.
        required: When true, an unset variable is an error.

    Returns:
        The variable's value (or ``default``) passed through ``str``. An unset
        variable with no default therefore yields the string ``"None"``.

    Raises:
        AssertionError: If ``required`` is true and ``key`` is not set.
    """
    if required and key not in os.environ:
        # Raised explicitly rather than via `assert`: assert statements are
        # removed under `python -O`, which would silently skip this check.
        raise AssertionError("Missing Environment Variable: %s" % key)
    return str(os.environ.get(key, default))
|  | 
 | ||||||
|  | 
 | ||||||
class Config:
    """Wrap application configurations

    All values are read from the environment once, when this class body is
    executed (i.e. when the module is imported).

    Attributes
    ----------
    DATA_PATH : str
        The path where to store any resources (default: ./data)
    CHROMA_DATA_PATH : str
        Path for Chroma data (default: ./data/chroma)
    OPENAI_API_KEY : str
        OpenAI API key; required, so importing this module fails when the
        variable is not set
    """

    DATA_PATH: str = get_env("DATA_PATH", default="./data")
    # NOTE(review): not read by any code visible alongside this module - the
    # app builds its Chroma path from DATA_PATH instead; confirm it is needed.
    CHROMA_DATA_PATH: str = get_env("CHROMA_DATA_PATH", default="./data/chroma")
    OPENAI_API_KEY: str = get_env("OPENAI_API_KEY", required=True)
							
								
								
									
										46
									
								
								minyma/normalizer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								minyma/normalizer.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | |||||||
|  | from io import TextIOWrapper | ||||||
|  | import json | ||||||
|  | 
 | ||||||
class DataNormalizer:
    """Interface for normalizers.

    A concrete normalizer wraps an open text file and, when iterated, yields
    one ``{"doc": ..., "id": ...}`` dict per record. The base class itself
    does nothing.
    """

    def __init__(self, file: TextIOWrapper):
        pass

    def __iter__(self):
        pass


class PubMedNormalizer(DataNormalizer):
    """Iterate a JSON-lines file and yield one normalized document per line.

    Each non-blank line must be a JSON object whose ``"text"`` entry is a
    string. The text is lower-cased, everything up to and including the
    second occurrence of "text mining" is discarded, and so is the rest of
    the line that occurrence sits on. Records with fewer than two occurrences
    yield an empty ``"doc"``.
    """

    def __init__(self, file: TextIOWrapper):
        self.file = file

    def __iter__(self):
        marker = "text mining"

        for line_number, line in enumerate(self.file):
            # Blank lines (e.g. a trailing empty line) are skipped rather than
            # handed to json.loads, which would raise on them. enumerate
            # still counts them, so the ID stays the line number.
            if not line.strip():
                continue

            # Load JSON
            record = json.loads(line, strict=False)
            norm_text = record.get("text").lower()

            # Using the second occurrence of "text mining" as a break
            # point. We only capture what follows. A bounded split is used
            # instead of regular expressions because it is significantly
            # faster; limiting it to two splits leaves the tail untouched.
            pieces = norm_text.split(marker, 2)
            norm_text = pieces[2] if len(pieces) == 3 else ""

            # Drop the remainder of the line the marker was found on.
            norm_text = norm_text.partition("\n")[2]

            # ID = Line Number
            yield {"doc": norm_text, "id": str(line_number)}
							
								
								
									
										44
									
								
								minyma/oai.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								minyma/oai.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | |||||||
|  | from typing import Any | ||||||
|  | import openai | ||||||
|  | 
 | ||||||
|  | from minyma.vdb import VectorDB | ||||||
|  | 
 | ||||||
# Prompt borrowed from LangChain. `{context}` and `{question}` are the two
# placeholders filled in with str.format when a query is made.
PROMPT_TEMPLATE = """
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to 
make up an answer.

{context}

Question: {question}
Helpful Answer:
"""
|  | 
 | ||||||
class OpenAIConnector:
    """Answer questions with an OpenAI chat model, using vector db context."""

    def __init__(self, api_key: str, vdb: VectorDB, model: str = "gpt-3.5-turbo"):
        """Store the vector db and model name and configure the OpenAI key.

        Args:
            api_key: OpenAI API key.
            vdb: Vector database queried for documents related to a question.
            model: Chat model name passed to the ChatCompletion endpoint.
        """
        self.vdb = vdb
        self.model = model
        # The key is set on the `openai` module itself, so it is shared by
        # everything in the process that uses the module.
        openai.api_key = api_key

    def query(self, question: str) -> Any:
        """Ask the chat model ``question`` with related documents as context.

        Returns:
            The ChatCompletion response, or ``{"error": "No Context Found"}``
            when the vector db returns no related documents.
        """
        # Get related documents from vector db
        related = self.vdb.get_related(question)

        # Validate results
        all_docs = related.get("docs", [])
        if not all_docs:
            return {"error": "No Context Found"}

        # Join on new line, generate main prompt
        context = "\n".join(all_docs)
        prompt = PROMPT_TEMPLATE.format(context=context, question=question)

        # Query OpenAI ChatCompletion
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )

        # Return Response
        return response
							
								
								
									
										184
									
								
								minyma/templates/index.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								minyma/templates/index.html
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,184 @@ | |||||||
|  | <!DOCTYPE html> | ||||||
|  | <html lang="en"> | ||||||
|  |   <head> | ||||||
|  |     <meta charset="utf-8" /> | ||||||
|  |     <title>Minyma - Chat</title> | ||||||
|  |     <script src="https://cdn.tailwindcss.com"></script> | ||||||
|  |   </head> | ||||||
|  |   <body class="bg-slate-900 h-screen p-5 flex flex-col justify-between"> | ||||||
|  |     <header class="w-full"> | ||||||
|  |       <svg | ||||||
|  |         preserveAspectRatio="xMidYMid meet" | ||||||
|  |         color-interpolation-filters="sRGB" | ||||||
|  |         style="margin: auto" | ||||||
|  |         height="80" | ||||||
|  |         width="200" | ||||||
|  |         viewBox="70 90 200 90" | ||||||
|  |       > | ||||||
|  |         <g | ||||||
|  |           fill="#ebb919" | ||||||
|  |           transform="translate(69.05000305175781,91.03400039672852)" | ||||||
|  |         > | ||||||
|  |           <g transform="translate(0,0)"> | ||||||
|  |             <g transform="scale(1)"> | ||||||
|  |               <g> | ||||||
|  |                 <path | ||||||
|  |                   d="M33.96-30.84L33.96-30.84Q36.48-30.84 38.37-29.88 40.26-28.92 41.46-27.24 42.66-25.56 43.26-23.34 43.86-21.12 43.86-18.54L43.86-18.54 43.86 0 36.66 0 36.66-18.54Q36.66-20.64 35.16-22.14L35.16-22.14Q33.72-23.64 31.56-23.64L31.56-23.64Q29.4-23.64 27.96-22.14L27.96-22.14Q26.46-20.64 26.46-18.54L26.46-18.54 26.46 0 19.26 0 19.26-18.54Q19.26-20.64 17.76-22.14L17.76-22.14Q17.04-22.92 16.11-23.28 15.18-23.64 14.16-23.64L14.16-23.64Q11.94-23.64 10.5-22.14L10.5-22.14Q9-20.64 9-18.54L9-18.54 9 0 1.8 0 1.8-30 9-30 9-27.36Q10.74-28.86 12.66-29.85 14.58-30.84 16.56-30.84L16.56-30.84Q19.26-30.84 21-29.76 22.74-28.68 24.12-26.76L24.12-26.76Q25.74-28.5 28.32-29.67 30.9-30.84 33.96-30.84ZM54.96 0L47.76 0 47.76-30 54.96-30 54.96 0ZM47.76-34.8L47.76-42 54.96-42 54.96-34.8 47.76-34.8ZM74.28-30.84L74.28-30.84Q77.22-30.84 79.62-29.73 82.02-28.62 83.73-26.67 85.44-24.72 86.37-22.14 87.3-19.56 87.3-16.62L87.3-16.62 87.3 0 80.1 0 80.1-16.62Q80.1-19.62 78-21.6L78-21.6Q75.96-23.64 73.08-23.64L73.08-23.64Q70.14-23.64 68.1-21.6L68.1-21.6Q66.06-19.56 66.06-16.62L66.06-16.62 66.06 0 58.86 0 58.86-30 66.06-30 66.06-27.72Q67.68-29.1 69.72-29.97 71.76-30.84 74.28-30.84ZM116.94-30L124.86-30 110.94 0 109.08 4.08Q107.4 7.74 104.04 9.9 100.68 12.06 96.6 12.06L96.6 12.06 93.42 12.06 95.22 4.86 96.96 4.86Q98.7 4.86 100.2 3.9 101.7 2.94 102.42 1.32L102.42 1.32 103.02 0 89.1-30 97.02-30 106.98-8.52 116.94-30ZM159.12-30.84L159.12-30.84Q161.64-30.84 163.53-29.88 165.42-28.92 166.62-27.24 167.82-25.56 168.42-23.34 169.02-21.12 169.02-18.54L169.02-18.54 169.02 0 161.82 0 161.82-18.54Q161.82-20.64 160.32-22.14L160.32-22.14Q158.88-23.64 156.72-23.64L156.72-23.64Q154.56-23.64 153.12-22.14L153.12-22.14Q151.62-20.64 151.62-18.54L151.62-18.54 151.62 0 144.42 0 144.42-18.54Q144.42-20.64 142.92-22.14L142.92-22.14Q142.2-22.92 141.27-23.28 140.34-23.64 139.32-23.64L139.32-23.64Q137.1-23.64 135.66-22.14L135.66-22.14Q134.16-20.64 134.16-18.54L134.16-18.54 134.16 0 126.96 0 126.96-30 134.16-30 
134.16-27.36Q135.9-28.86 137.82-29.85 139.74-30.84 141.72-30.84L141.72-30.84Q144.42-30.84 146.16-29.76 147.9-28.68 149.28-26.76L149.28-26.76Q150.9-28.5 153.48-29.67 156.06-30.84 159.12-30.84ZM196.5-30.06L203.7-30.06 203.7 0 196.5 0 196.5-15Q196.5-18.6 193.98-21.12L193.98-21.12Q191.46-23.64 187.86-23.64L187.86-23.64Q186.12-23.64 184.53-22.98 182.94-22.32 181.74-21.12L181.74-21.12Q179.22-18.6 179.22-15L179.22-15Q179.22-11.46 181.74-8.94L181.74-8.94Q182.94-7.68 184.53-7.05 186.12-6.42 187.86-6.42L187.86-6.42Q189.66-6.42 191.1-7.02L191.1-7.02 193.68-0.6Q190.92 0.78 187.26 0.78L187.26 0.78Q183.96 0.78 181.17-0.45 178.38-1.68 176.34-3.84 174.3-6 173.16-8.88 172.02-11.76 172.02-15L172.02-15Q172.02-18.3 173.16-21.18 174.3-24.06 176.34-26.22 178.38-28.38 181.17-29.61 183.96-30.84 187.26-30.84L187.26-30.84Q190.2-30.84 192.48-29.94 194.76-29.04 196.5-27.66L196.5-27.66 196.5-30.06Z" | ||||||
|  |                   transform="translate(-1.7999999523162842, 42)" | ||||||
|  |                 ></path> | ||||||
|  |               </g> | ||||||
|  |             </g> | ||||||
|  |           </g> | ||||||
|  |           <g fill="#ebb919" transform="translate(5,60.060001373291016)"> | ||||||
|  |             <rect | ||||||
|  |               x="0" | ||||||
|  |               height="1" | ||||||
|  |               y="3.434999942779541" | ||||||
|  |               width="88.66999673843384" | ||||||
|  |             ></rect> | ||||||
|  |             <rect | ||||||
|  |               height="1" | ||||||
|  |               y="3.434999942779541" | ||||||
|  |               width="88.66999673843384" | ||||||
|  |               x="103.22999715805054" | ||||||
|  |             ></rect> | ||||||
|  |             <g transform="translate(91.66999673843384,0)"> | ||||||
|  |               <g transform="scale(1)"> | ||||||
|  |                 <path | ||||||
|  |                   d="M4.43-3.20L2.06-3.20L2.44-4.40C2.58-4.84 2.72-5.28 2.84-5.72C2.97-6.15 3.10-6.60 3.22-7.06L3.26-7.06C3.39-6.60 3.52-6.15 3.65-5.72C3.78-5.28 3.91-4.84 4.06-4.40ZM4.68-2.40L5.42 0L6.49 0L3.83-7.87L2.70-7.87L0.04 0L1.06 0L1.81-2.40ZM7.61-7.87L7.61 0L8.60 0L8.60-7.87Z" | ||||||
|  |                   transform="translate(-0.036000000000000004, 7.872)" | ||||||
|  |                 ></path> | ||||||
|  |               </g> | ||||||
|  |             </g> | ||||||
|  |           </g> | ||||||
|  |         </g> | ||||||
|  |       </svg> | ||||||
|  |     </header> | ||||||
|  |     <main | ||||||
|  |       class="flex flex-col justify-between w-11/12 mx-auto bg-slate-700 text-gray-300 rounded p-2 gap-4 h-full" | ||||||
|  |     > | ||||||
|  |       <div | ||||||
|  |         id="messages" | ||||||
|  |         class="flex flex-col-reverse gap-2 p-2 h-full overflow-scroll" | ||||||
|  |       ></div> | ||||||
|  |       <div | ||||||
|  |         contenteditable | ||||||
|  |         class="w-full border-2 rounded p-1 border-slate-800 outline-none" | ||||||
|  |       ></div> | ||||||
|  |     </main> | ||||||
|  |     <script> | ||||||
|  |       const LOADING_SVG = `<svg | ||||||
|  | 	width="24" | ||||||
|  | 	height="24" | ||||||
|  | 	viewBox="0 0 24 24" | ||||||
|  | 	xmlns="http://www.w3.org/2000/svg" | ||||||
|  | 	fill="currentColor" | ||||||
|  |       > | ||||||
|  | 	<style> | ||||||
|  | 	  .spinner_qM83 { | ||||||
|  | 	    animation: spinner_8HQG 1.05s infinite; | ||||||
|  | 	  } | ||||||
|  | 	  .spinner_oXPr { | ||||||
|  | 	    animation-delay: 0.1s; | ||||||
|  | 	  } | ||||||
|  | 	  .spinner_ZTLf { | ||||||
|  | 	    animation-delay: 0.2s; | ||||||
|  | 	  } | ||||||
|  | 	  @keyframes spinner_8HQG { | ||||||
|  | 	    0%, | ||||||
|  | 	    57.14% { | ||||||
|  | 	      animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1); | ||||||
|  | 	      transform: translate(0); | ||||||
|  | 	    } | ||||||
|  | 	    28.57% { | ||||||
|  | 	      animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33); | ||||||
|  | 	      transform: translateY(-6px); | ||||||
|  | 	    } | ||||||
|  | 	    100% { | ||||||
|  | 	      transform: translate(0); | ||||||
|  | 	    } | ||||||
|  | 	  } | ||||||
|  | 	</style> | ||||||
|  | 	<circle class="spinner_qM83" cx="4" cy="12" r="3"></circle> | ||||||
|  | 	<circle class="spinner_qM83 spinner_oXPr" cx="12" cy="12" r="3"></circle> | ||||||
|  | 	<circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3"></circle> | ||||||
|  |       </svg>`; | ||||||
|  | 
 | ||||||
|  |       /** | ||||||
|  |        * Wrapper API Call | ||||||
|  |        **/ | ||||||
|  |       function apiCall(data) { | ||||||
|  |         return fetch(data.url, { | ||||||
|  |           method: data.method || "GET", | ||||||
|  |           headers: { | ||||||
|  |             "Content-Type": "application/json", | ||||||
|  |           }, | ||||||
|  |           body: JSON.stringify(data.data || {}), | ||||||
|  |         }).then((resp) => resp.json()); | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       function appendMessageElement(name, content) { | ||||||
|  |         // Wrapping Element | ||||||
|  |         let wrapEl = document.createElement("div"); | ||||||
|  |         wrapEl.innerHTML = `<div class="flex"> | ||||||
|  | 	   <span class="font-bold w-24 grow-0 shrink-0"></span> | ||||||
|  | 	   <span class="whitespace-break-spaces w-full"></span> | ||||||
|  | 	 </div>`; | ||||||
|  | 
 | ||||||
|  |         // Get Elements | ||||||
|  |         let nameEl = wrapEl.querySelector("span"); | ||||||
|  |         let contentEl = nameEl.nextElementSibling; | ||||||
|  | 
 | ||||||
|  |         // Prevent XSS | ||||||
|  |         nameEl.innerText = name + ":"; | ||||||
|  |         contentEl.innerText = content; | ||||||
|  | 
 | ||||||
|  |         // Add to DOM | ||||||
|  |         let newEl = wrapEl.querySelector("div"); | ||||||
|  |         document.querySelector("#messages").prepend(newEl); | ||||||
|  | 
 | ||||||
|  |         // Return References (Used in sendMessage) | ||||||
|  |         return { name: nameEl, content: contentEl }; | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       function sendMessage(message) { | ||||||
|  |         // Set Loading | ||||||
|  |         let { name, content } = appendMessageElement("Assistant", ""); | ||||||
|  |         content.innerHTML = LOADING_SVG; | ||||||
|  | 
 | ||||||
|  |         // Request API | ||||||
|  |         apiCall({ | ||||||
|  |           url: "./api/v1/query", | ||||||
|  |           method: "POST", | ||||||
|  |           data: { message }, | ||||||
|  |         }) | ||||||
|  |           .then((data) => { | ||||||
|  |             console.log("SUCCESS:", data); | ||||||
|  |             content.innerText = data.choices[0].message.content; | ||||||
|  |           }) | ||||||
|  |           .catch((e) => { | ||||||
|  |             console.log("ERROR:", e); | ||||||
|  |             content.innerText = "[API ERROR]"; | ||||||
|  |           }); | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       function initListeners() { | ||||||
|  |         let messageBox = document.querySelector("[contenteditable]"); | ||||||
|  |         messageBox.addEventListener("keydown", (evt) => { | ||||||
|  |           if (evt.keyCode != 13) return; | ||||||
|  | 
 | ||||||
|  |           // Send Message & Add to DOM | ||||||
|  |           let textContent = evt.target.innerText; | ||||||
|  |           appendMessageElement("User", textContent); | ||||||
|  |           sendMessage(textContent); | ||||||
|  | 
 | ||||||
|  |           // Reset | ||||||
|  |           evt.target.innerHTML = ""; | ||||||
|  |           evt.preventDefault(); | ||||||
|  |         }); | ||||||
|  |       } | ||||||
|  | 
 | ||||||
|  |       initListeners(); | ||||||
|  |     </script> | ||||||
|  |   </body> | ||||||
|  | </html> | ||||||
							
								
								
									
										75
									
								
								minyma/vdb.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								minyma/vdb.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | |||||||
|  | from chromadb.api import API | ||||||
|  | from itertools import islice | ||||||
|  | from os import path | ||||||
|  | from tqdm.auto import tqdm | ||||||
|  | from typing import Any, cast | ||||||
|  | import chromadb | ||||||
|  | 
 | ||||||
|  | from minyma.normalizer import DataNormalizer | ||||||
|  | 
 | ||||||
def chunk(iterable, chunk_size: int):
    """Yield successive lists of up to ``chunk_size`` items from ``iterable``.

    Only the last list may be shorter than ``chunk_size``; nothing is yielded
    for an empty iterable.
    """
    source = iter(iterable)
    while True:
        batch = list(islice(source, chunk_size))
        if not batch:
            return
        yield batch
|  | 
 | ||||||
class VectorDB:
    """VectorDB Interface

    Base class for vector database backends. Both methods are placeholders
    here that do nothing and return ``None``; backends override them.
    """

    def load_documents(self, normalizer: DataNormalizer):
        """Load the documents produced by ``normalizer`` into the database."""
        return None

    def get_related(self, question: str) -> Any:
        """Return the stored documents related to ``question``."""
        return None
|  | 
 | ||||||
class ChromaDB(VectorDB):
    """ChromaDB VectorDB Type

    Stores documents in a persistent Chroma collection named "vdb", kept in
    the ``chroma`` sub-directory of the given base path.
    """

    def __init__(self, base_path: str):
        """Open (or create) the collection under ``<base_path>/chroma``."""
        chroma_path = path.join(base_path, "chroma")
        self.client: API = chromadb.PersistentClient(path=chroma_path)
        # Maximum number of whitespace-separated words kept per document
        self.word_limit = 1000
        # Number of documents sent to the collection per `add` call
        self.batch_size = 50
        # Number of related documents requested per question
        self.n_results = 2
        self.collection_name: str = "vdb"
        self.collection: chromadb.Collection = self.client.create_collection(
            name=self.collection_name, get_or_create=True
        )

    def get_related(self, question) -> Any:
        """Query the collection for the documents closest to ``question``.

        Returns:
            A dict with the parallel lists ``"distances"``, ``"docs"`` and
            ``"ids"`` for the single query text.
        """
        results = self.collection.query(
            query_texts=[question],
            n_results=self.n_results,
        )

        def first_row(field: str) -> list:
            # Each field holds one list per query text and only one text is
            # sent. A missing, None or empty field becomes an empty list
            # instead of raising.
            rows = results.get(field) or [[]]
            return cast(list, rows)[0]

        return {
            "distances": first_row("distances"),
            "docs": first_row("documents"),
            "ids": first_row("ids"),
        }

    def load_documents(self, normalizer: DataNormalizer):
        """Add every document yielded by ``normalizer`` to the collection.

        Documents are added in batches of ``self.batch_size``, each truncated
        to ``self.word_limit`` words.
        """
        for items in tqdm(chunk(normalizer, self.batch_size)):
            # Limit words per document to accommodate context token limits
            documents = [
                " ".join(item.get("doc").split()[:self.word_limit])
                for item in items
            ]
            ids = [item.get("id") for item in items]

            # Ideally we parse out metadata from each document
            # and pass to the metadata kwarg. However, each
            # document appears to have a slightly different format,
            # so it's difficult to parse out.
            self.collection.add(
                documents=documents,
                ids=ids,
            )
							
								
								
									
										25
									
								
								pyproject.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								pyproject.toml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,25 @@ | |||||||
|  | [project] | ||||||
|  | name = "minyma" | ||||||
|  | version = "0.0.1" | ||||||
|  | description = "AI Chat Bot with Vector DB Context" | ||||||
|  | authors = [ | ||||||
|  |   { name = "Evan Reichard", email = "evan@reichard.io" }, | ||||||
|  | ] | ||||||
|  | license = { file = "LICENSE" } | ||||||
|  | readme = "README.md" | ||||||
|  | requires-python = ">=3.11" | ||||||
|  | dependencies = [ | ||||||
|  |   "Flask>=3.0", | ||||||
|  |   "openai==0.28.1", | ||||||
|  |   "openai[datalib]==0.28.1", | ||||||
|  |   "tqdm", | ||||||
|  |   "chromadb", | ||||||
|  |   "sqlite-utils", | ||||||
|  |   "click" | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | [project.scripts] | ||||||
|  | minyma = "minyma:cli" | ||||||
|  | 
 | ||||||
|  | [tool.setuptools.packages] | ||||||
|  | find = {} | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user