-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate_database.py
More file actions
32 lines (24 loc) · 779 Bytes
/
Copy pathcreate_database.py
File metadata and controls
32 lines (24 loc) · 779 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Load the PDF
# Split it into chunks
# Create the embeddings
# Store them in the Chroma DB
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_mistralai import MistralAIEmbeddings
from dotenv import load_dotenv
import os
# Build a persistent Chroma vector store from a single PDF:
# load the PDF -> split it into overlapping chunks -> embed the chunks with
# Mistral -> write the vectors to a local Chroma directory.

# Inputs and tuning values, named so they are easy to find and change.
# Both paths are relative, so they resolve against the current working
# directory the script is run from.
PDF_PATH = "deep-learning-material-dept-ece-ase-blr-1.pdf"
PERSIST_DIRECTORY = "chroma_db"
EMBEDDING_MODEL_NAME = "mistral-embed"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Load environment variables from a .env file, if one is present.
load_dotenv()

# No API key is passed to MistralAIEmbeddings below, so it has to come from the
# environment (MISTRAL_API_KEY, per the langchain-mistralai documentation).
# Checking here fails fast with a clear message instead of parsing and
# splitting the whole PDF first and then hitting an authentication error.
if not os.getenv("MISTRAL_API_KEY"):
    raise RuntimeError(
        "MISTRAL_API_KEY is not set; add it to your environment or .env file."
    )

loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(docs)

# A PDF with no extractable text (for example, scanned pages) produces no
# chunks. Stop here with an explicit error rather than handing an empty list
# to the vector store.
if not chunks:
    raise ValueError(
        f"No text chunks were produced from {PDF_PATH!r}; nothing to embed."
    )

embedding_model = MistralAIEmbeddings(model=EMBEDDING_MODEL_NAME)

# Embeds every chunk and stores the result under PERSIST_DIRECTORY.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory=PERSIST_DIRECTORY,
)