-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3_rag_v1.py
More file actions
56 lines (42 loc) · 1.79 KB
/
3_rag_v1.py
File metadata and controls
56 lines (42 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# pip install -U langchain langchain-openai langchain-community faiss-cpu pypdf python-dotenv
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
# Tracing project label; assigned before load_dotenv() reads the .env file.
os.environ["LANGCHAIN_PROJECT"] = "RAG ChatBot"
load_dotenv()  # expects OPENAI_API_KEY in .env

PDF_PATH = "islr.pdf"  # <-- change to your PDF filename

# 1) Load PDF -- the loader yields one Document per page
docs = PyPDFLoader(PDF_PATH).load()

# 2) Chunk -- split the pages with chunk_size=1000 and chunk_overlap=150
splits = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
).split_documents(docs)

# 3) Embed + index -- embed every chunk and store the vectors in FAISS
vs = FAISS.from_documents(
    splits,
    OpenAIEmbeddings(model="text-embedding-3-small"),
)
# Similarity search that hands back the 4 best-matching chunks per query
retriever = vs.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)
# 4) Prompt -- the system turn restricts the model to the supplied context;
# the human turn carries the question followed by that context.
system_turn = (
    "system",
    "Answer ONLY from the provided context. If not found, say you don't know.",
)
human_turn = ("human", "Question: {question}\n\nContext:\n{context}")
prompt = ChatPromptTemplate.from_messages([system_turn, human_turn])

# 5) Chain -- chat model that produces the answer (temperature pinned to 0)
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
def format_docs(docs):
    """Merge documents into a single context string.

    The ``page_content`` of each item in ``docs`` is taken in order and the
    pieces are joined with a blank line between them. An empty ``docs``
    yields an empty string.
    """
    separator = "\n\n"
    bodies = [doc.page_content for doc in docs]
    return separator.join(bodies)
# Fan the incoming question out to two branches: "context" runs the
# retriever and flattens its documents with format_docs, while "question"
# forwards the input unchanged. The keys match the prompt's placeholders.
context_branch = retriever | RunnableLambda(format_docs)
parallel = RunnableParallel(
    context=context_branch,
    question=RunnablePassthrough(),
)
# Fill the prompt, call the model, and reduce the reply to a plain string.
chain = parallel | prompt | llm | StrOutputParser()
# 6) Ask questions
# Keep answering until the user presses Ctrl+C, as the banner promises.
# End-of-input (e.g. a question piped on stdin) also ends the session, so a
# single piped question still produces one answer and then exits.
print("PDF RAG ready. Ask a question (or Ctrl+C to exit).")
try:
    while True:
        q = input("\nQ: ").strip()
        if not q:
            # A blank line has nothing to retrieve for; skip the round trip
            # to the retriever and the model and prompt again.
            continue
        ans = chain.invoke(q)
        print("\nA:", ans)
except (KeyboardInterrupt, EOFError):
    # Ctrl+C / EOF is the advertised way out: finish the current terminal
    # line and exit without a traceback.
    print()