# Code: コードの全体構成
# Chat model that generate_node pipes the prompt into; temperature is fixed at 0.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Embedding model handed to the FAISS vector store below.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# Load the source PDF; `pages` is whatever the loader returns
# (presumably one document per page -- confirm against the loader's docs).
loader = PyPDFLoader("data/sample.pdf")
pages = loader.load()
# Split the loaded documents into smaller chunks before indexing.
# NOTE(review): chunk_size / chunk_overlap are presumably measured in
# characters for this splitter -- confirm.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(pages)
# Build the FAISS vector store from the chunks and the embedding model.
vectorstore = FAISS.from_documents(chunks, embeddings)
# Retriever used by retrieve_node.
# NOTE(review): "k": 3 presumably caps results at 3 documents per query -- confirm.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
class RAGState(TypedDict):
    """State dictionary passed between the nodes of the RAG graph.

    Each node reads the keys it needs and returns a partial dict holding
    only the keys it updates.
    """

    # The user's question; read by both retrieve_node and generate_node.
    question: str
    # Text content of the retrieved documents; written by retrieve_node.
    documents: List[str]
    # The generated answer; written by generate_node.
    answer: str
def retrieve_node(state: "RAGState") -> dict:
    """Look up documents for the question held in *state*.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A dict with a single ``"documents"`` key whose value is the list
        of ``page_content`` values of the documents returned by the
        module-level ``retriever``.
    """
    docs = retriever.invoke(state["question"])
    # Keep only the text of each hit; the rest of the document object
    # is dropped here.
    return {"documents": [doc.page_content for doc in docs]}
def generate_node(state: "RAGState") -> dict:
    """Produce an answer to the question from the retrieved documents.

    Args:
        state: Graph state; ``state["documents"]`` and
            ``state["question"]`` are read.

    Returns:
        A dict with a single ``"answer"`` key holding the result of
        invoking the prompt -> model -> output-parser chain (presumably a
        string, given the ``StrOutputParser`` at the end of the chain).
    """
    # Join the retrieved passages, separated by blank lines, into one
    # context string that fills the {context} slot of the system message.
    context = "\n\n".join(state["documents"])
    prompt = ChatPromptTemplate.from_messages([
        ("system", "以下の文書を参考に質問に回答してください。\n\n{context}"),
        ("human", "{question}"),
    ])
    chain = prompt | model | StrOutputParser()
    answer = chain.invoke({"context": context, "question": state["question"]})
    return {"answer": answer}
# Assemble the graph using RAGState as its state schema.
graph = StateGraph(RAGState)
# Register the two node functions under the names the edges refer to.
graph.add_node("retrieve", retrieve_node)
graph.add_node("generate", generate_node)
# Wiring: enter at "retrieve", then go to "generate", then to END.
graph.set_entry_point("retrieve")
graph.add_edge("retrieve", "generate")
graph.add_edge("generate", END)
# Compile the graph definition; `app` holds the compiled result.
app = graph.compile()
# retrieve_node → generate_node → END