1. Introduction to RAG
In my last post on RAG I discussed how to ingest President Kennedy's speeches into a Pinecone vector database and perform semantic search using both Pinecone's API and the Langchain API. I used Pinecone as the vector database since it's cloud-based, fully managed, and of course has a free tier. In this post I will expand upon my prior work and build out a Retrieval-Augmented Generation (RAG) pipeline using Langchain. I will deploy it as a Streamlit application that can answer questions about President Kennedy.
You may ask, what is the point of RAG pipelines? Don't Large Language Models (LLMs) know the answers to everything? The answer is that most LLMs take a long time to train and are often trained on data that is already out of date by the time people begin to use the model. In order to incorporate more recent data into our LLM we could use fine-tuning, but this can still be time consuming and costly. The other option is Retrieval-Augmented Generation (RAG). RAG takes your original question and "retrieves" the documents from a vector database that are most semantically related to your question. RAG is able to do semantic search by converting the text of your question and the documents into numerical vectors using an embedding. The closeness of the document vectors to the question vector (with respect to a norm) measures the semantic similarity. The original question and the retrieved documents are then incorporated into a prompt which is fed into the LLM, where they are used as "context" to generate an answer. The entire process is depicted below,

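To make the idea of vector closeness concrete, here is a tiny sketch with made-up three-dimensional vectors (the real embeddings used below have 2,048 dimensions); it is for illustration only and is not part of the pipeline:
import numpy as np
# Toy vectors standing in for embedded text (real embeddings are much higher-dimensional).
question_vec = np.array([0.9, 0.1, 0.3])
doc_vecs = np.array([
    [0.8, 0.2, 0.4],  # a document close in meaning to the question
    [0.1, 0.9, 0.0],  # an unrelated document
])
def cosine_similarity(a, b):
    # Cosine of the angle between two vectors; values near 1.0 mean the same direction.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
scores = [cosine_similarity(question_vec, d) for d in doc_vecs]
print(scores)  # the first document scores higher, so it would be retrieved first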
I'll note that building a RAG pipeline was actually much easier than I originally thought, which is a testament to the power and simplicity of the Langchain framework!
Let's get started!
I'll start out with all the necessary imports:
# LangChain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_groq import ChatGroq
from langchain_pinecone import PineconeVectorStore
# Pinecone VectorDB
from pinecone import Pinecone
from pinecone import ServerlessSpec
import os
# API Keys
from dotenv import load_dotenv
load_dotenv()
True
2. Retrieving Documents With Vector (Semantic) Search
First we'll review retrieval with semantic search again. This is important since I will discuss a more useful way to interact with the vector database using a so-called "retriever." This functionality will be particularly helpful for a RAG pipeline.
To start, I need to connect to the Pinecone database and make sure the index of vectors corresponding to President Kennedy's speeches exists:
index_name = "prez-speeches"
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.list_indexes()
[ { "name": "prez-speeches", "metric": "cosine", "host": "prez-speeches-2307pwa.svc.aped-4627-b74a.pinecone.io", "spec": { "serverless": { "cloud": "aws", "region": "us-east-1" } }, "status": { "ready": true, "state": "Ready" }, "vector_type": "dense", "dimension": 2048, "deletion_protection": "disabled", "tags": null }, { "name": "jfk-speeches", "metric": "cosine", "host": "jfk-speeches-2307pwa.svc.aped-4627-b74a.pinecone.io", "spec": { "serverless": { "cloud": "aws", "region": "us-east-1" } }, "status": { "ready": true, "state": "Ready" }, "vector_type": "dense", "dimension": 2048, "deletion_protection": "disabled", "tags": null } ]
Now that we have confirmed the index exists and is ready for querying, we can create the initial connection to the vector database using the Langchain PineconeVectorStore class. Note that we have to pass the name of the index as well as the embeddings to the class's constructor. It's important that we use the same embedding model here that we used to convert the speeches to numerical vectors in the Pinecone index.
embedding = NVIDIAEmbeddings(
model="nvidia/llama-3.2-nv-embedqa-1b-v2",
api_key=os.getenv("NVIDIA_API_KEY"),
dimension=2048,
truncate="NONE")
vectordb = PineconeVectorStore(
pinecone_api_key=os.getenv("PINECONE_API_KEY"),
embedding=embedding,
index_name=index_name
)
Now we can perform vector similarity search using the similarity search function in Langchain. Under the hood this function creates a vector embedding of your question (query) and finds the closest documents using the cosine similarity score between the embedded question vector and the embedded document vectors. The closest documents to the question are found with a "nearest neighbors" search. This process is depicted in the image below,

The one thing to note is that I use the async similarity search for funsies and set it to return the top 5 documents.
question = "How did President Kennedy feel about the Berlin Wall?"
results = await vectordb.asimilarity_search(query=question, k=5)
I'll print out the document IDs since the actual text of the top 5 documents would be too long for the screen.
for document in results:
print("Document ID:", document.id)
Document ID: d0245e9a-b4f2-46e6-a6d0-07ee3afbad16 Document ID: b9e573a6-d9f9-4306-a6e3-72ac769643dd Document ID: a6bcd4fa-90a3-46b2-a48d-105115ccaed7 Document ID: ffe2db4a-6983-4cde-a853-658080619575 Document ID: b909248f-495d-4819-9776-d512e7c545f1
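If you also want to see the similarity scores themselves, the vector store has a scored variant of the search; a quick sketch using the same vectordb and question as above:
scored_results = vectordb.similarity_search_with_score(query=question, k=5)
for document, score in scored_results:
    # Higher cosine scores mean the document is semantically closer to the question.
    print("Document ID:", document.id, "Score:", round(score, 3))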
Now that we understand how to use the vector database to perform "retrieval" using similarity search, let's create a chain that will allow us to query the database and generate a response from the LLM. This will form the basis of a so-called "RAG pipeline."
3. Building A RAG Pipeline
Now we can use the vector database as a retriever which is a special Langchain Runnable object that takes in a string (query) and returns a list of Langchain Documents. This is depicted below,

We can see this in action,
retriever = vectordb.as_retriever()
print(type(retriever))
<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>
Now we can query the vector database using the invoke method of the retriever:
documents = retriever.invoke(input=question)
for document in documents:
print("Document ID:", document.id)
Document ID: d0245e9a-b4f2-46e6-a6d0-07ee3afbad16 Document ID: b9e573a6-d9f9-4306-a6e3-72ac769643dd Document ID: a6bcd4fa-90a3-46b2-a48d-105115ccaed7 Document ID: ffe2db4a-6983-4cde-a853-658080619575
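Notice that the retriever returned four documents rather than five: as_retriever uses the vector store's default of k=4 unless told otherwise. If we wanted it to match the top-5 search from earlier we could pass search_kwargs; a small sketch (retriever_top5 is just an illustrative name):
retriever_top5 = vectordb.as_retriever(search_kwargs={"k": 5})  # return the top 5 documents instead of the default 4
print(len(retriever_top5.invoke(input=question)))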
Now let's talk about the prompt for our RAG pipeline. I used the classic rlm/rag-prompt from LangSmith. I couldn't use the original one exactly as-is because the function create_retrieval_chain expects the human input to be a variable named input, while the original prompt uses question. The whole prompt is,
from langchain.prompts import PromptTemplate
template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {input}
Context: {context}
Answer:
"""
prompt = PromptTemplate(
template=template,
input_variables=["input", "context"],
)
Now I'll give an example of how to use this prompt. I'll use the question from the user as well as the documents retrieved from Pinecone as context:
print(
prompt.invoke({
"input": question,
"context": [document.id for document in documents]
}).text
)
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. Question: How did President Kennedy feel about the Berlin Wall? Context: ['d0245e9a-b4f2-46e6-a6d0-07ee3afbad16', 'b9e573a6-d9f9-4306-a6e3-72ac769643dd', 'a6bcd4fa-90a3-46b2-a48d-105115ccaed7', 'ffe2db4a-6983-4cde-a853-658080619575'] Answer:
Note that I only used the document IDs as context in the prompt here, because printing the actual Langchain Documents would be a lot of text for the screen. However, in a real RAG pipeline we would pass the actual documents to the LLM.
Now we'll move on to create our LLM ChatModel as this object will be needed to write the response to our question.
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
The LLM will be used as the generative part of the RAG pipeline.
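As a quick sanity check, you can call the chat model directly. Note that on its own it returns an AIMessage rather than a plain string, which is why the generation chain built next ends with a StrOutputParser:
message = llm.invoke("In one sentence, who was President Kennedy?")
print(type(message))    # an AIMessage from the chat model
print(message.content)  # the text of the model's reply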
The generative component of our RAG pipeline will be created by a function called create_stuff_documents_chain. This function returns a Runnable object, which we'll name generate_chain:
generate_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
We can see what makes up this composite Runnable and the components of the chain:
print(generate_chain)
bound=RunnableBinding(bound=RunnableAssign(mapper={ context: RunnableLambda(format_docs) }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[]) | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {input} \nContext: {context} \nAnswer:\n") | ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x128d7ffd0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x12fd8b990>, model_name='llama-3.3-70b-versatile', temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********')) | StrOutputParser() kwargs={} config={'run_name': 'stuff_documents_chain'} config_factories=[]
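For intuition, here is a rough hand-rolled equivalent of what create_stuff_documents_chain is doing: "stuff" the retrieved documents into the {context} slot of the prompt, call the LLM, and parse the output to a string. This is only a sketch; the real helper also supports customizing how each document is formatted:
from langchain_core.runnables import RunnablePassthrough
def format_docs(docs):
    # Concatenate the page content of every retrieved document into one string.
    return "\n\n".join(doc.page_content for doc in docs)
manual_generate_chain = (
    RunnablePassthrough.assign(context=lambda inputs: format_docs(inputs["context"]))
    | prompt
    | llm
    | StrOutputParser()
)
# manual_generate_chain.invoke({"context": documents, "input": question}) returns a string answer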
Now we can call the chain using the invoke method and see the answer to our question. The chain formats the prompt from our inputs, passes it to the LLM, and then to the StrOutputParser, which returns a string from the LLM instead of an AIMessage (the usual return type of a ChatModel).
answer = generate_chain.invoke(
{
'context': documents,
"input": question
}
)
print(answer)
President Kennedy felt strongly against the Berlin Wall, calling it "an offense not only against history but an offense against humanity" that separates families and divides a people. He saw it as a demonstration of the failures of the Communist system and a threat to freedom. Kennedy emphasized the importance of defending West Berlin and upholding the commitment to its people, stating "we shall not surrender" and seeking peace without surrendering to Communist pressures.
Now we can put this all together as a RAG chain by passing the Pinecone vector database retriever and the generative chain to create_retrieval_chain. The retriever will take the input question, perform similarity search, and return the documents. These documents, along with the input question, will be passed to the generate_chain to return the answer output. The full RAG chain is below:
rag_chain = create_retrieval_chain(
retriever=retriever,
combine_docs_chain=generate_chain)
The definition of the rag_chain is a bit different from the generate_chain above, and we can see its components,
print(rag_chain)
bound=RunnableAssign(mapper={ context: RunnableBinding(bound=RunnableLambda(lambda x: x['input']) | VectorStoreRetriever(tags=['PineconeVectorStore', 'NVIDIAEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x11cf39e50>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[]) }) | RunnableAssign(mapper={ answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={ context: RunnableLambda(format_docs) }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[]) | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {input} \nContext: {context} \nAnswer:\n") | ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x128d7ffd0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x12fd8b990>, model_name='llama-3.3-70b-versatile', temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********')) | StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[]) }) kwargs={} config={'run_name': 'retrieval_chain'} config_factories=[]
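Reading that output, create_retrieval_chain roughly does the following: pull the question out of the 'input' key, run it through the retriever to fill in 'context', and then run the generation chain to fill in 'answer'. A hand-rolled sketch of that composition (manual_rag_chain is just an illustrative name):
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
manual_rag_chain = (
    RunnablePassthrough.assign(context=itemgetter("input") | retriever)  # retrieve documents for the question
    | RunnablePassthrough.assign(answer=generate_chain)                  # generate the answer from question + documents
)
# manual_rag_chain.invoke({"input": question}) returns a dict with 'input', 'context', and 'answer'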
We can see the prompts that make up this chain:
rag_chain.get_prompts()
[PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {input} \nContext: {context} \nAnswer:\n")]
And then test it out,
response = rag_chain.invoke({"input": question})
response['answer']
'President Kennedy felt strongly against the Berlin Wall, calling it "an offense not only against history but an offense against humanity" that separates families and divides a people. He saw it as a demonstration of the failures of the Communist system and a threat to freedom. Kennedy emphasized the importance of defending West Berlin and upholding the commitment to its people, stating "we shall not surrender" and seeking peace without surrendering to Communist pressures.'
The response is a dictionary that looks like,
{
  'input':   -> the input question
  'answer':  -> the answer generated by the LLM
  'context': -> the list of retrieved documents
}
It contains the input question and the answer generated by the model. It also includes the context, which is the list of documents that were most semantically related to our question and were passed to the LLM to generate the answer.
From the context documents we can pull the reference metadata, which will be important for our deployment. Note that to make sure there are no duplicate sources, we create a set of tuples containing each speech's title and url:
references = {(doc.metadata['title'], doc.metadata['url']) for doc in response['context']}
references
{('Radio and Television Report to the American People on the Berlin Crisis, July 25, 1961', 'https://www.jfklibrary.org//archives/other-resources/john-f-kennedy-speeches/berlin-crisis-19610725'), ('Remarks of President John F. Kennedy at the Rudolph Wilde Platz, Berlin, June 26, 1963', 'https://www.jfklibrary.org//archives/other-resources/john-f-kennedy-speeches/berlin-w-germany-rudolph-wilde-platz-19630626')}
4. Deploying A RAG Application
Now, in order to deploy this in a Streamlit app, I'll create a function called ask_question that takes in a question and an index_name for the vector database; it then runs all the logic we went through above and returns the response dictionary. I'll print the answer from the LLM and then print out the retrieved documents as sources, with the speech title as the text and the url as a hyperlink. The entire Streamlit app with an example is shown below,

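I won't reproduce the full app code here, but a minimal sketch of the idea looks like the following. The Streamlit layout is my own illustrative choice; ask_question, the prompt, the models, and the index follow what we built above:
import os
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_pinecone import PineconeVectorStore
load_dotenv()
def ask_question(question: str, index_name: str) -> dict:
    # Rebuild the pipeline from above: embeddings -> vector store retriever -> generation chain.
    embedding = NVIDIAEmbeddings(
        model="nvidia/llama-3.2-nv-embedqa-1b-v2",
        api_key=os.getenv("NVIDIA_API_KEY"),
        dimension=2048,
        truncate="NONE")
    vectordb = PineconeVectorStore(
        pinecone_api_key=os.getenv("PINECONE_API_KEY"),
        embedding=embedding,
        index_name=index_name)
    prompt = PromptTemplate(
        template=("You are an assistant for question-answering tasks. Use the following pieces of "
                  "retrieved context to answer the question. If you don't know the answer, just say "
                  "that you don't know. Use three sentences maximum and keep the answer concise.\n"
                  "Question: {input} \nContext: {context} \nAnswer:\n"),
        input_variables=["input", "context"])
    llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
    generate_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
    rag_chain = create_retrieval_chain(
        retriever=vectordb.as_retriever(),
        combine_docs_chain=generate_chain)
    return rag_chain.invoke({"input": question})
# Streamlit UI: take a question, show the answer, then list the deduplicated sources.
st.title("Ask President Kennedy")
user_question = st.text_input("Enter a question about President Kennedy")
if user_question:
    response = ask_question(user_question, index_name="prez-speeches")
    st.write(response["answer"])
    st.subheader("Sources")
    references = {(doc.metadata["title"], doc.metadata["url"]) for doc in response["context"]}
    for title, url in references:
        st.markdown(f"[{title}]({url})")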
I won't go through the process of deploying this app to Google Cloud Run as I have covered that pretty extensively in a prior post.
5. Conclusions
In this post I covered the basics of creating a Retrieval-Augmented Generation (RAG) app using Langchain and deploying it as a Streamlit app. The RAG application is based on speeches made by President Kennedy that were stored in a Pinecone vector database. In a future post I will go over methods of evaluating and testing the RAG pipeline, but this is enough for now. Hope you enjoyed it!