USE_REPLICATE = True
if USE_REPLICATE:
    print('Using replicate')
    import os

    # NOTE: placeholder value; set this to your own Replicate API token.
    os.environ["REPLICATE_API_TOKEN"] = "r8_Undampori9Pazhampory10SavalaVAda"

    from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    from llama_index.llms.replicate import Replicate
    from transformers import AutoTokenizer
    print("Finished importing everything")

    # set the LLM
    llama2_7b_chat = "meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e"
    Settings.llm = Replicate(
        model=llama2_7b_chat,
        temperature=0.01,
        additional_kwargs={"top_p": 1, "max_new_tokens": 300},
    )
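    # On the settings above: temperature near 0 makes answers close to
    # deterministic, top_p=1 disables nucleus-sampling truncation, and
    # max_new_tokens=300 caps the length of each generated answer.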
print("Settings finished")
# set tokenizer to match LLM
Settings.tokenizer = AutoTokenizer.from_pretrained(
"NousResearch/Llama-2-7b-chat-hf"
)
print("Initialized Settings tokenizer")
    # set the embed model
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-small-en-v1.5"
    )
    print("Loaded Settings embed model")
    # load everything under docs/ and build an in-memory vector index over it
    documents = SimpleDirectoryReader("docs").load_data()
    index = VectorStoreIndex.from_documents(documents)
    print("Loaded index")

    query_engine = index.as_query_engine()
    print("Initialized query engine")
    response = query_engine.query("What is auto-negotiation in a switch?")
    print('response is', response)
else:
    print('Running without replicate')
    import os

    # NOTE: placeholder value; set this to your own OpenAI API key.
    os.environ['OPENAI_API_KEY'] = "sk-_Undampori9Pazhampory10SavalaVAda"

    from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

    # with no LLM or embed model configured, LlamaIndex falls back to its
    # OpenAI defaults for both generation and embeddings
    documents = SimpleDirectoryReader("docs").load_data()
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()
    response = query_engine.query("What is auto-negotiation in a switch?")
    print('response is', response)
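One practical follow-up: as written, the script rebuilds the index (and re-embeds every document in docs/) on each run. Below is a minimal sketch of persisting the index to disk with LlamaIndex's storage context and reloading it later; the "storage" directory name is my own choice, not part of the snippet above.

from llama_index.core import StorageContext, load_index_from_storage

# first run: build the index as above, then save it to disk
index.storage_context.persist(persist_dir="storage")

# later runs: reload the saved index instead of re-embedding the documents
storage_context = StorageContext.from_defaults(persist_dir="storage")
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine()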
Thursday, April 11, 2024
A simple piece of code to query a local document using Llama 2 (via Replicate) or OpenAI.