pinecon
Verschillen
Dit geeft de verschillen weer tussen de geselecteerde revisie en de huidige revisie van de pagina.
Beide kanten vorige revisie · Vorige revisie · Volgende revisie | Vorige revisie | ||
pinecon [2024/02/10 16:20] – [Data Opdelen in Chunks] a3dijke | pinecon [2024/10/15 17:28] (huidige) – [Opdelen van .txt bestanden] a3dijke | ||
---|---|---|---|
Regel 1: | Regel 1: | ||
====== Pinecone Vector database ====== | ====== Pinecone Vector database ====== | ||
- | <= [[start|Terug naar start]]\\ | + | 🗂️ |
- | <- -- [[projecten|Terug naar A3Dbot Start]]\\ | + | |
[[https:// | [[https:// | ||
+ | |||
+ | **[[https:// | ||
---- | ---- | ||
Regel 19: | Regel 21: | ||
-> dimensions = 1536\\ | -> dimensions = 1536\\ | ||
-> metric = " | -> metric = " | ||
- | -> Model: OpenAI/ | + | -> Model: OpenAI/ |
+ | -> Tabblad PODS: P2 // | ||
+ | -> Cloud provider: Google\\ | ||
+ | -> Region: Netherlands eu-west4-gcp | ||
**[[https:// | **[[https:// | ||
Regel 33: | Regel 38: | ||
==== Opdelen van .txt bestanden ==== | ==== Opdelen van .txt bestanden ==== | ||
- | Ik heb tekstbestanden gegenereerd met plokken | + | [[https:// |
- | Deze tekstblokken | + | Ik heb tekstbestanden gegenereerd met blokken |
+ | Deze tekst blokken | ||
+ | bestanden worden opgesplitst in //(qua aantal leestekens ongelijke)// | ||
+ | Ik gebruik de volgende code: | ||
+ | < | ||
+ | import os | ||
+ | import streamlit as st | ||
+ | from pinecone import Pinecone, ServerlessSpec | ||
+ | from langchain_community.vectorstores import Pinecone as LangChainPinecone | ||
+ | from langchain_openai.embeddings import OpenAIEmbeddings | ||
+ | import uuid | ||
+ | |||
+ | # Initialize the app ============================ | ||
+ | os.environ[" | ||
+ | |||
+ | pineKey = st.secrets[" | ||
+ | pineEnv = st.secrets[" | ||
+ | pineInd = st.secrets[" | ||
+ | pinemod = " | ||
+ | |||
+ | |||
+ | class Document: | ||
+ | def __init__(self, | ||
+ | self.page_content = text | ||
+ | self.metadata = metadata if metadata is not None else {} | ||
+ | self.id = doc_id if doc_id is not None else str(uuid.uuid4()) | ||
+ | |||
+ | |||
+ | def main(): | ||
+ | doc_db = embedding_db() | ||
+ | print(doc_db) | ||
+ | print(" | ||
+ | |||
+ | |||
+ | def embedding_db(): | ||
+ | embeddings = OpenAIEmbeddings(model=pinemod) | ||
+ | pc = Pinecone(api_key=pineKey) | ||
+ | |||
+ | if pineInd not in pc.list_indexes().names(): | ||
+ | pc.create_index( | ||
+ | name=pineInd, | ||
+ | dimension=1536, | ||
+ | metric=' | ||
+ | spec=ServerlessSpec( | ||
+ | cloud=' | ||
+ | region=pineEnv | ||
+ | ) | ||
+ | ) | ||
+ | |||
+ | docs_split = load_embeddings_from_dir() | ||
+ | |||
+ | doc_db = LangChainPinecone.from_documents( | ||
+ | docs_split, | ||
+ | embeddings, | ||
+ | index_name=pineInd | ||
+ | ) | ||
+ | | ||
+ | return doc_db | ||
+ | |||
+ | |||
+ | def load_embeddings_from_dir(): | ||
+ | directory = ' | ||
+ | documents = [] | ||
+ | for filename in os.listdir(directory): | ||
+ | if filename.endswith(' | ||
+ | file_path = os.path.join(directory, | ||
+ | with open(file_path, | ||
+ | content = file.read() | ||
+ | parts = content.split(' | ||
+ | for part in parts: | ||
+ | documents.append(Document(part)) | ||
+ | print(f" | ||
+ | return documents | ||
+ | |||
+ | |||
+ | # Start the app =============================== | ||
+ | if __name__ == " | ||
+ | main() | ||
+ | </ | ||
+ | |||
+ | |||
+ | [[https:// | ||
---- | ---- | ||
- | ===== Pinecone en Langchain v0.1.0 ===== | + | |
- | [[https:// | + |
pinecon.1707578410.txt.gz · Laatst gewijzigd: 2024/02/10 16:20 door a3dijke