Compare commits

5 Commits

Author SHA1 Message Date
Platform Engineering Bot 63a83a7398
fix(deps): update auto merged updates (#880)
Signed-off-by: Platform Engineering Bot <platform-engineering@redhat.com>
2025-06-09 08:44:36 +03:00
Platform Engineering Bot 1bace0e7c5
fix(deps): update auto merged updates (#879)
Signed-off-by: Platform Engineering Bot <platform-engineering@redhat.com>
2025-06-02 08:05:13 +03:00
Platform Engineering Bot 2f6980bd4d
fix(deps): update auto merged updates (#878)
Signed-off-by: Platform Engineering Bot <platform-engineering@redhat.com>
2025-05-26 09:24:49 +03:00
Brian M cdf6cd5615
Fixed graphrag macos errors and really long load times for parsing documents (#876)
2025-05-21 13:44:14 +02:00
Jeff MAURY bbffacf06d
fix: update summarizer recipe for the ramalama images (#875)
2025-05-21 10:13:51 +02:00
9 changed files with 410 additions and 220 deletions

View File

@@ -16,4 +16,4 @@
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
-distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
+distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.10/apache-maven-3.9.10-bin.zip

View File

@@ -1,5 +1,5 @@
# Install dependencies only when needed
-FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS deps
+FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS deps
USER 0
WORKDIR /app
@@ -13,7 +13,7 @@ RUN \
fi
# Rebuild the source code only when needed
-FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS builder
+FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS builder
USER 0
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
@@ -28,7 +28,7 @@ ENV NEXT_TELEMETRY_DISABLED 1
RUN npm run build
# Production image, copy all the files and run next
-FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS runner
+FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS runner
USER 0
WORKDIR /app

View File

@@ -186,9 +186,9 @@
}
},
"node_modules/@langchain/core": {
"version": "0.3.56",
"resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.56.tgz",
"integrity": "sha512-eF9MyInM9RLNisAygiCrzHnqzOnuzGWy4f1SAqAis+XIMhcA98WuZDNWxyX9pP3aKQGc47FAJ/9XWJwv5KiquA==",
"version": "0.3.57",
"resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.57.tgz",
"integrity": "sha512-jz28qCTKJmi47b6jqhQ6vYRTG5jRpqhtPQjriRTB5wR8mgvzo6xKs0fG/kExS3ZvM79ytD1npBvgf8i19xOo9Q==",
"license": "MIT",
"dependencies": {
"@cfworker/json-schema": "^4.0.2",
@@ -221,9 +221,9 @@
}
},
"node_modules/@langchain/langgraph": {
"version": "0.2.72",
"resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-0.2.72.tgz",
"integrity": "sha512-2Rs79mLSx0Yxr/omiWOXBlaS+eywZ9KACe06pI6XkA3hT2hwqjMlXYMvbeD7mxZlKrPtLsQaHWvL9IO2VAa+lQ==",
"version": "0.2.74",
"resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-0.2.74.tgz",
"integrity": "sha512-oHpEi5sTZTPaeZX1UnzfM2OAJ21QGQrwReTV6+QnX7h8nDCBzhtipAw1cK616S+X8zpcVOjgOtJuaJhXa4mN8w==",
"license": "MIT",
"dependencies": {
"@langchain/langgraph-checkpoint": "~0.0.17",

View File

@@ -9,15 +9,7 @@ RUN chown -R 1001:0 /graph-rag
COPY requirements.txt .
COPY rag_app.py .
# Detect architecture and install Rust only on ARM (aarch64/arm64)
-RUN ARCH=$(uname -m) && \
-if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
-curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
-source "$HOME/.cargo/env" && \
-rustc --version && \
-cargo --version; \
-fi && \
-pip install --upgrade pip && \
+RUN pip install --upgrade pip && \
pip install --no-cache-dir --upgrade -r /graph-rag/requirements.txt
# Expose the port for the application

View File

@@ -1,196 +1,183 @@
import sys
import os
import shutil
import nest_asyncio
import fitz # PyMuPDF
import streamlit as st
import fitz
import logging
# logging.basicConfig(level=logging.DEBUG)
from lightrag import LightRAG, QueryParam
from lightrag.llm.hf import hf_embed
from lightrag.llm.openai import openai_complete_if_cache
from lightrag.utils import EmbeddingFunc, encode_string_by_tiktoken, truncate_list_by_token_size, decode_tokens_by_tiktoken
from transformers import AutoModel, AutoTokenizer
# Apply nest_asyncio to solve event loop issues
nest_asyncio.apply()
WORKING_DIR = "rag_data"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "dummy"
API_KEY = "dummy"
from typing import List
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_graph_retriever import GraphRetriever
from graph_retriever.strategies import Eager
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
# Configuration
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
model_service = os.getenv("MODEL_ENDPOINT",
"http://localhost:8001")
model_service = f"{model_service}/v1"
# Check if folder exists
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
async def llm_model_func(
prompt: str, system_prompt: str = None, history_messages: list[str] = [], **kwargs
) -> str:
"""LLM function to ensure total tokens (prompt + system_prompt + history_messages) <= 2048."""
# Calculate token sizes
prompt_tokens = len(encode_string_by_tiktoken(prompt))
# Calculate remaining tokens for history_messages
max_total_tokens = 1000
# If the prompt itself exceeds the token limit, truncate it
if prompt_tokens > max_total_tokens:
print("Warning: Prompt exceeds token limit. Truncating prompt.")
truncated_prompt = encode_string_by_tiktoken(prompt)[:max_total_tokens]
prompt = decode_tokens_by_tiktoken(truncated_prompt)
prompt_tokens = len(truncated_prompt)
# Truncate history_messages to fit within the remaining tokens
# Log token sizes for debugging
print(f"Prompt tokens: {prompt_tokens}")
# Call the LLM with truncated prompt and history_messages
return await openai_complete_if_cache(
model=LLM_MODEL,
prompt=prompt,
system_prompt=system_prompt,
# history_messages=history_messages,
base_url=model_service,
api_key=API_KEY,
**kwargs,
)
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
chunk_token_size = 256,
chunk_overlap_token_size = 50,
llm_model_max_token_size=1000,
llm_model_name=LLM_MODEL,
embedding_func=EmbeddingFunc(
embedding_dim=384,
max_token_size=5000,
func=lambda texts: hf_embed(
texts,
tokenizer=AutoTokenizer.from_pretrained(EMBEDDING_MODEL),
embed_model=AutoModel.from_pretrained(EMBEDDING_MODEL),
),
),
)
LLM_MODEL = "local-model"
WORKING_DIR = "graph_rag_data"
# Initialize session state
if 'uploaded_file_previous' not in st.session_state:
st.session_state.uploaded_file_previous = None
if 'rag_initialized' not in st.session_state:
st.session_state.rag_initialized = False
if 'retriever' not in st.session_state:
st.session_state.retriever = None
if 'chain' not in st.session_state:
st.session_state.chain = None
if 'user_query' not in st.session_state:
st.session_state.user_query = ''
if 'last_submission' not in st.session_state:
st.session_state.last_submission = ''
def pdf_to_text(pdf_path, output_path):
def pdf_to_text(pdf_path: str) -> str:
"""Extract text from PDF file."""
try:
doc = fitz.open(pdf_path)
text = ''
for page in doc:
text += page.get_text()
with open(output_path, 'w', encoding='utf-8') as file:
file.write(text)
return text
except Exception as e:
st.error(f"Error extracting text from PDF: {e}")
raise
async def async_query(query, mode="mix"):
print('\n')
print("query: ", query)
try:
with st.spinner("Processing your query..."):
stream = rag.query(query, param=QueryParam(mode=mode, stream=True, max_token_for_text_unit=1750, max_token_for_global_context=1750, max_token_for_local_context=1750))
# Create a placeholder for the streamed content
output_placeholder = st.empty()
# Manually consume the stream and write to Streamlit
response = ""
# Check if stream is an async iterable
if hasattr(stream, "__aiter__"):
print("async")
async for chunk in stream:
response += chunk
# Update the placeholder with the latest response
output_placeholder.markdown(response, unsafe_allow_html=True)
else:
print("not async")
st.write(stream)
response = stream
# Store the final response in session state
st.session_state.last_submission = response
except ValueError as e:
if "exceed context window" in str(e):
st.error(
"The tokens in your query exceed the model's context window. Please try a different query mode or shorten your query."
def create_documents_from_text(text: str) -> List[Document]:
"""Create LangChain Documents from text with basic metadata."""
chunks = text.split('\n\n') # Simple paragraph-based chunking
documents = []
for i, chunk in enumerate(chunks):
if chunk.strip(): # Skip empty chunks
documents.append(
Document(
page_content=chunk.strip(),
metadata={"id": f"chunk_{i}", "source": "uploaded_file"}
)
)
# Optionally, you could reset the query mode or suggest alternatives
st.session_state.query_mode = "mix" # Default to "mix" mode
st.session_state.user_query = '' # Clear the user query
else:
st.error(f"Error processing query: {e}")
return documents
def setup_retriever(documents: List[Document]) -> GraphRetriever:
"""Set up the Graph Retriever with HuggingFace embeddings."""
# Initialize embeddings
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
# Create vector store
vector_store = InMemoryVectorStore.from_documents(
documents=documents,
embedding=embeddings,
)
# Create graph retriever
retriever = GraphRetriever(
store=vector_store,
edges=[("source", "source")], # Simple edge - can customize based on your metadata
strategy=Eager(k=5, start_k=1, max_depth=2),
)
return retriever
def setup_llm_chain(retriever: GraphRetriever):
"""Set up the LLM chain with the retriever."""
llm = ChatOpenAI(
base_url=model_service,
api_key="dummy",
model=LLM_MODEL,
streaming=True,
)
prompt = ChatPromptTemplate.from_template(
"""Answer the question based only on the context provided.
Context: {context}
Question: {question}"""
)
def format_docs(docs):
return "\n\n".join(f"{doc.page_content}" for doc in docs)
chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)
return chain
def process_query(query: str):
"""Process user query using the Graph RAG chain."""
if st.session_state.chain is None:
st.error("Please upload and process a PDF file first.")
return
try:
st.subheader("Answer:")
with st.spinner("Processing your query..."):
# Stream output token-by-token
response_placeholder = st.empty()
full_response = ""
for chunk in st.session_state.chain.stream(query):
full_response += chunk
response_placeholder.markdown(full_response + "")
response_placeholder.markdown(full_response)
except Exception as e:
st.error(f"Error processing query: {e}")
def query(query, mode="mix"):
# Run the async function in the event loop
import asyncio
asyncio.run(async_query(query, mode))
# Streamlit UI
st.title("GraphRAG Chatbot")
st.title("Graph RAG with PDF Upload")
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
if uploaded_file.name != st.session_state.uploaded_file_previous:
st.session_state.uploaded_file_previous = uploaded_file.name
if os.path.exists(WORKING_DIR):
shutil.rmtree(WORKING_DIR, ignore_errors=True)
os.makedirs(WORKING_DIR)
with open("temp.pdf", "wb") as f:
# Create working directory if it doesn't exist
if not os.path.exists(WORKING_DIR):
os.makedirs(WORKING_DIR)
# Save uploaded file temporarily
temp_pdf_path = os.path.join(WORKING_DIR, "temp.pdf")
with open(temp_pdf_path, "wb") as f:
f.write(uploaded_file.getbuffer())
try:
with st.spinner("Processing PDF..."):
pdf_to_text("temp.pdf", "document.txt")
with open("document.txt", "r", encoding="utf-8") as f:
rag.insert(f.read())
st.session_state.rag_initialized = True
text = pdf_to_text(temp_pdf_path)
documents = create_documents_from_text(text)
# Set up retriever and chain
st.session_state.retriever = setup_retriever(documents)
st.session_state.chain = setup_llm_chain(st.session_state.retriever)
st.success("PDF processed successfully! You can now ask questions.")
except Exception as e:
st.error(f"Error processing PDF: {e}")
finally:
if os.path.exists("temp.pdf"):
os.remove("temp.pdf")
# Clean up temporary file
if os.path.exists(temp_pdf_path):
os.remove(temp_pdf_path)
if st.session_state.rag_initialized:
query_mode = st.radio(
"Select query mode:",
options=["local", "global", "naive", "hybrid", "mix"],
index=3,
key="mode"
# Query section
if st.session_state.retriever is not None:
st.subheader("Ask a Question")
st.text_input(
"Enter your question about the document:",
key="query_input"
)
st.session_state.query_mode = query_mode
user_query = st.session_state.query_input
# Use a unique key for the text input to avoid conflicts
user_query = st.text_input("Enter your query:", key="query_input")
if st.button("Submit"):
if user_query.strip():
st.session_state.user_query = user_query
query(st.session_state.user_query, mode=st.session_state.query_mode)
if user_query.strip() and user_query != st.session_state.user_query:
st.session_state.user_query = user_query
process_query(user_query)
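
For readers following the rewrite above: the new rag_app.py swaps LightRAG for langchain-graph-retriever. Below is a minimal, self-contained sketch of that retrieval path, runnable outside Streamlit. The sample documents and the query string are illustrative only; the retriever parameters mirror the values in the patch, and the packages are the ones pinned in the requirements.txt diff that follows.

# Minimal sketch of the Graph RAG retrieval path introduced in this PR.
# Sample documents and the query string are illustrative, not from the patch.
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_graph_retriever import GraphRetriever
from graph_retriever.strategies import Eager

docs = [
    Document(page_content="First paragraph of an uploaded PDF.",
             metadata={"id": "chunk_0", "source": "uploaded_file"}),
    Document(page_content="Second paragraph of an uploaded PDF.",
             metadata={"id": "chunk_1", "source": "uploaded_file"}),
]

store = InMemoryVectorStore.from_documents(
    documents=docs,
    embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"),
)

retriever = GraphRetriever(
    store=store,
    edges=[("source", "source")],  # hop between chunks that share a "source" metadata value
    strategy=Eager(k=5, start_k=1, max_depth=2),  # 1 seed doc, expand to depth 2, return up to 5
)

# GraphRetriever is a standard LangChain retriever, so invoke() returns Documents.
for doc in retriever.invoke("What does the document say?"):
    print(doc.metadata["id"], doc.page_content)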

View File

@@ -1,37 +1,7 @@
-lightrag-hku==1.1.7
-numpy==1.26.4
-pydantic==2.10.6
-python-dotenv==1.0.1
-pipmaster==0.4.0
-httpx==0.28.1
-nest_asyncio==1.6.0
-future==1.0.0
-setuptools==75.8.2
-tenacity==9.0.0
-PyMuPDF==1.25.5
-streamlit==1.42.0
-tiktoken
-torch
-transformers
-matplotlib
-scikit-learn
-POT==0.9.5
-anytree==2.12.1
-autograd==1.7.0
-beartype==0.18.5
-gensim==4.3.3
-graspologic==3.4.1
-hyppo==0.4.0
-llvmlite==0.44.0
-numba==0.61.2
-patsy==1.0.1
-pynndescent==0.5.13
-seaborn==0.13.2
-smart-open==7.1.0
-statsmodels==0.14.4
-umap-learn==0.5.7
-wrapt==1.17.2
-nano-vectordb==0.0.4.3
-jiter==0.8.2
-distro==1.9.0
-openai==1.64.0
+streamlit==1.45.1
+langchain-graph-retriever==0.8.0
+langchain-huggingface==0.2.0
+langchain-openai==0.3.17
+transformers==4.52.4
+torch==2.7.1
+PyMuPDF==1.25.5

View File

@@ -1,5 +1,5 @@
# Install dependencies only when needed
-FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS deps
+FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS deps
USER 0
WORKDIR /app
@@ -13,7 +13,7 @@ RUN \
fi
# Rebuild the source code only when needed
-FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS builder
+FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS builder
USER 0
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
@@ -28,7 +28,7 @@ ENV NEXT_TELEMETRY_DISABLED 1
RUN npm run build
# Production image, copy all the files and run next
-FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS runner
+FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS runner
USER 0
WORKDIR /app

View File

@@ -2514,6 +2514,103 @@
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
}
},
"node_modules/@graphql-typed-document-node/core": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/@graphql-typed-document-node/core/-/core-3.2.0.tgz",
"integrity": "sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==",
"license": "MIT",
"peerDependencies": {
"graphql": "^0.8.0 || ^0.9.0 || ^0.10.0 || ^0.11.0 || ^0.12.0 || ^0.13.0 || ^14.0.0 || ^15.0.0 || ^16.0.0 || ^17.0.0"
}
},
"node_modules/@grpc/grpc-js": {
"version": "1.13.4",
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
"license": "Apache-2.0",
"dependencies": {
"@grpc/proto-loader": "^0.7.13",
"@js-sdsl/ordered-map": "^4.4.2"
},
"engines": {
"node": ">=12.10.0"
}
},
"node_modules/@grpc/proto-loader": {
"version": "0.7.15",
"resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.15.tgz",
"integrity": "sha512-tMXdRCfYVixjuFK+Hk0Q1s38gV9zDiDJfWL3h1rv4Qc39oILCu1TRTDt7+fGUI8K4G1Fj125Hx/ru3azECWTyQ==",
"license": "Apache-2.0",
"dependencies": {
"lodash.camelcase": "^4.3.0",
"long": "^5.0.0",
"protobufjs": "^7.2.5",
"yargs": "^17.7.2"
},
"bin": {
"proto-loader-gen-types": "build/bin/proto-loader-gen-types.js"
},
"engines": {
"node": ">=6"
}
},
"node_modules/@grpc/proto-loader/node_modules/long": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
"license": "Apache-2.0"
},
"node_modules/@grpc/proto-loader/node_modules/protobufjs": {
"version": "7.4.0",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.4.0.tgz",
"integrity": "sha512-mRUWCc3KUU4w1jU8sGxICXH/gNS94DvI1gxqDvBzhj1JpcsimQkYiOJfwsPUykUI5ZaspFbSgmBLER8IrQ3tqw==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/node": ">=13.7.0",
"long": "^5.0.0"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/@grpc/proto-loader/node_modules/yargs": {
"version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
"license": "MIT",
"dependencies": {
"cliui": "^8.0.1",
"escalade": "^3.1.1",
"get-caller-file": "^2.0.5",
"require-directory": "^2.1.1",
"string-width": "^4.2.3",
"y18n": "^5.0.5",
"yargs-parser": "^21.1.1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/@grpc/proto-loader/node_modules/yargs-parser": {
"version": "21.1.1",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
"license": "ISC",
"engines": {
"node": ">=12"
}
},
"node_modules/@huggingface/jinja": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz",
@@ -3944,13 +4041,24 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@js-sdsl/ordered-map": {
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/@js-sdsl/ordered-map/-/ordered-map-4.4.2.tgz",
"integrity": "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==",
"license": "MIT",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/js-sdsl"
}
},
"node_modules/@langchain/community": {
"version": "0.3.43",
"resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.3.43.tgz",
"integrity": "sha512-rTXuKflXyftKFw2fAl5YbkfMcwsIcot8tpUy50asXxbe3eGpQimIFXZsLeaBlftjQPadgnBMOr3Wn1xX8kfOzA==",
"version": "0.3.45",
"resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.3.45.tgz",
"integrity": "sha512-KkAGmnP+w5tozLYsj/kGKwyfuPnCcA6MyDXfNF7oDo7L1TxhUgdEKhvNsY7ooLXz6Xh/LV5Kqp2B8U0jfYCQKQ==",
"license": "MIT",
"dependencies": {
"@langchain/openai": ">=0.2.0 <0.6.0",
"@langchain/weaviate": "^0.2.0",
"binary-extensions": "^2.2.0",
"expr-eval": "^2.0.2",
"flat": "^5.0.2",
@@ -4088,7 +4196,7 @@
"typesense": "^1.5.3",
"usearch": "^1.1.1",
"voy-search": "0.6.2",
"weaviate-ts-client": "*",
"weaviate-client": "^3.5.2",
"web-auth-library": "^1.0.3",
"word-extractor": "*",
"ws": "^8.14.2",
@@ -4452,7 +4560,7 @@
"voy-search": {
"optional": true
},
"weaviate-ts-client": {
"weaviate-client": {
"optional": true
},
"web-auth-library": {
@@ -4567,6 +4675,22 @@
"@langchain/core": ">=0.2.21 <0.4.0"
}
},
"node_modules/@langchain/weaviate": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/@langchain/weaviate/-/weaviate-0.2.0.tgz",
"integrity": "sha512-gAtTCxSllR8Z92qAuRn2ir0cop241VmftQHQN+UYtTeoLge8hvZT5k0j55PDVaXTVpjx0ecx6DKv5I/wLRQI+A==",
"license": "MIT",
"dependencies": {
"uuid": "^10.0.0",
"weaviate-client": "^3.5.2"
},
"engines": {
"node": ">=18"
},
"peerDependencies": {
"@langchain/core": ">=0.2.21 <0.4.0"
}
},
"node_modules/@leichtgewicht/ip-codec": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz",
@@ -6422,6 +6546,12 @@
"node": ">=6.5"
}
},
"node_modules/abort-controller-x": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/abort-controller-x/-/abort-controller-x-0.4.3.tgz",
"integrity": "sha512-VtUwTNU8fpMwvWGn4xE93ywbogTYsuT+AUxAXOeelbXuQVIwNmC5YLeho9sH4vZ4ITW8414TTAOG1nW6uIVHCA==",
"license": "MIT"
},
"node_modules/accepts": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
@@ -8339,6 +8469,15 @@
"node": ">= 6"
}
},
"node_modules/cross-fetch": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz",
"integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==",
"license": "MIT",
"dependencies": {
"node-fetch": "^2.7.0"
}
},
"node_modules/cross-spawn": {
"version": "7.0.6",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
@@ -11491,6 +11630,28 @@
"integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
"license": "MIT"
},
"node_modules/graphql": {
"version": "16.11.0",
"resolved": "https://registry.npmjs.org/graphql/-/graphql-16.11.0.tgz",
"integrity": "sha512-mS1lbMsxgQj6hge1XZ6p7GPhbrtFwUFYi3wRzXAC/FmYnyXMTvvI3td3rjmQ2u8ewXueaSvRPWaEcgVVOT9Jnw==",
"license": "MIT",
"engines": {
"node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0"
}
},
"node_modules/graphql-request": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/graphql-request/-/graphql-request-6.1.0.tgz",
"integrity": "sha512-p+XPfS4q7aIpKVcgmnZKhMNqhltk20hfXtkaIkTfjjmiKMJ5xrt5c743cL03y/K7y1rg3WrIC49xGiEQ4mxdNw==",
"license": "MIT",
"dependencies": {
"@graphql-typed-document-node/core": "^3.2.0",
"cross-fetch": "^3.1.5"
},
"peerDependencies": {
"graphql": "14 - 16"
}
},
"node_modules/guid-typescript": {
"version": "1.0.9",
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
@@ -15321,6 +15482,12 @@
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
"license": "MIT"
},
"node_modules/lodash.camelcase": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
"integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==",
"license": "MIT"
},
"node_modules/lodash.debounce": {
"version": "4.0.8",
"resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz",
@@ -15874,6 +16041,36 @@
"node": "^10 || ^12 || >=14"
}
},
"node_modules/nice-grpc": {
"version": "2.1.12",
"resolved": "https://registry.npmjs.org/nice-grpc/-/nice-grpc-2.1.12.tgz",
"integrity": "sha512-J1n4Wg+D3IhRhGQb+iqh2OpiM0GzTve/kf2lnlW4S+xczmIEd0aHUDV1OsJ5a3q8GSTqJf+s4Rgg1M8uJltarw==",
"license": "MIT",
"dependencies": {
"@grpc/grpc-js": "^1.13.1",
"abort-controller-x": "^0.4.0",
"nice-grpc-common": "^2.0.2"
}
},
"node_modules/nice-grpc-client-middleware-retry": {
"version": "3.1.11",
"resolved": "https://registry.npmjs.org/nice-grpc-client-middleware-retry/-/nice-grpc-client-middleware-retry-3.1.11.tgz",
"integrity": "sha512-xW/imz/kNG2g0DwTfH2eYEGrg1chSLrXtvGp9fg2qkhTgGFfAS/Pq3+t+9G8KThcC4hK/xlEyKvZWKk++33S6A==",
"license": "MIT",
"dependencies": {
"abort-controller-x": "^0.4.0",
"nice-grpc-common": "^2.0.2"
}
},
"node_modules/nice-grpc-common": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/nice-grpc-common/-/nice-grpc-common-2.0.2.tgz",
"integrity": "sha512-7RNWbls5kAL1QVUOXvBsv1uO0wPQK3lHv+cY1gwkTzirnG1Nop4cBJZubpgziNbaVc/bl9QJcyvsf/NQxa3rjQ==",
"license": "MIT",
"dependencies": {
"ts-error": "^1.0.6"
}
},
"node_modules/no-case": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz",
@@ -20800,9 +20997,9 @@
}
},
"node_modules/primereact": {
"version": "10.9.5",
"resolved": "https://registry.npmjs.org/primereact/-/primereact-10.9.5.tgz",
"integrity": "sha512-4O6gm0LrKF7Ml8zQmb8mGiWS/ugJ94KBOAS/CAxWFQh9qyNgfNw/qcpCeomPIkjWd98jrM2XDiEbgq+W0395Hw==",
"version": "10.9.6",
"resolved": "https://registry.npmjs.org/primereact/-/primereact-10.9.6.tgz",
"integrity": "sha512-0Jjz/KzfUURSHaPTXJwjL2Dc7CDPnbO17MivyJz7T5smGAMLY5d+IqpQhV61R22G/rDmhMh3+32LCNva2M8fRw==",
"license": "MIT",
"dependencies": {
"@types/react-transition-group": "^4.4.1",
@@ -23923,6 +24120,12 @@
"integrity": "sha512-c3zayb8/kWWpycWYg87P71E1S1ZL6b6IJxfb5fvsUgsf0S2MVGaDhDXXjDMpdCpfWXqptc+4mXwmiy1ypXqRAA==",
"license": "MIT"
},
"node_modules/ts-error": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/ts-error/-/ts-error-1.0.6.tgz",
"integrity": "sha512-tLJxacIQUM82IR7JO1UUkKlYuUTmoY9HBJAmNWFzheSlDS5SPMcNIepejHJa4BpPQLAcbRhRf3GDJzyj6rbKvA==",
"license": "MIT"
},
"node_modules/ts-interface-checker": {
"version": "0.1.13",
"resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz",
@@ -24440,6 +24643,44 @@
"minimalistic-assert": "^1.0.0"
}
},
"node_modules/weaviate-client": {
"version": "3.5.5",
"resolved": "https://registry.npmjs.org/weaviate-client/-/weaviate-client-3.5.5.tgz",
"integrity": "sha512-wAjJtJmBQn2KiTPkfUGEzddBIbySpN0y0wAcYPWDCBXVjXqf0UOExujFJ+QeeRp+AjHk15B6BmUaUX9NHVLzsw==",
"license": "SEE LICENSE IN LICENSE",
"dependencies": {
"abort-controller-x": "^0.4.3",
"graphql": "^16.10.0",
"graphql-request": "^6.1.0",
"long": "^5.2.4",
"nice-grpc": "^2.1.11",
"nice-grpc-client-middleware-retry": "^3.1.10",
"nice-grpc-common": "^2.0.2",
"uuid": "^9.0.1"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/weaviate-client/node_modules/long": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
"license": "Apache-2.0"
},
"node_modules/weaviate-client/node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"license": "MIT",
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/web-streams-polyfill": {
"version": "4.0.0-beta.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",

View File

@@ -57,9 +57,9 @@ def chunk_text(text):
text_chunks = text_splitter.create_documents([text])
for chunk in text_chunks:
chunk = chunk.page_content
-chunk_kwargs = request_kwargs | {"json": {"input": chunk}}
-count = requests.post(f"{model_service[:-2]}extras/tokenize/count", **chunk_kwargs).content
-count = json.loads(count)["count"]
+chunk_kwargs = request_kwargs | {"json": {"content": chunk}}
+count = requests.post(f"{model_service[:-2]}tokenize", **chunk_kwargs).content
+count = len(json.loads(count)["tokens"])
if count >= 2048:
split_append_chunk(chunk, chunks)
else:
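
Note on the change above: the old code asked a custom extras/tokenize/count endpoint for a token count, while the new code calls a plain tokenize endpoint (as exposed by llama.cpp-style servers) and counts the returned token IDs client-side. A minimal sketch of the new call, assuming such a server is reachable; the base URL and sample text are illustrative, not from the patch:

import json
import requests

base_url = "http://localhost:8001/"  # illustrative; the app derives this from MODEL_ENDPOINT

def count_tokens(text: str) -> int:
    # POST {"content": ...} to /tokenize and count the returned token IDs,
    # mirroring the updated chunk_text() logic above.
    response = requests.post(f"{base_url}tokenize", json={"content": text})
    return len(json.loads(response.content)["tokens"])

print(count_tokens("How many tokens am I?"))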