Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
|
63a83a7398 | |
|
1bace0e7c5 | |
|
2f6980bd4d | |
|
cdf6cd5615 | |
|
bbffacf06d |
|
@ -16,4 +16,4 @@
|
|||
# under the License.
|
||||
wrapperVersion=3.3.2
|
||||
distributionType=only-script
|
||||
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
|
||||
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.10/apache-maven-3.9.10-bin.zip
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Install dependencies only when needed
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS deps
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS deps
|
||||
USER 0
|
||||
WORKDIR /app
|
||||
|
||||
|
@ -13,7 +13,7 @@ RUN \
|
|||
fi
|
||||
|
||||
# Rebuild the source code only when needed
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS builder
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS builder
|
||||
USER 0
|
||||
WORKDIR /app
|
||||
COPY --from=deps /app/node_modules ./node_modules
|
||||
|
@ -28,7 +28,7 @@ ENV NEXT_TELEMETRY_DISABLED 1
|
|||
RUN npm run build
|
||||
|
||||
# Production image, copy all the files and run next
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS runner
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS runner
|
||||
USER 0
|
||||
WORKDIR /app
|
||||
|
||||
|
|
|
@ -186,9 +186,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@langchain/core": {
|
||||
"version": "0.3.56",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.56.tgz",
|
||||
"integrity": "sha512-eF9MyInM9RLNisAygiCrzHnqzOnuzGWy4f1SAqAis+XIMhcA98WuZDNWxyX9pP3aKQGc47FAJ/9XWJwv5KiquA==",
|
||||
"version": "0.3.57",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.57.tgz",
|
||||
"integrity": "sha512-jz28qCTKJmi47b6jqhQ6vYRTG5jRpqhtPQjriRTB5wR8mgvzo6xKs0fG/kExS3ZvM79ytD1npBvgf8i19xOo9Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@cfworker/json-schema": "^4.0.2",
|
||||
|
@ -221,9 +221,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@langchain/langgraph": {
|
||||
"version": "0.2.72",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-0.2.72.tgz",
|
||||
"integrity": "sha512-2Rs79mLSx0Yxr/omiWOXBlaS+eywZ9KACe06pI6XkA3hT2hwqjMlXYMvbeD7mxZlKrPtLsQaHWvL9IO2VAa+lQ==",
|
||||
"version": "0.2.74",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/langgraph/-/langgraph-0.2.74.tgz",
|
||||
"integrity": "sha512-oHpEi5sTZTPaeZX1UnzfM2OAJ21QGQrwReTV6+QnX7h8nDCBzhtipAw1cK616S+X8zpcVOjgOtJuaJhXa4mN8w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@langchain/langgraph-checkpoint": "~0.0.17",
|
||||
|
|
|
@ -9,15 +9,7 @@ RUN chown -R 1001:0 /graph-rag
|
|||
COPY requirements.txt .
|
||||
COPY rag_app.py .
|
||||
|
||||
# Detect architecture and install Rust only on ARM (aarch64/arm64)
|
||||
RUN ARCH=$(uname -m) && \
|
||||
if [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
|
||||
source "$HOME/.cargo/env" && \
|
||||
rustc --version && \
|
||||
cargo --version; \
|
||||
fi && \
|
||||
pip install --upgrade pip && \
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --no-cache-dir --upgrade -r /graph-rag/requirements.txt
|
||||
|
||||
# Expose the port for the application
|
||||
|
|
|
@ -1,196 +1,183 @@
|
|||
import sys
|
||||
import os
|
||||
import shutil
|
||||
import nest_asyncio
|
||||
import fitz # PyMuPDF
|
||||
import streamlit as st
|
||||
import fitz
|
||||
import logging
|
||||
|
||||
# logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
from lightrag import LightRAG, QueryParam
|
||||
from lightrag.llm.hf import hf_embed
|
||||
from lightrag.llm.openai import openai_complete_if_cache
|
||||
from lightrag.utils import EmbeddingFunc, encode_string_by_tiktoken, truncate_list_by_token_size, decode_tokens_by_tiktoken
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
|
||||
# Apply nest_asyncio to solve event loop issues
|
||||
nest_asyncio.apply()
|
||||
|
||||
WORKING_DIR = "rag_data"
|
||||
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
||||
LLM_MODEL = "dummy"
|
||||
API_KEY = "dummy"
|
||||
from typing import List
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.vectorstores import InMemoryVectorStore
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
from langchain_graph_retriever import GraphRetriever
|
||||
from graph_retriever.strategies import Eager
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
# Configuration
|
||||
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
|
||||
model_service = os.getenv("MODEL_ENDPOINT",
|
||||
"http://localhost:8001")
|
||||
model_service = f"{model_service}/v1"
|
||||
|
||||
# Check if folder exists
|
||||
if not os.path.exists(WORKING_DIR):
|
||||
os.mkdir(WORKING_DIR)
|
||||
|
||||
async def llm_model_func(
|
||||
prompt: str, system_prompt: str = None, history_messages: list[str] = [], **kwargs
|
||||
) -> str:
|
||||
"""LLM function to ensure total tokens (prompt + system_prompt + history_messages) <= 2048."""
|
||||
# Calculate token sizes
|
||||
prompt_tokens = len(encode_string_by_tiktoken(prompt))
|
||||
|
||||
# Calculate remaining tokens for history_messages
|
||||
max_total_tokens = 1000
|
||||
|
||||
# If the prompt itself exceeds the token limit, truncate it
|
||||
if prompt_tokens > max_total_tokens:
|
||||
print("Warning: Prompt exceeds token limit. Truncating prompt.")
|
||||
truncated_prompt = encode_string_by_tiktoken(prompt)[:max_total_tokens]
|
||||
prompt = decode_tokens_by_tiktoken(truncated_prompt)
|
||||
prompt_tokens = len(truncated_prompt)
|
||||
|
||||
# Truncate history_messages to fit within the remaining tokens
|
||||
|
||||
# Log token sizes for debugging
|
||||
print(f"Prompt tokens: {prompt_tokens}")
|
||||
|
||||
# Call the LLM with truncated prompt and history_messages
|
||||
return await openai_complete_if_cache(
|
||||
model=LLM_MODEL,
|
||||
prompt=prompt,
|
||||
system_prompt=system_prompt,
|
||||
# history_messages=history_messages,
|
||||
base_url=model_service,
|
||||
api_key=API_KEY,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
rag = LightRAG(
|
||||
working_dir=WORKING_DIR,
|
||||
llm_model_func=llm_model_func,
|
||||
chunk_token_size = 256,
|
||||
chunk_overlap_token_size = 50,
|
||||
llm_model_max_token_size=1000,
|
||||
llm_model_name=LLM_MODEL,
|
||||
embedding_func=EmbeddingFunc(
|
||||
embedding_dim=384,
|
||||
max_token_size=5000,
|
||||
func=lambda texts: hf_embed(
|
||||
texts,
|
||||
tokenizer=AutoTokenizer.from_pretrained(EMBEDDING_MODEL),
|
||||
embed_model=AutoModel.from_pretrained(EMBEDDING_MODEL),
|
||||
),
|
||||
),
|
||||
)
|
||||
LLM_MODEL = "local-model"
|
||||
WORKING_DIR = "graph_rag_data"
|
||||
|
||||
# Initialize session state
|
||||
if 'uploaded_file_previous' not in st.session_state:
|
||||
st.session_state.uploaded_file_previous = None
|
||||
|
||||
if 'rag_initialized' not in st.session_state:
|
||||
st.session_state.rag_initialized = False
|
||||
if 'retriever' not in st.session_state:
|
||||
st.session_state.retriever = None
|
||||
|
||||
if 'chain' not in st.session_state:
|
||||
st.session_state.chain = None
|
||||
|
||||
if 'user_query' not in st.session_state:
|
||||
st.session_state.user_query = ''
|
||||
if 'last_submission' not in st.session_state:
|
||||
st.session_state.last_submission = ''
|
||||
|
||||
def pdf_to_text(pdf_path, output_path):
|
||||
def pdf_to_text(pdf_path: str) -> str:
|
||||
"""Extract text from PDF file."""
|
||||
try:
|
||||
doc = fitz.open(pdf_path)
|
||||
text = ''
|
||||
for page in doc:
|
||||
text += page.get_text()
|
||||
with open(output_path, 'w', encoding='utf-8') as file:
|
||||
file.write(text)
|
||||
return text
|
||||
except Exception as e:
|
||||
st.error(f"Error extracting text from PDF: {e}")
|
||||
raise
|
||||
|
||||
async def async_query(query, mode="mix"):
|
||||
print('\n')
|
||||
print("query: ", query)
|
||||
try:
|
||||
with st.spinner("Processing your query..."):
|
||||
stream = rag.query(query, param=QueryParam(mode=mode, stream=True, max_token_for_text_unit=1750, max_token_for_global_context=1750, max_token_for_local_context=1750))
|
||||
|
||||
# Create a placeholder for the streamed content
|
||||
output_placeholder = st.empty()
|
||||
|
||||
# Manually consume the stream and write to Streamlit
|
||||
response = ""
|
||||
|
||||
# Check if stream is an async iterable
|
||||
if hasattr(stream, "__aiter__"):
|
||||
print("async")
|
||||
async for chunk in stream:
|
||||
response += chunk
|
||||
# Update the placeholder with the latest response
|
||||
output_placeholder.markdown(response, unsafe_allow_html=True)
|
||||
else:
|
||||
print("not async")
|
||||
st.write(stream)
|
||||
response = stream
|
||||
|
||||
# Store the final response in session state
|
||||
st.session_state.last_submission = response
|
||||
|
||||
except ValueError as e:
|
||||
if "exceed context window" in str(e):
|
||||
st.error(
|
||||
"The tokens in your query exceed the model's context window. Please try a different query mode or shorten your query."
|
||||
def create_documents_from_text(text: str) -> List[Document]:
|
||||
"""Create LangChain Documents from text with basic metadata."""
|
||||
chunks = text.split('\n\n') # Simple paragraph-based chunking
|
||||
documents = []
|
||||
for i, chunk in enumerate(chunks):
|
||||
if chunk.strip(): # Skip empty chunks
|
||||
documents.append(
|
||||
Document(
|
||||
page_content=chunk.strip(),
|
||||
metadata={"id": f"chunk_{i}", "source": "uploaded_file"}
|
||||
)
|
||||
)
|
||||
# Optionally, you could reset the query mode or suggest alternatives
|
||||
st.session_state.query_mode = "mix" # Default to "mix" mode
|
||||
st.session_state.user_query = '' # Clear the user query
|
||||
else:
|
||||
st.error(f"Error processing query: {e}")
|
||||
return documents
|
||||
|
||||
def setup_retriever(documents: List[Document]) -> GraphRetriever:
|
||||
"""Set up the Graph Retriever with HuggingFace embeddings."""
|
||||
# Initialize embeddings
|
||||
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
|
||||
|
||||
# Create vector store
|
||||
vector_store = InMemoryVectorStore.from_documents(
|
||||
documents=documents,
|
||||
embedding=embeddings,
|
||||
)
|
||||
|
||||
# Create graph retriever
|
||||
retriever = GraphRetriever(
|
||||
store=vector_store,
|
||||
edges=[("source", "source")], # Simple edge - can customize based on your metadata
|
||||
strategy=Eager(k=5, start_k=1, max_depth=2),
|
||||
)
|
||||
|
||||
return retriever
|
||||
|
||||
def setup_llm_chain(retriever: GraphRetriever):
|
||||
"""Set up the LLM chain with the retriever."""
|
||||
llm = ChatOpenAI(
|
||||
base_url=model_service,
|
||||
api_key="dummy",
|
||||
model=LLM_MODEL,
|
||||
streaming=True,
|
||||
)
|
||||
|
||||
prompt = ChatPromptTemplate.from_template(
|
||||
"""Answer the question based only on the context provided.
|
||||
|
||||
Context: {context}
|
||||
|
||||
Question: {question}"""
|
||||
)
|
||||
|
||||
def format_docs(docs):
|
||||
return "\n\n".join(f"{doc.page_content}" for doc in docs)
|
||||
|
||||
chain = (
|
||||
{"context": retriever | format_docs, "question": RunnablePassthrough()}
|
||||
| prompt
|
||||
| llm
|
||||
| StrOutputParser()
|
||||
)
|
||||
|
||||
return chain
|
||||
|
||||
def process_query(query: str):
|
||||
"""Process user query using the Graph RAG chain."""
|
||||
if st.session_state.chain is None:
|
||||
st.error("Please upload and process a PDF file first.")
|
||||
return
|
||||
|
||||
try:
|
||||
st.subheader("Answer:")
|
||||
with st.spinner("Processing your query..."):
|
||||
# Stream output token-by-token
|
||||
response_placeholder = st.empty()
|
||||
|
||||
full_response = ""
|
||||
for chunk in st.session_state.chain.stream(query):
|
||||
full_response += chunk
|
||||
response_placeholder.markdown(full_response + "▌")
|
||||
|
||||
response_placeholder.markdown(full_response)
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error processing query: {e}")
|
||||
|
||||
def query(query, mode="mix"):
|
||||
# Run the async function in the event loop
|
||||
import asyncio
|
||||
asyncio.run(async_query(query, mode))
|
||||
|
||||
# Streamlit UI
|
||||
st.title("GraphRAG Chatbot")
|
||||
st.title("Graph RAG with PDF Upload")
|
||||
|
||||
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
|
||||
|
||||
if uploaded_file is not None:
|
||||
if uploaded_file.name != st.session_state.uploaded_file_previous:
|
||||
st.session_state.uploaded_file_previous = uploaded_file.name
|
||||
if os.path.exists(WORKING_DIR):
|
||||
shutil.rmtree(WORKING_DIR, ignore_errors=True)
|
||||
os.makedirs(WORKING_DIR)
|
||||
|
||||
with open("temp.pdf", "wb") as f:
|
||||
|
||||
# Create working directory if it doesn't exist
|
||||
if not os.path.exists(WORKING_DIR):
|
||||
os.makedirs(WORKING_DIR)
|
||||
|
||||
# Save uploaded file temporarily
|
||||
temp_pdf_path = os.path.join(WORKING_DIR, "temp.pdf")
|
||||
with open(temp_pdf_path, "wb") as f:
|
||||
f.write(uploaded_file.getbuffer())
|
||||
|
||||
try:
|
||||
with st.spinner("Processing PDF..."):
|
||||
pdf_to_text("temp.pdf", "document.txt")
|
||||
with open("document.txt", "r", encoding="utf-8") as f:
|
||||
rag.insert(f.read())
|
||||
st.session_state.rag_initialized = True
|
||||
text = pdf_to_text(temp_pdf_path)
|
||||
|
||||
documents = create_documents_from_text(text)
|
||||
|
||||
# Set up retriever and chain
|
||||
st.session_state.retriever = setup_retriever(documents)
|
||||
st.session_state.chain = setup_llm_chain(st.session_state.retriever)
|
||||
|
||||
st.success("PDF processed successfully! You can now ask questions.")
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error processing PDF: {e}")
|
||||
finally:
|
||||
if os.path.exists("temp.pdf"):
|
||||
os.remove("temp.pdf")
|
||||
# Clean up temporary file
|
||||
if os.path.exists(temp_pdf_path):
|
||||
os.remove(temp_pdf_path)
|
||||
|
||||
if st.session_state.rag_initialized:
|
||||
query_mode = st.radio(
|
||||
"Select query mode:",
|
||||
options=["local", "global", "naive", "hybrid", "mix"],
|
||||
index=3,
|
||||
key="mode"
|
||||
# Query section
|
||||
if st.session_state.retriever is not None:
|
||||
st.subheader("Ask a Question")
|
||||
|
||||
st.text_input(
|
||||
"Enter your question about the document:",
|
||||
key="query_input"
|
||||
)
|
||||
st.session_state.query_mode = query_mode
|
||||
user_query = st.session_state.query_input
|
||||
|
||||
# Use a unique key for the text input to avoid conflicts
|
||||
user_query = st.text_input("Enter your query:", key="query_input")
|
||||
|
||||
if st.button("Submit"):
|
||||
if user_query.strip():
|
||||
st.session_state.user_query = user_query
|
||||
query(st.session_state.user_query, mode=st.session_state.query_mode)
|
||||
if user_query.strip() and user_query != st.session_state.user_query:
|
||||
st.session_state.user_query = user_query
|
||||
process_query(user_query)
|
|
@ -1,37 +1,7 @@
|
|||
lightrag-hku==1.1.7
|
||||
numpy==1.26.4
|
||||
pydantic==2.10.6
|
||||
python-dotenv==1.0.1
|
||||
pipmaster==0.4.0
|
||||
httpx==0.28.1
|
||||
nest_asyncio==1.6.0
|
||||
future==1.0.0
|
||||
setuptools==75.8.2
|
||||
tenacity==9.0.0
|
||||
PyMuPDF==1.25.5
|
||||
streamlit==1.42.0
|
||||
tiktoken
|
||||
torch
|
||||
transformers
|
||||
matplotlib
|
||||
scikit-learn
|
||||
POT==0.9.5
|
||||
anytree==2.12.1
|
||||
autograd==1.7.0
|
||||
beartype==0.18.5
|
||||
gensim==4.3.3
|
||||
graspologic==3.4.1
|
||||
hyppo==0.4.0
|
||||
llvmlite==0.44.0
|
||||
numba==0.61.2
|
||||
patsy==1.0.1
|
||||
pynndescent==0.5.13
|
||||
seaborn==0.13.2
|
||||
smart-open==7.1.0
|
||||
statsmodels==0.14.4
|
||||
umap-learn==0.5.7
|
||||
wrapt==1.17.2
|
||||
nano-vectordb==0.0.4.3
|
||||
jiter==0.8.2
|
||||
distro==1.9.0
|
||||
openai==1.64.0
|
||||
streamlit==1.45.1
|
||||
langchain-graph-retriever==0.8.0
|
||||
langchain-huggingface==0.2.0
|
||||
langchain-openai==0.3.17
|
||||
transformers==4.52.4
|
||||
torch==2.7.1
|
||||
PyMuPDF==1.25.5
|
|
@ -1,5 +1,5 @@
|
|||
# Install dependencies only when needed
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS deps
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS deps
|
||||
USER 0
|
||||
WORKDIR /app
|
||||
|
||||
|
@ -13,7 +13,7 @@ RUN \
|
|||
fi
|
||||
|
||||
# Rebuild the source code only when needed
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS builder
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS builder
|
||||
USER 0
|
||||
WORKDIR /app
|
||||
COPY --from=deps /app/node_modules ./node_modules
|
||||
|
@ -28,7 +28,7 @@ ENV NEXT_TELEMETRY_DISABLED 1
|
|||
RUN npm run build
|
||||
|
||||
# Production image, copy all the files and run next
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-143 AS runner
|
||||
FROM registry.access.redhat.com/ubi8/nodejs-18-minimal:1-144 AS runner
|
||||
USER 0
|
||||
WORKDIR /app
|
||||
|
||||
|
|
|
@ -2514,6 +2514,103 @@
|
|||
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@graphql-typed-document-node/core": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@graphql-typed-document-node/core/-/core-3.2.0.tgz",
|
||||
"integrity": "sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"graphql": "^0.8.0 || ^0.9.0 || ^0.10.0 || ^0.11.0 || ^0.12.0 || ^0.13.0 || ^14.0.0 || ^15.0.0 || ^16.0.0 || ^17.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@grpc/grpc-js": {
|
||||
"version": "1.13.4",
|
||||
"resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.13.4.tgz",
|
||||
"integrity": "sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@grpc/proto-loader": "^0.7.13",
|
||||
"@js-sdsl/ordered-map": "^4.4.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@grpc/proto-loader": {
|
||||
"version": "0.7.15",
|
||||
"resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.15.tgz",
|
||||
"integrity": "sha512-tMXdRCfYVixjuFK+Hk0Q1s38gV9zDiDJfWL3h1rv4Qc39oILCu1TRTDt7+fGUI8K4G1Fj125Hx/ru3azECWTyQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"lodash.camelcase": "^4.3.0",
|
||||
"long": "^5.0.0",
|
||||
"protobufjs": "^7.2.5",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"bin": {
|
||||
"proto-loader-gen-types": "build/bin/proto-loader-gen-types.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/@grpc/proto-loader/node_modules/long": {
|
||||
"version": "5.3.2",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
|
||||
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@grpc/proto-loader/node_modules/protobufjs": {
|
||||
"version": "7.4.0",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.4.0.tgz",
|
||||
"integrity": "sha512-mRUWCc3KUU4w1jU8sGxICXH/gNS94DvI1gxqDvBzhj1JpcsimQkYiOJfwsPUykUI5ZaspFbSgmBLER8IrQ3tqw==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.4",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.0",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.0",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@grpc/proto-loader/node_modules/yargs": {
|
||||
"version": "17.7.2",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
||||
"integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cliui": "^8.0.1",
|
||||
"escalade": "^3.1.1",
|
||||
"get-caller-file": "^2.0.5",
|
||||
"require-directory": "^2.1.1",
|
||||
"string-width": "^4.2.3",
|
||||
"y18n": "^5.0.5",
|
||||
"yargs-parser": "^21.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@grpc/proto-loader/node_modules/yargs-parser": {
|
||||
"version": "21.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
|
||||
"integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@huggingface/jinja": {
|
||||
"version": "0.2.2",
|
||||
"resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz",
|
||||
|
@ -3944,13 +4041,24 @@
|
|||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||
}
|
||||
},
|
||||
"node_modules/@js-sdsl/ordered-map": {
|
||||
"version": "4.4.2",
|
||||
"resolved": "https://registry.npmjs.org/@js-sdsl/ordered-map/-/ordered-map-4.4.2.tgz",
|
||||
"integrity": "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/js-sdsl"
|
||||
}
|
||||
},
|
||||
"node_modules/@langchain/community": {
|
||||
"version": "0.3.43",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.3.43.tgz",
|
||||
"integrity": "sha512-rTXuKflXyftKFw2fAl5YbkfMcwsIcot8tpUy50asXxbe3eGpQimIFXZsLeaBlftjQPadgnBMOr3Wn1xX8kfOzA==",
|
||||
"version": "0.3.45",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.3.45.tgz",
|
||||
"integrity": "sha512-KkAGmnP+w5tozLYsj/kGKwyfuPnCcA6MyDXfNF7oDo7L1TxhUgdEKhvNsY7ooLXz6Xh/LV5Kqp2B8U0jfYCQKQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@langchain/openai": ">=0.2.0 <0.6.0",
|
||||
"@langchain/weaviate": "^0.2.0",
|
||||
"binary-extensions": "^2.2.0",
|
||||
"expr-eval": "^2.0.2",
|
||||
"flat": "^5.0.2",
|
||||
|
@ -4088,7 +4196,7 @@
|
|||
"typesense": "^1.5.3",
|
||||
"usearch": "^1.1.1",
|
||||
"voy-search": "0.6.2",
|
||||
"weaviate-ts-client": "*",
|
||||
"weaviate-client": "^3.5.2",
|
||||
"web-auth-library": "^1.0.3",
|
||||
"word-extractor": "*",
|
||||
"ws": "^8.14.2",
|
||||
|
@ -4452,7 +4560,7 @@
|
|||
"voy-search": {
|
||||
"optional": true
|
||||
},
|
||||
"weaviate-ts-client": {
|
||||
"weaviate-client": {
|
||||
"optional": true
|
||||
},
|
||||
"web-auth-library": {
|
||||
|
@ -4567,6 +4675,22 @@
|
|||
"@langchain/core": ">=0.2.21 <0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@langchain/weaviate": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@langchain/weaviate/-/weaviate-0.2.0.tgz",
|
||||
"integrity": "sha512-gAtTCxSllR8Z92qAuRn2ir0cop241VmftQHQN+UYtTeoLge8hvZT5k0j55PDVaXTVpjx0ecx6DKv5I/wLRQI+A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"uuid": "^10.0.0",
|
||||
"weaviate-client": "^3.5.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@langchain/core": ">=0.2.21 <0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@leichtgewicht/ip-codec": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz",
|
||||
|
@ -6422,6 +6546,12 @@
|
|||
"node": ">=6.5"
|
||||
}
|
||||
},
|
||||
"node_modules/abort-controller-x": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/abort-controller-x/-/abort-controller-x-0.4.3.tgz",
|
||||
"integrity": "sha512-VtUwTNU8fpMwvWGn4xE93ywbogTYsuT+AUxAXOeelbXuQVIwNmC5YLeho9sH4vZ4ITW8414TTAOG1nW6uIVHCA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/accepts": {
|
||||
"version": "1.3.8",
|
||||
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
|
||||
|
@ -8339,6 +8469,15 @@
|
|||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/cross-fetch": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.2.0.tgz",
|
||||
"integrity": "sha512-Q+xVJLoGOeIMXZmbUK4HYk+69cQH6LudR0Vu/pRm2YlU/hDV9CiS0gKUMaWY5f2NeUH9C1nV3bsTlCo0FsTV1Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-fetch": "^2.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/cross-spawn": {
|
||||
"version": "7.0.6",
|
||||
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
|
||||
|
@ -11491,6 +11630,28 @@
|
|||
"integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/graphql": {
|
||||
"version": "16.11.0",
|
||||
"resolved": "https://registry.npmjs.org/graphql/-/graphql-16.11.0.tgz",
|
||||
"integrity": "sha512-mS1lbMsxgQj6hge1XZ6p7GPhbrtFwUFYi3wRzXAC/FmYnyXMTvvI3td3rjmQ2u8ewXueaSvRPWaEcgVVOT9Jnw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/graphql-request": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/graphql-request/-/graphql-request-6.1.0.tgz",
|
||||
"integrity": "sha512-p+XPfS4q7aIpKVcgmnZKhMNqhltk20hfXtkaIkTfjjmiKMJ5xrt5c743cL03y/K7y1rg3WrIC49xGiEQ4mxdNw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@graphql-typed-document-node/core": "^3.2.0",
|
||||
"cross-fetch": "^3.1.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"graphql": "14 - 16"
|
||||
}
|
||||
},
|
||||
"node_modules/guid-typescript": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
||||
|
@ -15321,6 +15482,12 @@
|
|||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.camelcase": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz",
|
||||
"integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.debounce": {
|
||||
"version": "4.0.8",
|
||||
"resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz",
|
||||
|
@ -15874,6 +16041,36 @@
|
|||
"node": "^10 || ^12 || >=14"
|
||||
}
|
||||
},
|
||||
"node_modules/nice-grpc": {
|
||||
"version": "2.1.12",
|
||||
"resolved": "https://registry.npmjs.org/nice-grpc/-/nice-grpc-2.1.12.tgz",
|
||||
"integrity": "sha512-J1n4Wg+D3IhRhGQb+iqh2OpiM0GzTve/kf2lnlW4S+xczmIEd0aHUDV1OsJ5a3q8GSTqJf+s4Rgg1M8uJltarw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@grpc/grpc-js": "^1.13.1",
|
||||
"abort-controller-x": "^0.4.0",
|
||||
"nice-grpc-common": "^2.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/nice-grpc-client-middleware-retry": {
|
||||
"version": "3.1.11",
|
||||
"resolved": "https://registry.npmjs.org/nice-grpc-client-middleware-retry/-/nice-grpc-client-middleware-retry-3.1.11.tgz",
|
||||
"integrity": "sha512-xW/imz/kNG2g0DwTfH2eYEGrg1chSLrXtvGp9fg2qkhTgGFfAS/Pq3+t+9G8KThcC4hK/xlEyKvZWKk++33S6A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"abort-controller-x": "^0.4.0",
|
||||
"nice-grpc-common": "^2.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/nice-grpc-common": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/nice-grpc-common/-/nice-grpc-common-2.0.2.tgz",
|
||||
"integrity": "sha512-7RNWbls5kAL1QVUOXvBsv1uO0wPQK3lHv+cY1gwkTzirnG1Nop4cBJZubpgziNbaVc/bl9QJcyvsf/NQxa3rjQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ts-error": "^1.0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/no-case": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz",
|
||||
|
@ -20800,9 +20997,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/primereact": {
|
||||
"version": "10.9.5",
|
||||
"resolved": "https://registry.npmjs.org/primereact/-/primereact-10.9.5.tgz",
|
||||
"integrity": "sha512-4O6gm0LrKF7Ml8zQmb8mGiWS/ugJ94KBOAS/CAxWFQh9qyNgfNw/qcpCeomPIkjWd98jrM2XDiEbgq+W0395Hw==",
|
||||
"version": "10.9.6",
|
||||
"resolved": "https://registry.npmjs.org/primereact/-/primereact-10.9.6.tgz",
|
||||
"integrity": "sha512-0Jjz/KzfUURSHaPTXJwjL2Dc7CDPnbO17MivyJz7T5smGAMLY5d+IqpQhV61R22G/rDmhMh3+32LCNva2M8fRw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/react-transition-group": "^4.4.1",
|
||||
|
@ -23923,6 +24120,12 @@
|
|||
"integrity": "sha512-c3zayb8/kWWpycWYg87P71E1S1ZL6b6IJxfb5fvsUgsf0S2MVGaDhDXXjDMpdCpfWXqptc+4mXwmiy1ypXqRAA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ts-error": {
|
||||
"version": "1.0.6",
|
||||
"resolved": "https://registry.npmjs.org/ts-error/-/ts-error-1.0.6.tgz",
|
||||
"integrity": "sha512-tLJxacIQUM82IR7JO1UUkKlYuUTmoY9HBJAmNWFzheSlDS5SPMcNIepejHJa4BpPQLAcbRhRf3GDJzyj6rbKvA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ts-interface-checker": {
|
||||
"version": "0.1.13",
|
||||
"resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz",
|
||||
|
@ -24440,6 +24643,44 @@
|
|||
"minimalistic-assert": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/weaviate-client": {
|
||||
"version": "3.5.5",
|
||||
"resolved": "https://registry.npmjs.org/weaviate-client/-/weaviate-client-3.5.5.tgz",
|
||||
"integrity": "sha512-wAjJtJmBQn2KiTPkfUGEzddBIbySpN0y0wAcYPWDCBXVjXqf0UOExujFJ+QeeRp+AjHk15B6BmUaUX9NHVLzsw==",
|
||||
"license": "SEE LICENSE IN LICENSE",
|
||||
"dependencies": {
|
||||
"abort-controller-x": "^0.4.3",
|
||||
"graphql": "^16.10.0",
|
||||
"graphql-request": "^6.1.0",
|
||||
"long": "^5.2.4",
|
||||
"nice-grpc": "^2.1.11",
|
||||
"nice-grpc-client-middleware-retry": "^3.1.10",
|
||||
"nice-grpc-common": "^2.0.2",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/weaviate-client/node_modules/long": {
|
||||
"version": "5.3.2",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
|
||||
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/weaviate-client/node_modules/uuid": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
],
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/web-streams-polyfill": {
|
||||
"version": "4.0.0-beta.3",
|
||||
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
|
||||
|
|
|
@ -57,9 +57,9 @@ def chunk_text(text):
|
|||
text_chunks = text_splitter.create_documents([text])
|
||||
for chunk in text_chunks:
|
||||
chunk = chunk.page_content
|
||||
chunk_kwargs = request_kwargs | {"json": {"input": chunk}}
|
||||
count = requests.post(f"{model_service[:-2]}extras/tokenize/count", **chunk_kwargs).content
|
||||
count = json.loads(count)["count"]
|
||||
chunk_kwargs = request_kwargs | {"json": {"content": chunk}}
|
||||
count = requests.post(f"{model_service[:-2]}tokenize", **chunk_kwargs).content
|
||||
count = len(json.loads(count)["tokens"])
|
||||
if count >= 2048:
|
||||
split_append_chunk(chunk, chunks)
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue