Spaces:
Runtime error
Runtime error
Saiteja Solleti
committed on
Commit
·
e8e78ae
1
Parent(s):
39560b9
Milvus schema addition
Browse files- app.py +7 -0
- createmilvusschema.py +47 -0
- crudmilvus.py +2 -1
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
import os
|
| 3 |
|
| 4 |
from loaddataset import ExtractRagBenchData
|
|
|
|
| 5 |
from model import generate_response
|
| 6 |
from huggingface_hub import login
|
| 7 |
from huggingface_hub import whoami
|
|
@@ -13,6 +14,12 @@ login(hf_token)
|
|
| 13 |
|
| 14 |
rag_extracted_data = ExtractRagBenchData()
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
print(rag_extracted_data.head(5))
|
| 17 |
|
| 18 |
def chatbot(prompt):
|
|
|
|
| 2 |
import os
|
| 3 |
|
| 4 |
from loaddataset import ExtractRagBenchData
|
| 5 |
+
from createmilvusschema import CreateMilvusDbSchema
|
| 6 |
from model import generate_response
|
| 7 |
from huggingface_hub import login
|
| 8 |
from huggingface_hub import whoami
|
|
|
|
| 14 |
|
| 15 |
rag_extracted_data = ExtractRagBenchData()
|
| 16 |
|
| 17 |
+
# Create (or open) the Milvus collection at startup. A failure here is
# reported but deliberately does not abort the app.
try:
    db_collection = CreateMilvusDbSchema()
except Exception as e:
    # Bind the name even on failure so that any later reference sees None
    # instead of raising NameError.
    db_collection = None
    print(f"Error creating Milvus DB schema: {e}")
|
| 22 |
+
|
| 23 |
print(rag_extracted_data.head(5))
|
| 24 |
|
| 25 |
def chatbot(prompt):
|
createmilvusschema.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection
|
| 3 |
+
|
| 4 |
+
milvus_token = os.getenv("MILVUS_TOKEN")
|
| 5 |
+
|
| 6 |
+
COLLECTION_NAME = "final_ragbench_document_embeddings"
|
| 7 |
+
MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.zilliz.com"
|
| 8 |
+
connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
|
| 9 |
+
|
| 10 |
+
# Function to create the Milvus DB schema (collection) that the data is inserted into
|
| 11 |
+
def CreateMilvusDbSchema():
    """Create (or open) the RAG Bench embeddings collection and index it.

    Connects to the Milvus endpoint at ``MILVUS_CLOUD_URI`` under the
    ``"default"`` alias, declares the collection schema, obtains the
    collection named ``COLLECTION_NAME`` and makes sure the
    ``chunk_embedding`` vector field has an HNSW/COSINE index.

    The function is safe to call on every application start: when the
    collection already exists it is reused, and the index is only built
    when none is present.

    Returns:
        The pymilvus ``Collection`` object for ``COLLECTION_NAME``.

    Raises:
        Any exception raised by pymilvus while connecting, creating the
        collection or building the index is propagated to the caller.
    """
    # Local import keeps the module's top-level import line untouched.
    from pymilvus import utility

    connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
    print(connections.get_connection_addr("default"))

    # Define the fields for the collection
    fields = [
        FieldSchema(name="chunk_doc_id", dtype=DataType.VARCHAR, max_length=350, is_primary=True, auto_id=False),  # Primary Key
        FieldSchema(name="doc_id", dtype=DataType.VARCHAR, max_length=300),  # Document ID
        FieldSchema(name="chunk_embedding", dtype=DataType.FLOAT_VECTOR, dim=384),  # Vector Field (embedding)
        FieldSchema(name="context_relevance", dtype=DataType.FLOAT),  # Context Relevance Score
        FieldSchema(name="context_utilization", dtype=DataType.FLOAT),  # Context Utilization Score
        FieldSchema(name="adherence", dtype=DataType.FLOAT),  # Adherence Score
        FieldSchema(name="dataset_name", dtype=DataType.VARCHAR, max_length=300),  # Dataset Name
        FieldSchema(name="relevance_score", dtype=DataType.FLOAT),  # Relevance Score
        FieldSchema(name="utilization_score", dtype=DataType.FLOAT),  # Utilization Score
        FieldSchema(name="completeness_score", dtype=DataType.FLOAT),  # Completeness Score
    ]

    # Define the collection schema
    schema = CollectionSchema(fields, description="RAG Bench document vector collection")

    # Record whether the collection pre-exists so the status message below
    # reports what actually happened.
    already_exists = utility.has_collection(COLLECTION_NAME)

    # Collection() opens the collection when it exists and creates it otherwise.
    collection = Collection(name=COLLECTION_NAME, schema=schema)

    # Build the vector index only when it is missing; this also covers a
    # collection left without an index by an earlier, interrupted run.
    if not collection.has_index():
        collection.create_index(
            "chunk_embedding",
            {
                "index_type": "HNSW",  # Hierarchical Navigable Small World (HNSW) index
                "metric_type": "COSINE",  # Cosine similarity for vector search
                "params": {"M": 16, "efConstruction": 200},  # HNSW parameters
            },
        )

    status = "already exists" if already_exists else "created successfully"
    print(f"Collection '{COLLECTION_NAME}' {status}.")
    return collection
|
crudmilvus.py
CHANGED
|
@@ -9,4 +9,5 @@ MILVUS_CLOUD_URI = "https://in03-7b4da1b7b588a88.serverless.gcp-us-west1.cloud.z
|
|
| 9 |
connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
|
| 10 |
|
| 11 |
# Verify connection
|
| 12 |
-
print(connections.get_connection_addr("default"))
|
|
|
|
|
|
| 9 |
connections.connect("default", uri=MILVUS_CLOUD_URI, token=milvus_token)
|
| 10 |
|
| 11 |
# Verify connection
|
| 12 |
+
print(connections.get_connection_addr("default"))
|
| 13 |
+
|