modelx / src /storage /config.py
nivakaran's picture
Upload folder using huggingface_hub
16ec2cf verified
"""
src/storage/config.py
Centralized storage configuration with environment variable support
"""
import os
from pathlib import Path
# Filesystem layout. PROJECT_ROOT is the third ancestor of this file (for
# src/storage/config.py that is the directory containing src/); every storage
# location below lives under <PROJECT_ROOT>/data.
PROJECT_ROOT = Path(__file__).parent.parent.parent
DATA_DIR = PROJECT_ROOT.joinpath("data")
CACHE_DIR = DATA_DIR.joinpath("cache")
CHROMADB_DIR = DATA_DIR.joinpath("chromadb")
NEO4J_DATA_DIR = DATA_DIR.joinpath("neo4j")
FEEDS_CSV_DIR = DATA_DIR.joinpath("feeds")

# Create the directory tree as a side effect of importing this module.
# parents=True builds missing ancestors; exist_ok=True makes re-imports harmless.
for dir_path in (
    DATA_DIR,
    CACHE_DIR,
    CHROMADB_DIR,
    NEO4J_DATA_DIR,
    FEEDS_CSV_DIR,
):
    dir_path.mkdir(parents=True, exist_ok=True)
class StorageConfig:
    """Settings for the SQLite, ChromaDB, Neo4j and CSV storage backends.

    Every attribute is computed once, when the class body executes, by
    reading an environment variable and falling back to the default given
    here. Numeric settings are converted with ``int``/``float``, so a
    non-numeric value in the environment raises ``ValueError`` at that point.
    """

    # --- SQLite ---------------------------------------------------------
    SQLITE_DB_PATH: str = os.environ.get(
        "SQLITE_DB_PATH", str(CACHE_DIR.joinpath("feeds.db"))
    )
    SQLITE_RETENTION_HOURS: int = int(
        os.environ.get("SQLITE_RETENTION_HOURS", "24")
    )

    # --- ChromaDB -------------------------------------------------------
    CHROMADB_PATH: str = os.environ.get("CHROMADB_PATH", str(CHROMADB_DIR))
    CHROMADB_COLLECTION: str = os.environ.get("CHROMADB_COLLECTION", "Roger_feeds")
    CHROMADB_SIMILARITY_THRESHOLD: float = float(
        os.environ.get("CHROMADB_SIMILARITY_THRESHOLD", "0.85")
    )
    CHROMADB_EMBEDDING_MODEL: str = os.environ.get(
        "CHROMADB_EMBEDDING_MODEL",
        "sentence-transformers/all-MiniLM-L6-v2",
    )

    # --- Neo4j ----------------------------------------------------------
    NEO4J_URI: str = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    # Two spellings of the user variable are honoured: NEO4J_USERNAME takes
    # precedence, then NEO4J_USER, then the literal "neo4j".
    NEO4J_USER: str = os.environ.get(
        "NEO4J_USERNAME",
        os.environ.get("NEO4J_USER", "neo4j"),
    )
    NEO4J_PASSWORD: str = os.environ.get("NEO4J_PASSWORD", "")
    NEO4J_DATABASE: str = os.environ.get("NEO4J_DATABASE", "neo4j")
    # Enabled when the NEO4J_URI variable mentions 'neo4j.io' (Aura), or when
    # NEO4J_ENABLED equals "true" in any letter case.
    NEO4J_ENABLED: bool = (
        "neo4j.io" in os.environ.get("NEO4J_URI", "")
        or os.environ.get("NEO4J_ENABLED", "").lower() == "true"
    )

    # --- CSV export -----------------------------------------------------
    CSV_EXPORT_DIR: str = os.environ.get("CSV_EXPORT_DIR", str(FEEDS_CSV_DIR))

    # --- Deduplication --------------------------------------------------
    EXACT_MATCH_CHARS: int = int(os.environ.get("EXACT_MATCH_CHARS", "120"))

    @classmethod
    def get_config_summary(cls) -> dict:
        """Return a small dict of the main settings, suitable for logging.

        The Neo4j URI is reported only when Neo4j is enabled; otherwise the
        string "disabled" is reported in its place. The Neo4j user and
        password are not part of the summary.
        """
        neo4j_on = cls.NEO4J_ENABLED
        if neo4j_on:
            reported_uri = cls.NEO4J_URI
        else:
            reported_uri = "disabled"

        summary = {
            "sqlite_path": cls.SQLITE_DB_PATH,
            "chromadb_path": cls.CHROMADB_PATH,
            "chromadb_collection": cls.CHROMADB_COLLECTION,
            "similarity_threshold": cls.CHROMADB_SIMILARITY_THRESHOLD,
            "neo4j_enabled": neo4j_on,
            "neo4j_uri": reported_uri,
        }
        return summary
# Module-level StorageConfig instance. All settings are class attributes, so
# this instance exposes the same values as the StorageConfig class itself.
config = StorageConfig()