File size: 2,494 Bytes
b4856f1
 
 
 
752f5cc
 
b4856f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752f5cc
b4856f1
752f5cc
b4856f1
752f5cc
b4856f1
752f5cc
b4856f1
752f5cc
 
 
b4856f1
752f5cc
b4856f1
752f5cc
aa62cae
b4856f1
aa62cae
 
 
 
752f5cc
 
 
 
b4856f1
752f5cc
 
b4856f1
 
752f5cc
b4856f1
 
 
 
 
 
 
 
 
752f5cc
b4856f1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
src/storage/config.py
Centralized storage configuration with environment variable support
"""

import os
from pathlib import Path

# Filesystem layout: PROJECT_ROOT is three directory levels above this file,
# and all of the directories below live under <PROJECT_ROOT>/data.
PROJECT_ROOT = Path(__file__).parent.parent.parent
DATA_DIR = PROJECT_ROOT.joinpath("data")
CACHE_DIR = DATA_DIR.joinpath("cache")
CHROMADB_DIR = DATA_DIR.joinpath("chromadb")
NEO4J_DATA_DIR = DATA_DIR.joinpath("neo4j")
FEEDS_CSV_DIR = DATA_DIR.joinpath("feeds")

# Create the directory tree when the module is imported; directories that
# are already present are left untouched (exist_ok=True).
for dir_path in (
    DATA_DIR,
    CACHE_DIR,
    CHROMADB_DIR,
    NEO4J_DATA_DIR,
    FEEDS_CSV_DIR,
):
    dir_path.mkdir(parents=True, exist_ok=True)


# Spellings accepted for boolean environment variables (compared lower-cased).
_TRUE_TOKENS = frozenset({"true", "1", "yes", "on"})
_FALSE_TOKENS = frozenset({"false", "0", "no", "off"})


def _env_or_default(name: str, default: str) -> str:
    """Return env var *name* stripped of whitespace, or *default* if unset/blank.

    A variable that is set but empty (e.g. ``SQLITE_RETENTION_HOURS=``) is
    treated like an unset one, so numeric parsing never sees ``""``.
    """
    raw = os.getenv(name, "").strip()
    return raw or default


def _env_flag(name: str, fallback: bool) -> bool:
    """Parse env var *name* as a boolean.

    Returns True/False for a recognised spelling, and *fallback* when the
    variable is unset, blank, or holds an unrecognised value.
    """
    token = os.getenv(name, "").strip().lower()
    if token in _TRUE_TOKENS:
        return True
    if token in _FALSE_TOKENS:
        return False
    return fallback


class StorageConfig:
    """Configuration for all storage backends.

    Every setting is a class attribute whose value is read from the
    environment once, when this class body is executed; each falls back to
    the default shown next to it.

    Raises:
        ValueError: while the class is being defined, if a numeric variable
            holds a non-blank value that cannot be parsed.
    """

    # SQLite Configuration
    SQLITE_DB_PATH: str = os.getenv("SQLITE_DB_PATH", str(CACHE_DIR / "feeds.db"))
    SQLITE_RETENTION_HOURS: int = int(
        _env_or_default("SQLITE_RETENTION_HOURS", "24")
    )

    # ChromaDB Configuration
    CHROMADB_PATH: str = os.getenv("CHROMADB_PATH", str(CHROMADB_DIR))
    CHROMADB_COLLECTION: str = os.getenv("CHROMADB_COLLECTION", "Roger_feeds")
    CHROMADB_SIMILARITY_THRESHOLD: float = float(
        _env_or_default("CHROMADB_SIMILARITY_THRESHOLD", "0.85")
    )
    CHROMADB_EMBEDDING_MODEL: str = os.getenv(
        "CHROMADB_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
    )

    # Neo4j Configuration (supports both NEO4J_USER and NEO4J_USERNAME;
    # NEO4J_USERNAME takes precedence when both are set)
    NEO4J_URI: str = os.getenv("NEO4J_URI", "bolt://localhost:7687")
    NEO4J_USER: str = os.getenv("NEO4J_USERNAME", os.getenv("NEO4J_USER", "neo4j"))
    NEO4J_PASSWORD: str = os.getenv("NEO4J_PASSWORD", "")
    NEO4J_DATABASE: str = os.getenv("NEO4J_DATABASE", "neo4j")
    # An explicit NEO4J_ENABLED value wins in either direction, so
    # NEO4J_ENABLED=false can switch off a 'neo4j.io' (Aura) URI. When the
    # variable is unset or unrecognised, auto-enable for 'neo4j.io' URIs.
    NEO4J_ENABLED: bool = _env_flag("NEO4J_ENABLED", fallback="neo4j.io" in NEO4J_URI)

    # CSV Export Configuration
    CSV_EXPORT_DIR: str = os.getenv("CSV_EXPORT_DIR", str(FEEDS_CSV_DIR))

    # Deduplication Settings
    EXACT_MATCH_CHARS: int = int(_env_or_default("EXACT_MATCH_CHARS", "120"))

    @classmethod
    def get_config_summary(cls) -> dict:
        """Get configuration summary for logging.

        Returns:
            A dict with the SQLite path, the ChromaDB path, collection and
            similarity threshold, and the Neo4j enabled flag. ``neo4j_uri``
            holds the URI only when Neo4j is enabled, otherwise the string
            ``"disabled"``. The Neo4j user and password are not included.
        """
        return {
            "sqlite_path": cls.SQLITE_DB_PATH,
            "chromadb_path": cls.CHROMADB_PATH,
            "chromadb_collection": cls.CHROMADB_COLLECTION,
            "similarity_threshold": cls.CHROMADB_SIMILARITY_THRESHOLD,
            "neo4j_enabled": cls.NEO4J_ENABLED,
            "neo4j_uri": cls.NEO4J_URI if cls.NEO4J_ENABLED else "disabled",
        }


# Shared module-level instance. Every setting on StorageConfig is a class
# attribute, so this instance exposes the same values as the class itself.
config = StorageConfig()