#!/usr/bin/env python3
"""
RAG Pipeline Designer - designs complete RAG pipelines based on requirements.

This script analyzes a set of requirements and generates a comprehensive RAG
pipeline design, including an architecture diagram, component recommendations,
configuration templates, and cost projections.

Components designed:
- Chunking strategy recommendation
- Embedding model selection
- Vector database recommendation
- Retrieval approach (dense/sparse/hybrid)
- Reranking configuration
- Evaluation framework setup
- Production deployment patterns

No external dependencies - uses only the Python standard library.
"""

import argparse
import json
import sys
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
from enum import Enum

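# Example input (illustrative values only; the script name used below is an
# assumption). Field names match the Requirements dataclass defined further
# down. Invoke the script as, e.g.:
#
#   python rag_pipeline_designer.py requirements.json -o design.json --verbose
#
# where requirements.json might look like:
# {
#   "document_types": ["technical", "code"],
#   "document_count": 500000,
#   "avg_document_size": 3000,
#   "queries_per_day": 2000,
#   "query_patterns": ["factual", "analytical"],
#   "latency_requirement": "interactive",
#   "budget_monthly": 500.0,
#   "accuracy_priority": 0.8,
#   "cost_priority": 0.4,
#   "maintenance_complexity": "low"
# }

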
class Scale(Enum):
    """System scale categories."""
    SMALL = "small"    # < 1M documents, < 1K queries/day
    MEDIUM = "medium"  # 1M-100M documents, 1K-100K queries/day
    LARGE = "large"    # 100M+ documents, 100K+ queries/day


class DocumentType(Enum):
    """Document type categories."""
    TEXT = "text"              # Plain text, articles
    TECHNICAL = "technical"    # Documentation, manuals
    CODE = "code"              # Source code files
    SCIENTIFIC = "scientific"  # Research papers, journals
    LEGAL = "legal"            # Legal documents, contracts
    MIXED = "mixed"            # Multiple document types


class Latency(Enum):
    """Latency requirements."""
    REAL_TIME = "real_time"      # < 100ms
    INTERACTIVE = "interactive"  # < 500ms
    BATCH = "batch"              # > 1s acceptable


@dataclass
class Requirements:
    """RAG system requirements."""
    document_types: List[str]
    document_count: int
    avg_document_size: int       # characters
    queries_per_day: int
    query_patterns: List[str]    # e.g., ["factual", "conversational", "analytical"]
    latency_requirement: str     # "real_time", "interactive", or "batch"
    budget_monthly: float        # USD
    accuracy_priority: float     # 0-1 scale
    cost_priority: float         # 0-1 scale
    maintenance_complexity: str  # "low", "medium", "high"


@dataclass
class ComponentRecommendation:
    """Recommendation for a pipeline component."""
    name: str
    type: str
    config: Dict[str, Any]
    rationale: str
    pros: List[str]
    cons: List[str]
    cost_monthly: float


@dataclass
class PipelineDesign:
    """Complete RAG pipeline design."""
    chunking: ComponentRecommendation
    embedding: ComponentRecommendation
    vector_db: ComponentRecommendation
    retrieval: ComponentRecommendation
    reranking: Optional[ComponentRecommendation]
    evaluation: ComponentRecommendation
    total_cost: float
    architecture_diagram: str
    config_templates: Dict[str, Any]


class RAGPipelineDesigner:
    """Main pipeline designer class."""

    def __init__(self):
        self.embedding_models = self._load_embedding_models()
        self.vector_databases = self._load_vector_databases()
        self.chunking_strategies = self._load_chunking_strategies()

    def design_pipeline(self, requirements: Requirements) -> PipelineDesign:
        """Design a complete RAG pipeline based on requirements."""
        print(f"Designing RAG pipeline for {requirements.document_count:,} documents...")

        # Determine system scale
        scale = self._determine_scale(requirements)
        print(f"System scale: {scale.value}")

        # Design each component
        chunking = self._recommend_chunking(requirements, scale)
        embedding = self._recommend_embedding(requirements, scale)
        vector_db = self._recommend_vector_db(requirements, scale)
        retrieval = self._recommend_retrieval(requirements, scale)
        reranking = self._recommend_reranking(requirements, scale)
        evaluation = self._recommend_evaluation(requirements, scale)

        # Calculate total cost
        total_cost = (chunking.cost_monthly + embedding.cost_monthly +
                      vector_db.cost_monthly + retrieval.cost_monthly +
                      evaluation.cost_monthly)
        if reranking:
            total_cost += reranking.cost_monthly

        # Generate architecture diagram
        architecture = self._generate_architecture_diagram(
            chunking, embedding, vector_db, retrieval, reranking, evaluation
        )

        # Generate configuration templates
        configs = self._generate_config_templates(
            chunking, embedding, vector_db, retrieval, reranking, evaluation
        )

        return PipelineDesign(
            chunking=chunking,
            embedding=embedding,
            vector_db=vector_db,
            retrieval=retrieval,
            reranking=reranking,
            evaluation=evaluation,
            total_cost=total_cost,
            architecture_diagram=architecture,
            config_templates=configs
        )

    def _determine_scale(self, req: Requirements) -> Scale:
        """Determine system scale based on requirements."""
        if req.document_count < 1_000_000 and req.queries_per_day < 1_000:
            return Scale.SMALL
        elif req.document_count < 100_000_000 and req.queries_per_day < 100_000:
            return Scale.MEDIUM
        else:
            return Scale.LARGE

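    # Worked example (illustrative figures): 500K documents at 2K queries/day
    # is not SMALL, because SMALL requires BOTH document_count < 1M AND
    # queries_per_day < 1K; both values do fall under the MEDIUM limits
    # (100M docs, 100K queries/day), so the result is Scale.MEDIUM.
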
    def _recommend_chunking(self, req: Requirements, scale: Scale) -> ComponentRecommendation:
        """Recommend a chunking strategy."""
        doc_types = set(req.document_types)

        if "code" in doc_types:
            strategy = "semantic_code_aware"
            config = {"max_size": 1000, "preserve_functions": True, "overlap": 50}
            rationale = "Code documents benefit from function/class boundary awareness"
        elif "technical" in doc_types or "scientific" in doc_types:
            strategy = "semantic_heading_aware"
            config = {"max_size": 1500, "heading_weight": 2.0, "overlap": 100}
            rationale = "Technical documents have a clear hierarchical structure"
        elif len(doc_types) > 2 or "mixed" in doc_types:
            strategy = "adaptive_chunking"
            config = {"strategies": ["paragraph", "sentence", "fixed"], "auto_select": True}
            rationale = "Mixed document types require adaptive strategy selection"
        else:
            if req.avg_document_size > 5000:
                strategy = "paragraph_based"
                config = {"max_size": 2000, "min_paragraph_size": 100}
                rationale = "Large documents benefit from paragraph-based chunking"
            else:
                strategy = "sentence_based"
                config = {"max_size": 1000, "sentence_overlap": 1}
                rationale = "Small to medium documents work well with sentence chunking"

        return ComponentRecommendation(
            name=strategy,
            type="chunking",
            config=config,
            rationale=rationale,
            pros=self._get_chunking_pros(strategy),
            cons=self._get_chunking_cons(strategy),
            cost_monthly=0.0  # Processing cost only; no recurring service fee
        )

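    # Illustrative sketch only (not called by the designer): a minimal
    # sentence-based chunker matching the "sentence_based" config above
    # (max_size in characters, sentence_overlap in sentences). Sentence
    # splitting here is naive, punctuation-based, purely for illustration.
    @staticmethod
    def _sentence_chunk_sketch(text: str, max_size: int = 1000,
                               sentence_overlap: int = 1) -> List[str]:
        import re  # local import: used only by this sketch
        sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
        chunks: List[str] = []
        current: List[str] = []
        for sentence in sentences:
            # Start a new chunk when adding this sentence would exceed max_size,
            # carrying over the last `sentence_overlap` sentences for context.
            if current and len(" ".join(current)) + len(sentence) + 1 > max_size:
                chunks.append(" ".join(current))
                current = current[-sentence_overlap:] if sentence_overlap else []
            current.append(sentence)
        if current:
            chunks.append(" ".join(current))
        return chunks
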
    def _recommend_embedding(self, req: Requirements, scale: Scale) -> ComponentRecommendation:
        """Recommend an embedding model."""
        doc_types = set(req.document_types)

        # Weigh accuracy priority against cost priority
        high_accuracy = req.accuracy_priority > 0.7
        cost_sensitive = req.cost_priority > 0.6

        if "code" in doc_types:
            if high_accuracy and not cost_sensitive:
                model = "openai-code-search-ada-002"
                cost_per_1k_tokens = 0.0001
                dimensions = 1536
            else:
                model = "sentence-transformers/code-bert-base"
                cost_per_1k_tokens = 0.0  # Self-hosted
                dimensions = 768
        elif "scientific" in doc_types:
            if high_accuracy:
                model = "openai-text-embedding-ada-002"
                cost_per_1k_tokens = 0.0001
                dimensions = 1536
            else:
                model = "sentence-transformers/scibert-nli"
                cost_per_1k_tokens = 0.0
                dimensions = 768
        else:
            if cost_sensitive or scale == Scale.SMALL:
                model = "sentence-transformers/all-MiniLM-L6-v2"
                cost_per_1k_tokens = 0.0
                dimensions = 384
            elif high_accuracy:
                model = "openai-text-embedding-ada-002"
                cost_per_1k_tokens = 0.0001
                dimensions = 1536
            else:
                model = "sentence-transformers/all-mpnet-base-v2"
                cost_per_1k_tokens = 0.0
                dimensions = 768

        # Estimate monthly embedding cost. Note: this conservatively treats
        # embedding the full corpus as a recurring monthly cost rather than a
        # one-time ingestion cost.
        total_tokens = req.document_count * (req.avg_document_size / 4)  # ~4 chars per token
        query_tokens = req.queries_per_day * 30 * 20  # ~20 tokens per query, 30 days/month
        monthly_cost = (total_tokens + query_tokens) * cost_per_1k_tokens / 1000

        return ComponentRecommendation(
            name=model,
            type="embedding",
            config={
                "model": model,
                "dimensions": dimensions,
                "batch_size": 100 if scale == Scale.SMALL else 1000,
                "cache_embeddings": True
            },
            rationale=f"Selected for {sorted(doc_types)} with accuracy priority {req.accuracy_priority}",
            pros=self._get_embedding_pros(model),
            cons=self._get_embedding_cons(model),
            cost_monthly=monthly_cost
        )

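    # Illustrative arithmetic for the cost estimate above (assumed figures):
    # 1,000,000 docs x 2,000 chars ~= 500 tokens/doc -> 500M corpus tokens;
    # 1,000 queries/day x 30 days x 20 tokens = 600K query tokens.
    # At $0.0001 per 1K tokens: (500_000_000 + 600_000) / 1000 * 0.0001
    # ~= $50.06/month. Self-hosted models price at $0 here because only the
    # per-token API cost is modeled; hosting hardware is not included.
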
    def _recommend_vector_db(self, req: Requirements, scale: Scale) -> ComponentRecommendation:
        """Recommend a vector database."""
        if scale == Scale.SMALL and req.cost_priority > 0.7:
            db = "chroma"
            cost = 0.0
            rationale = "Local/embedded database suitable for small scale and cost optimization"
        elif scale == Scale.SMALL and req.maintenance_complexity == "low":
            db = "pgvector"
            cost = 50.0  # PostgreSQL hosting
            rationale = "Leverage existing PostgreSQL infrastructure"
        elif scale == Scale.LARGE or req.latency_requirement == "real_time":
            db = "pinecone"
            vectors = req.document_count * 2  # Account for chunking
            cost = max(70, vectors * 0.00005)  # $70 minimum, or $0.00005 per vector
            rationale = "Managed service with excellent performance at large scale"
        elif req.maintenance_complexity == "low":
            db = "weaviate_cloud"
            vectors = req.document_count * 2
            cost = max(25, vectors * 0.00003)  # $25 minimum, or $0.00003 per vector
            rationale = "Managed Weaviate with a good balance of features and cost"
        else:
            db = "qdrant"
            cost = 100.0  # Self-hosted infrastructure estimate
            rationale = "High-performance self-hosted option with good scaling"

        return ComponentRecommendation(
            name=db,
            type="vector_database",
            config=self._get_vector_db_config(db, req, scale),
            rationale=rationale,
            pros=self._get_vector_db_pros(db),
            cons=self._get_vector_db_cons(db),
            cost_monthly=cost
        )

    def _recommend_retrieval(self, req: Requirements, scale: Scale) -> ComponentRecommendation:
        """Recommend a retrieval strategy."""
        if req.accuracy_priority > 0.8:
            strategy = "hybrid"
            rationale = "Hybrid retrieval combines dense and sparse methods for maximum accuracy"
        elif "technical" in req.document_types or "code" in req.document_types:
            strategy = "hybrid"
            rationale = "Technical content benefits from both semantic and keyword matching"
        elif req.latency_requirement == "real_time":
            strategy = "dense"
            rationale = "Dense retrieval is faster for real-time requirements"
        else:
            strategy = "dense"
            rationale = "Dense retrieval is suitable for general text search"

        return ComponentRecommendation(
            name=strategy,
            type="retrieval",
            config={
                "strategy": strategy,
                "dense_weight": 0.7 if strategy == "hybrid" else 1.0,
                "sparse_weight": 0.3 if strategy == "hybrid" else 0.0,
                "top_k": 20 if req.accuracy_priority > 0.7 else 10,
                "similarity_threshold": 0.7
            },
            rationale=rationale,
            pros=self._get_retrieval_pros(strategy),
            cons=self._get_retrieval_cons(strategy),
            cost_monthly=0.0
        )

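    # Illustrative sketch only (not called by the designer): how the
    # dense_weight/sparse_weight values above would typically be applied,
    # assuming both scores are already normalized to [0, 1].
    @staticmethod
    def _hybrid_score_sketch(dense_score: float, sparse_score: float,
                             dense_weight: float = 0.7,
                             sparse_weight: float = 0.3) -> float:
        # Weighted linear fusion; candidates are then ranked by this score
        # and cut off by top_k / similarity_threshold.
        return dense_weight * dense_score + sparse_weight * sparse_score
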
    def _recommend_reranking(self, req: Requirements, scale: Scale) -> Optional[ComponentRecommendation]:
        """Recommend reranking if it is likely to be beneficial."""
        if req.accuracy_priority < 0.6 or req.latency_requirement == "real_time":
            return None

        if req.cost_priority > 0.8:
            return None

        # Estimate reranking queries per month
        monthly_queries = req.queries_per_day * 30
        cost_per_query = 0.002  # Estimated cost for cross-encoder reranking
        monthly_cost = monthly_queries * cost_per_query

        if monthly_cost > req.budget_monthly * 0.3:  # Don't exceed 30% of budget
            return None

        return ComponentRecommendation(
            name="cross_encoder_reranking",
            type="reranking",
            config={
                "model": "cross-encoder/ms-marco-MiniLM-L-12-v2",
                "rerank_top_k": 20,
                "return_top_k": 5,
                "batch_size": 16
            },
            rationale="Reranking improves precision for high-accuracy requirements",
            pros=["Higher precision", "Better ranking quality", "Handles complex queries"],
            cons=["Additional latency", "Higher cost", "More complexity"],
            cost_monthly=monthly_cost
        )

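    # Illustrative arithmetic for the budget gate above (assumed figures):
    # 1,000 queries/day x 30 days x $0.002 = $60/month, which passes only if
    # the monthly budget is at least $200 (60 <= 0.3 x 200).
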
    def _recommend_evaluation(self, req: Requirements, scale: Scale) -> ComponentRecommendation:
        """Recommend an evaluation framework."""
        return ComponentRecommendation(
            name="comprehensive_evaluation",
            type="evaluation",
            config={
                "metrics": ["precision@k", "recall@k", "mrr", "ndcg"],
                "k_values": [1, 3, 5, 10],
                "faithfulness_check": True,
                "relevance_scoring": True,
                "evaluation_frequency": "weekly" if scale == Scale.LARGE else "monthly",
                "sample_size": min(1000, req.queries_per_day * 7)
            },
            rationale="Comprehensive evaluation is essential for production RAG systems",
            pros=["Quality monitoring", "Performance tracking", "Issue detection"],
            cons=["Additional overhead", "Requires ground-truth data"],
            cost_monthly=20.0  # Evaluation tooling and compute
        )

    def _generate_architecture_diagram(self, chunking: ComponentRecommendation,
                                       embedding: ComponentRecommendation,
                                       vector_db: ComponentRecommendation,
                                       retrieval: ComponentRecommendation,
                                       reranking: Optional[ComponentRecommendation],
                                       evaluation: ComponentRecommendation) -> str:
        """Generate a Mermaid architecture diagram."""
        diagram = """```mermaid
graph TB
    %% Document Processing Pipeline
    A[Document Corpus] --> B[Document Chunking]
    B --> C[Embedding Generation]
    C --> D[Vector Database Storage]

    %% Query Processing Pipeline
    E[User Query] --> F[Query Processing]
    F --> G[Vector Search]
    D --> G
    G --> H[Retrieved Chunks]
"""

        if reranking:
            diagram += "    H --> I[Reranking]\n    I --> J[Final Results]\n"
        else:
            diagram += "    H --> J[Final Results]\n"

        diagram += """
    %% Evaluation Pipeline
    J --> K[Response Generation]
    K --> L[Evaluation Metrics]

    %% Component Details
    B -.-> B1[Strategy: """ + chunking.name + """]
    C -.-> C1[Model: """ + embedding.name + """]
    D -.-> D1[Database: """ + vector_db.name + """]
    G -.-> G1[Method: """ + retrieval.name + """]
"""

        if reranking:
            diagram += "    I -.-> I1[Model: " + reranking.name + "]\n"

        diagram += "    L -.-> L1[Framework: " + evaluation.name + "]\n```"

        return diagram

    def _generate_config_templates(self, *components) -> Dict[str, Any]:
        """Generate configuration templates for all components."""
        configs = {}

        for component in components:
            if component:
                configs[component.type] = {
                    "component": component.name,
                    "config": component.config,
                    "rationale": component.rationale
                }

        # Add deployment configuration
        configs["deployment"] = {
            "infrastructure": "cloud" if any("pinecone" in str(c.name) for c in components if c) else "hybrid",
            "scaling": {
                "auto_scaling": True,
                "min_replicas": 1,
                "max_replicas": 10
            },
            "monitoring": {
                "metrics": ["latency", "throughput", "accuracy"],
                "alerts": ["high_latency", "low_accuracy", "service_down"]
            }
        }

        return configs

    def _load_embedding_models(self) -> Dict[str, Dict[str, Any]]:
        """Load embedding model specifications."""
        return {
            "openai-text-embedding-ada-002": {
                "dimensions": 1536,
                "cost_per_1k_tokens": 0.0001,
                "quality": "high",
                "speed": "medium"
            },
            "sentence-transformers/all-mpnet-base-v2": {
                "dimensions": 768,
                "cost_per_1k_tokens": 0.0,
                "quality": "high",
                "speed": "medium"
            },
            "sentence-transformers/all-MiniLM-L6-v2": {
                "dimensions": 384,
                "cost_per_1k_tokens": 0.0,
                "quality": "medium",
                "speed": "fast"
            }
        }

    def _load_vector_databases(self) -> Dict[str, Dict[str, Any]]:
        """Load vector database specifications."""
        return {
            "pinecone": {"managed": True, "scaling": "excellent", "cost": "high"},
            "weaviate": {"managed": False, "scaling": "good", "cost": "medium"},
            "qdrant": {"managed": False, "scaling": "excellent", "cost": "low"},
            "chroma": {"managed": False, "scaling": "poor", "cost": "free"},
            "pgvector": {"managed": False, "scaling": "good", "cost": "medium"}
        }

    def _load_chunking_strategies(self) -> Dict[str, Dict[str, Any]]:
        """Load chunking strategy specifications."""
        return {
            "fixed_size": {"complexity": "low", "quality": "medium"},
            "sentence_based": {"complexity": "medium", "quality": "good"},
            "paragraph_based": {"complexity": "medium", "quality": "good"},
            "semantic_heading_aware": {"complexity": "high", "quality": "excellent"}
        }

    def _get_vector_db_config(self, db: str, req: Requirements, scale: Scale) -> Dict[str, Any]:
        """Get vector database configuration."""
        base_config = {
            "collection_name": "rag_documents",
            "distance_metric": "cosine",
            "index_type": "hnsw"
        }

        if db == "pinecone":
            base_config.update({
                "environment": "us-east1-gcp",
                "replicas": 1 if scale == Scale.SMALL else 2,
                "shards": 1 if scale != Scale.LARGE else 3
            })
        elif db == "qdrant":
            base_config.update({
                "memory_mapping": True,
                "quantization": scale == Scale.LARGE,
                "replication_factor": 1 if scale == Scale.SMALL else 2
            })

        return base_config

    def _get_chunking_pros(self, strategy: str) -> List[str]:
        """Get pros for a chunking strategy."""
        pros_map = {
            "semantic_heading_aware": ["Preserves document structure", "High semantic coherence", "Good for technical docs"],
            "paragraph_based": ["Respects natural boundaries", "Good balance", "Readable chunks"],
            "sentence_based": ["Natural language boundaries", "Consistent quality", "Good for general text"],
            "fixed_size": ["Predictable sizes", "Simple implementation", "Consistent processing"],
            "adaptive_chunking": ["Handles mixed content", "Optimizes per document", "Best quality"]
        }
        return pros_map.get(strategy, ["Good general-purpose strategy"])

    def _get_chunking_cons(self, strategy: str) -> List[str]:
        """Get cons for a chunking strategy."""
        cons_map = {
            "semantic_heading_aware": ["Complex implementation", "May create large chunks", "Document-dependent"],
            "paragraph_based": ["Variable sizes", "May break context", "Document-dependent"],
            "sentence_based": ["May create small chunks", "Sentence detection issues", "Variable sizes"],
            "fixed_size": ["Breaks semantic boundaries", "May split sentences", "Context loss"],
            "adaptive_chunking": ["High complexity", "Slower processing", "Harder to debug"]
        }
        return cons_map.get(strategy, ["May not fit all use cases"])

    def _get_embedding_pros(self, model: str) -> List[str]:
        """Get pros for an embedding model."""
        if "openai" in model:
            return ["High quality", "Regular updates", "Good performance"]
        elif "all-mpnet" in model:
            return ["High quality", "Free to use", "Good balance"]
        elif "MiniLM" in model:
            return ["Fast processing", "Small size", "Good for real-time"]
        else:
            return ["Specialized for domain", "Good performance"]

    def _get_embedding_cons(self, model: str) -> List[str]:
        """Get cons for an embedding model."""
        if "openai" in model:
            return ["API costs", "Vendor lock-in", "Rate limits"]
        elif "sentence-transformers" in model:
            return ["Self-hosting required", "Model updates needed", "GPU beneficial"]
        else:
            return ["May require fine-tuning", "Domain-specific"]

    def _get_vector_db_pros(self, db: str) -> List[str]:
        """Get pros for a vector database."""
        pros_map = {
            "pinecone": ["Fully managed", "Excellent performance", "Auto-scaling"],
            "weaviate": ["Rich features", "GraphQL API", "Multi-modal"],
            "qdrant": ["High performance", "Rust-based", "Good scaling"],
            "chroma": ["Simple setup", "Free", "Good for development"],
            "pgvector": ["SQL integration", "ACID compliance", "Familiar"]
        }
        # "weaviate_cloud" (recommended above) shares Weaviate's characteristics
        pros_map["weaviate_cloud"] = pros_map["weaviate"]
        return pros_map.get(db, ["Good performance"])

    def _get_vector_db_cons(self, db: str) -> List[str]:
        """Get cons for a vector database."""
        cons_map = {
            "pinecone": ["Expensive", "Vendor lock-in", "Limited customization"],
            "weaviate": ["Complex setup", "Learning curve", "Resource intensive"],
            "qdrant": ["Self-managed", "Smaller community", "Setup complexity"],
            "chroma": ["Limited scaling", "Not production-ready", "Basic features"],
            "pgvector": ["PostgreSQL knowledge needed", "Less specialized", "Manual optimization"]
        }
        # "weaviate_cloud" (recommended above) shares Weaviate's characteristics
        cons_map["weaviate_cloud"] = cons_map["weaviate"]
        return cons_map.get(db, ["Requires maintenance"])

    def _get_retrieval_pros(self, strategy: str) -> List[str]:
        """Get pros for a retrieval strategy."""
        pros_map = {
            "dense": ["Semantic understanding", "Good for paraphrases", "Fast"],
            "sparse": ["Exact matching", "Interpretable", "Good for keywords"],
            "hybrid": ["Best of both", "High accuracy", "Robust"]
        }
        return pros_map.get(strategy, ["Good performance"])

    def _get_retrieval_cons(self, strategy: str) -> List[str]:
        """Get cons for a retrieval strategy."""
        cons_map = {
            "dense": ["May miss exact matches", "Embedding-dependent", "Less interpretable"],
            "sparse": ["Vocabulary mismatch", "No semantic understanding", "Synonym issues"],
            "hybrid": ["More complex", "Tuning required", "Higher latency"]
        }
        return cons_map.get(strategy, ["May require tuning"])


def load_requirements(file_path: str) -> Requirements:
    """Load requirements from a JSON file."""
    with open(file_path, 'r') as f:
        data = json.load(f)

    return Requirements(**data)


def save_design(design: PipelineDesign, output_path: str):
    """Save a pipeline design to a JSON file."""
    # Convert to a plain dict for JSON serialization
    design_dict = {}

    for field_name in design.__dataclass_fields__:
        value = getattr(design, field_name)
        if isinstance(value, ComponentRecommendation):
            design_dict[field_name] = asdict(value)
        elif value is None:
            design_dict[field_name] = None
        else:
            design_dict[field_name] = value

    with open(output_path, 'w') as f:
        json.dump(design_dict, f, indent=2)


def print_design_summary(design: PipelineDesign):
    """Print a human-readable design summary."""
    print("\n" + "=" * 60)
    print("RAG PIPELINE DESIGN SUMMARY")
    print("=" * 60)

    print(f"\n💰 Total Monthly Cost: ${design.total_cost:.2f}")

    print("\n🔧 Component Recommendations:")
    components = [design.chunking, design.embedding, design.vector_db,
                  design.retrieval, design.reranking, design.evaluation]

    for component in components:
        if component:
            print(f"\n  {component.type.upper()}: {component.name}")
            print(f"  Rationale: {component.rationale}")
            if component.cost_monthly > 0:
                print(f"  Monthly Cost: ${component.cost_monthly:.2f}")

    print("\n📊 Architecture Diagram:")
    print(design.architecture_diagram)


def main():
    """Main function with command-line interface."""
    parser = argparse.ArgumentParser(description='Design a RAG pipeline based on requirements')
    parser.add_argument('requirements', help='JSON file containing system requirements')
    parser.add_argument('--output', '-o', help='Output file for pipeline design (JSON)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')

    args = parser.parse_args()

    try:
        # Load requirements
        print("Loading requirements...")
        requirements = load_requirements(args.requirements)

        # Design pipeline
        designer = RAGPipelineDesigner()
        design = designer.design_pipeline(requirements)

        # Save design
        if args.output:
            save_design(design, args.output)
            print(f"Pipeline design saved to {args.output}")

        # Print summary
        print_design_summary(design)

        if args.verbose:
            print("\n📋 Configuration Templates:")
            for component_type, config in design.config_templates.items():
                print(f"\n  {component_type.upper()}:")
                print(f"  {json.dumps(config, indent=4)}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())