Files
litellm-vector-store/app/middleware/rate_limit.py
2026-04-29 08:17:35 +00:00

36 lines
974 B
Python

from fastapi import Request, HTTPException
from collections import defaultdict
import time
# Simple in-memory rate limiting.
# Maps "<user_id>:<action>" keys to the timestamps of that key's recent
# requests; missing keys start out as an empty list.
request_counts: dict = defaultdict(list)

# Per-action quota, expressed as (max requests, window length in seconds).
RATE_LIMITS = dict(
    search=(100, 60),  # 100 requests per 60 seconds
    upsert=(50, 60),
    embed=(200, 60),
    rag=(20, 60),
)
def check_rate_limit(user_id: str, action: str):
    """Enforce the per-user, per-action sliding-window request quota.

    Looks up the ``(limit, window)`` pair for *action* in ``RATE_LIMITS``
    (unknown actions fall back to 100 requests per 60 seconds), discards
    recorded timestamps that have left the window, and then either records
    the current request or rejects it.

    The counters live in the module-level ``request_counts`` dict, i.e. in
    this process's memory only.

    Args:
        user_id: Identifier of the caller; combined with *action* into the
            bucket key ``"<user_id>:<action>"``.
        action: Name of the operation being rate limited.

    Raises:
        HTTPException: Status 429 with a structured error body and a
            ``Retry-After`` header when the bucket already holds ``limit``
            requests inside the current window. Rejected requests are not
            recorded.
    """
    limit, window = RATE_LIMITS.get(action, (100, 60))
    # Use the monotonic clock: the window is an elapsed-time measurement, and
    # wall-clock steps (NTP, manual changes) would otherwise expire entries
    # early or keep them alive past the window.
    now = time.monotonic()
    key = f"{user_id}:{action}"

    # Drop requests that have fallen out of the window.
    recent = [t for t in request_counts[key] if now - t < window]
    request_counts[key] = recent

    if len(recent) >= limit:
        # Entries are appended in chronological order, so the first one is
        # the next to expire and free a slot. Round up so a client honouring
        # the header never retries before that happens.
        oldest = recent[0] if recent else now
        retry_after = int(window - (now - oldest)) + 1
        raise HTTPException(
            429,
            {
                "error": {
                    "message": f"Rate limit erreicht: {limit} Requests pro {window}s",
                    "type": "rate_limit_error",
                    "code": "rate_limit_exceeded"
                }
            },
            headers={"Retry-After": str(retry_after)},
        )

    recent.append(now)