Guardrails

Validation and safety layers for agent inputs, outputs, and tool calls.

Overview

Guardrails are async functions that validate and optionally transform content at key points in agent execution:

  • Input Guardrails: Validate user messages before LLM call
  • Output Guardrails: Validate LLM responses before returning to user
  • Tool Guardrails: Validate tool arguments before execution

Quick Start

from vel import Agent
from vel.core import GuardrailResult

# Define guardrail functions
async def no_pii(content, ctx):
    """Fail validation when the content contains an SSN-shaped number."""
    import re
    ssn = re.compile(r'\b\d{3}-\d{2}-\d{4}\b')  # e.g. 123-45-6789
    if ssn.search(content):
        return GuardrailResult(passed=False, message="Content contains SSN")
    return GuardrailResult(passed=True)

async def require_json(content, ctx):
    """Ensure the output is valid JSON.

    Returns a failing GuardrailResult when `content` cannot be parsed
    as JSON; passes otherwise.
    """
    import json
    try:
        json.loads(content)
    except (TypeError, json.JSONDecodeError):
        # Catch only parse/type failures: a bare `except:` would also
        # swallow KeyboardInterrupt/SystemExit and hide real bugs.
        return GuardrailResult(passed=False, message="Output must be valid JSON")
    return GuardrailResult(passed=True)

# Use guardrails
agent = Agent(
    id='safe-agent',
    model={'provider': 'openai', 'model': 'gpt-4o'},
    input_guardrails=[no_pii],        # runs on the user message before the LLM call
    output_guardrails=[require_json]  # runs on the LLM response before returning
)

Guardrail Function Signature

async def my_guardrail(content: Any, ctx: dict) -> GuardrailResult | bool:
    """
    Args:
        content: The content to validate
            - For input: user message string or input dict
            - For output: LLM response string
            - For tool: tool arguments dict
        ctx: Context dict with run_id, session_id, etc.

    Returns:
        GuardrailResult, or a bool (True = pass, False = fail)
    """
    pass

GuardrailResult

from vel.core import GuardrailResult

# Simple pass/fail — message is optional and surfaces on failure
GuardrailResult(passed=True)
GuardrailResult(passed=False, message="Validation failed")

# With content modification
GuardrailResult(
    passed=True,
    modified_content="sanitized content"  # Replaces original
)

Input Guardrails

Validate user messages before calling the LLM.

async def validate_length(content, ctx):
    """Reject messages shorter than the 10-character minimum."""
    if len(content) >= 10:
        return GuardrailResult(passed=True)
    return GuardrailResult(
        passed=False,
        message="Message too short (min 10 chars)"
    )

async def sanitize_input(content, ctx):
    """Strip <script> tags from the message before it reaches the LLM."""
    # NOTE(review): literal, case-sensitive removal — '<SCRIPT>' would
    # slip through; confirm this is acceptable for the example.
    cleaned = content
    for tag in ('<script>', '</script>'):
        cleaned = cleaned.replace(tag, '')
    return GuardrailResult(passed=True, modified_content=cleaned)

agent = Agent(
    id='my-agent',
    model={'provider': 'openai', 'model': 'gpt-4o'},
    # Guardrails run in list order: length check first, then sanitization
    input_guardrails=[validate_length, sanitize_input]
)

Behavior:

  • Run sequentially before LLM call
  • If any guardrail fails, agent raises GuardrailError
  • Modified content is passed to subsequent guardrails and LLM

Output Guardrails

Validate LLM responses before returning to user.

async def no_harmful_content(content, ctx):
    """Fail when the response mentions any blocked term."""
    blocked_terms = ['confidential', 'password', 'secret key']
    lowered = content.lower()  # hoisted: compare case-insensitively once
    for term in blocked_terms:
        if term.lower() in lowered:
            return GuardrailResult(
                passed=False,
                message=f"Response contains blocked term: {term}"
            )
    return GuardrailResult(passed=True)

async def format_response(content, ctx):
    """Append a standard AI-disclaimer footer to every response."""
    footer = "\n\n---\n*AI-generated response*"
    return GuardrailResult(passed=True, modified_content=content + footer)

agent = Agent(
    id='my-agent',
    model={'provider': 'openai', 'model': 'gpt-4o'},
    # Run order: block harmful responses first, then add the disclaimer
    output_guardrails=[no_harmful_content, format_response]
)

Tool Guardrails

Validate tool arguments before execution.

async def validate_email_recipient(args, ctx):
    """Allow sending only to addresses on the approved-domain list."""
    recipient = args.get('to', '')
    allowed_domains = ['company.com', 'partner.org']

    # Addresses without '@' yield an empty domain and are rejected below
    domain = recipient.split('@')[-1] if '@' in recipient else ''
    if domain in allowed_domains:
        return GuardrailResult(passed=True)
    return GuardrailResult(
        passed=False,
        message=f"Email domain '{domain}' not in allowed list"
    )

async def validate_query_params(args, ctx):
    """Reject database queries containing destructive SQL statements.

    Matches DROP/DELETE/TRUNCATE as whole words, case-insensitively, so
    benign identifiers such as 'dropdown' or 'deleted_at' do not trip
    the check — the previous substring test (`'DROP' in query.upper()`)
    false-positived on them.
    """
    import re
    query = args.get('query', '')
    if re.search(r'\b(DROP|DELETE|TRUNCATE)\b', query, re.IGNORECASE):
        return GuardrailResult(
            passed=False,
            message="Dangerous SQL keywords detected"
        )
    return GuardrailResult(passed=True)

agent = Agent(
    id='my-agent',
    model={'provider': 'openai', 'model': 'gpt-4o'},
    tools=['send_email', 'query_database'],
    # Keys are tool names; each maps to a list of guardrails run
    # against that tool's arguments before execution
    tool_guardrails={
        'send_email': [validate_email_recipient],
        'query_database': [validate_query_params]
    }
)

Context Object

The ctx parameter provides runtime information:

async def context_aware_guardrail(content, ctx):
    """Demonstrate reading run metadata from the guardrail context."""
    run_id, session_id = ctx.get('run_id'), ctx.get('session_id')
    tool_name = ctx.get('tool_name')  # populated only for tool guardrails

    # Use context for logging, conditional logic, etc.
    return GuardrailResult(passed=True)

Simplified Return Types

For simple pass/fail, you can return a boolean:

async def simple_check(content, ctx):
    """Pass whenever the content is non-empty (boolean return form)."""
    return bool(len(content))

Or a dict:

async def dict_check(content, ctx):
    """Return the dict form: passed / message / modified_content keys."""
    verdict = {'passed': True}
    verdict['message'] = 'Validation successful'
    verdict['modified_content'] = content.strip()
    return verdict

Error Handling

When a guardrail fails:

Non-Streaming (run())

from vel.core import GuardrailError

try:
    result = await agent.run({'message': 'test'})
except GuardrailError as e:
    # GuardrailError exposes which guardrail failed and the offending content
    print(f"Guardrail '{e.guardrail_name}' failed: {e.message}")
    print(f"Original content: {e.content}")

Streaming (run_stream())

async for event in agent.run_stream({'message': 'test'}):
    if event['type'] == 'error':
        # Streaming surfaces guardrail failures as error events, not exceptions
        if 'guardrail' in event['error'].lower():
            print(f"Guardrail failed: {event['error']}")

Common Patterns

Rate Limiting

from datetime import datetime, timedelta

user_requests = {}  # in-memory store; use Redis or similar in production

async def rate_limit(content, ctx):
    """Allow at most 10 requests per user within a rolling one-minute window."""
    user_id = ctx.get('user_id', 'anonymous')
    now = datetime.now()

    # Drop timestamps older than the window before counting
    window = timedelta(minutes=1)
    recent = [t for t in user_requests.get(user_id, []) if now - t < window]

    if len(recent) >= 10:
        # Over the limit — do not record the rejected request
        return GuardrailResult(
            passed=False,
            message="Rate limit exceeded. Please wait."
        )

    recent.append(now)
    user_requests[user_id] = recent

    return GuardrailResult(passed=True)

Content Moderation

async def moderate_content(content, ctx):
    """Check content against an external moderation API."""
    import httpx

    async with httpx.AsyncClient() as client:
        resp = await client.post(
            'https://api.moderationservice.com/check',
            json={'text': content}
        )
        verdict = resp.json()

    if not verdict.get('flagged'):
        return GuardrailResult(passed=True)
    return GuardrailResult(
        passed=False,
        message=f"Content flagged: {verdict.get('reason')}"
    )

Schema Validation

from pydantic import BaseModel, ValidationError

class ExpectedOutput(BaseModel):
    """Pydantic schema the LLM's JSON output must conform to."""
    answer: str
    confidence: float

async def validate_schema(content, ctx):
    """Ensure the output parses as JSON and matches ExpectedOutput."""
    import json

    try:
        ExpectedOutput(**json.loads(content))
    except (json.JSONDecodeError, ValidationError) as e:
        # Covers both malformed JSON and schema mismatches
        return GuardrailResult(
            passed=False,
            message=f"Schema validation failed: {e}"
        )
    return GuardrailResult(passed=True)

Logging / Audit

import logging

logger = logging.getLogger('guardrails')

async def audit_log(content, ctx):
    """Log every input (with run metadata) for audit purposes.

    Always passes — this guardrail only records, it never blocks.
    """
    logger.info(
        "Input received",  # plain string: was an f-string with no placeholders (lint F541)
        extra={
            'run_id': ctx.get('run_id'),
            'session_id': ctx.get('session_id'),
            'content_length': len(content),
            'content_preview': content[:100]  # truncate to avoid logging full payloads
        }
    )
    return GuardrailResult(passed=True)

Best Practices

  1. Keep guardrails fast - They run on every request
  2. Use async for I/O - External API calls should be async
  3. Fail with clear messages - Help users understand what went wrong
  4. Order matters - Guardrails run sequentially; put fast checks first
  5. Don’t over-validate - Balance safety with user experience

See Also