Stress Test
Decorator for performance and load testing with concurrent requests.
Signature
@stress_test(concurrent_requests=10, duration=60)
@evaluate("gpt-3.5-turbo")
async def stress_test_function(model, dataset):
    ...
Parameters
- concurrent_requests (int): Number of concurrent requests to run
- duration (int): Duration in seconds to run the stress test
Returns
A decorator that runs the evaluation function under load conditions.
Basic Usage
from benchwise import stress_test, evaluate
@stress_test(concurrent_requests=10, duration=60)
@evaluate("gpt-3.5-turbo")
async def test_performance(model, dataset):
    response = await model.generate(["Hello, world!"])
    return {"response": response}
Complete Example
import asyncio
import time
from benchwise import stress_test, evaluate, create_qa_dataset
dataset = create_qa_dataset(
    questions=["What is AI?"],
    answers=["Artificial Intelligence"]
)
@stress_test(concurrent_requests=5, duration=30)
@evaluate("gpt-3.5-turbo")
async def test_load(model, dataset):
    start_time = time.time()

    # Generate response
    response = await model.generate(dataset.prompts[:1])

    # Calculate latency
    latency = time.time() - start_time

    return {
        "latency": latency,
        "response_length": len(response[0]) if response else 0
    }
# Run stress test
results = asyncio.run(test_load(dataset))
# Analyze performance
for result in results:
    if result.success:
        print(f"Model: {result.model_name}")
        print(f"Latency: {result.result['latency']:.3f}s")
        print(f"Total Duration: {result.duration:.2f}s")
Measuring Throughput
@stress_test(concurrent_requests=20, duration=60)
@evaluate("gpt-3.5-turbo")
async def test_throughput(model, dataset):
    ...
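One way to fill in the body is to time a batch of generations and report requests per second. This is a sketch that assumes the imports from the complete example above; the returned keys (requests, elapsed, requests_per_second) are illustrative names, not fields the framework requires.
import time

@stress_test(concurrent_requests=20, duration=60)
@evaluate("gpt-3.5-turbo")
async def test_throughput(model, dataset):
    # Time one batch of generations
    start = time.time()
    responses = await model.generate(dataset.prompts)
    elapsed = time.time() - start

    # Per-call throughput; aggregate across results after the run
    return {
        "requests": len(responses),
        "elapsed": elapsed,
        "requests_per_second": len(responses) / elapsed if elapsed > 0 else 0.0
    }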
Latency Testing
@stress_test(concurrent_requests=10, duration=30)
@evaluate("gpt-3.5-turbo", "gemini-2.5-flash")
async def test_latency(model, dataset):
    ...
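A possible body mirrors the latency measurement from the complete example, recording the wall-clock time of a single generation for each model; the "latency" key is the same illustrative metric name used there.
import time

@stress_test(concurrent_requests=10, duration=30)
@evaluate("gpt-3.5-turbo", "gemini-2.5-flash")
async def test_latency(model, dataset):
    # Wall-clock latency of a single generation
    start = time.time()
    await model.generate(dataset.prompts[:1])
    return {"latency": time.time() - start}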
Testing Under Load
@stress_test(concurrent_requests=50, duration=120)
@evaluate("gpt-3.5-turbo")
async def test_under_load(model, dataset):
    ...
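For a heavy-load test, one option is to catch failures inside the body so the error rate under high concurrency can be computed from the results instead of aborting the run. The ok/error keys below are illustrative.
@stress_test(concurrent_requests=50, duration=120)
@evaluate("gpt-3.5-turbo")
async def test_under_load(model, dataset):
    # Record success or failure per request so error rates can be analyzed
    try:
        response = await model.generate(dataset.prompts[:1])
        return {"ok": True, "response_length": len(response[0]) if response else 0}
    except Exception as exc:
        return {"ok": False, "error": str(exc)}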
Rate Limiting Test
@stress_test(concurrent_requests=100, duration=60)
@evaluate("gpt-3.5-turbo")
async def test_rate_limits(model, dataset):
    ...
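A sketch of one approach: flag requests that appear to fail due to rate limiting. The provider-specific exception types are not assumed here, so this simply inspects the error message, which you would adapt to your provider's client library.
@stress_test(concurrent_requests=100, duration=60)
@evaluate("gpt-3.5-turbo")
async def test_rate_limits(model, dataset):
    try:
        await model.generate(dataset.prompts[:1])
        return {"rate_limited": False}
    except Exception as exc:
        # Provider exception types vary, so match on the error message
        message = str(exc).lower()
        return {"rate_limited": "rate" in message or "429" in message}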
Cost Under Load
@stress_test(concurrent_requests=20, duration=60)
@evaluate("gpt-3.5-turbo", "gemini-2.5-flash")
async def test_cost_under_load(model, dataset):
    ...
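The examples in this page do not expose a built-in cost metric, so one option is to track output size per request as a proxy and convert it to an estimated spend after the run. The "output_chars" key is illustrative.
@stress_test(concurrent_requests=20, duration=60)
@evaluate("gpt-3.5-turbo", "gemini-2.5-flash")
async def test_cost_under_load(model, dataset):
    response = await model.generate(dataset.prompts[:1])
    # Output size is a rough proxy for spend; convert to tokens and apply
    # your provider's pricing offline to estimate the cost of the run
    return {"output_chars": len(response[0]) if response else 0}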
Combining with @benchmark
import time

from benchwise import benchmark
@benchmark("Performance Benchmark", "Tests model performance under load")
@stress_test(concurrent_requests=10, duration=60)
@evaluate("gpt-3.5-turbo", "gemini-2.5-flash")
async def performance_benchmark(model, dataset):
    start = time.time()
    response = await model.generate(dataset.prompts[:1])
    latency = time.time() - start

    assert latency < 2.0, f"Latency {latency}s exceeds 2s threshold"
    return {"latency": latency}
Best Practices
1. Start Small
# Start with low concurrency
@stress_test(concurrent_requests=5, duration=30)
@evaluate("gpt-3.5-turbo")
async def gradual_test(model, dataset):
    ...
# Gradually increase
@stress_test(concurrent_requests=50, duration=60)
@evaluate("gpt-3.5-turbo")
async def heavy_test(model, dataset):
    ...
2. Monitor Costs
@stress_test(concurrent_requests=10, duration=30)
@evaluate("gpt-3.5-turbo")
async def cost_aware_test(model, dataset):
    ...
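A minimal sketch of a cost-aware body, assuming the approach from the cost example above: keep each request cheap (one short prompt per call, short duration, low concurrency) and record output size so the total spend of the run can be estimated afterwards.
@stress_test(concurrent_requests=10, duration=30)
@evaluate("gpt-3.5-turbo")
async def cost_aware_test(model, dataset):
    # One short prompt per call keeps individual requests cheap; the
    # recorded output size supports a post-run cost estimate
    response = await model.generate(dataset.prompts[:1])
    return {"output_chars": len(response[0]) if response else 0}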
3. Set Performance Thresholds
@stress_test(concurrent_requests=10, duration=60)
@evaluate("gpt-3.5-turbo")
async def threshold_test(model, dataset):
    start = time.time()
    response = await model.generate(["Test"])
    latency = time.time() - start

    # Assert performance requirements
    assert latency < 2.0, f"Latency {latency}s exceeds threshold"
    return {"latency": latency, "passed": latency < 2.0}
See Also
- @evaluate - Main evaluation decorator
- @benchmark - Create named benchmarks
- Evaluation Guide - Evaluation patterns