
sse_benchmark.py

#!/usr/bin/env python3
"""
SSE (Server-Sent Events) Stress Test for Dify Workflow API

This script stress tests the streaming performance of Dify's workflow execution API,
measuring key metrics like connection rate, event throughput, and time to first event (TTFE).
"""

import json
import logging
import os
import random
import statistics
import sys
import threading
import time
from collections import deque
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Literal, TypeAlias, TypedDict

import requests.exceptions
from locust import HttpUser, between, constant, events, task

# Add the stress-test directory to path to import common modules
sys.path.insert(0, str(Path(__file__).parent))

from common.config_helper import ConfigHelper  # type: ignore[import-not-found]

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Configuration from environment
WORKFLOW_PATH = os.getenv("WORKFLOW_PATH", "/v1/workflows/run")
CONNECT_TIMEOUT = float(os.getenv("CONNECT_TIMEOUT", "10"))
READ_TIMEOUT = float(os.getenv("READ_TIMEOUT", "60"))
TERMINAL_EVENTS = [e.strip() for e in os.getenv("TERMINAL_EVENTS", "workflow_finished,error").split(",") if e.strip()]
QUESTIONS_FILE = os.getenv("QUESTIONS_FILE", "")
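
# Example environment overrides for the variables above (values are illustrative only):
#   WORKFLOW_PATH=/v1/workflows/run CONNECT_TIMEOUT=10 READ_TIMEOUT=120 \
#   TERMINAL_EVENTS=workflow_finished,error QUESTIONS_FILE=questions.txt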

# Type definitions
ErrorType: TypeAlias = Literal[
    "connection_error",
    "timeout",
    "invalid_json",
    "http_4xx",
    "http_5xx",
    "early_termination",
    "invalid_response",
]


class ErrorCounts(TypedDict):
    """Error count tracking"""

    connection_error: int
    timeout: int
    invalid_json: int
    http_4xx: int
    http_5xx: int
    early_termination: int
    invalid_response: int


class SSEEvent(TypedDict):
    """Server-Sent Event structure"""

    data: str
    event: str
    id: str | None


class WorkflowInputs(TypedDict):
    """Workflow input structure"""

    question: str


class WorkflowRequestData(TypedDict):
    """Workflow request payload"""

    inputs: WorkflowInputs
    response_mode: Literal["streaming"]
    user: str


class ParsedEventData(TypedDict, total=False):
    """Parsed event data from SSE stream"""

    event: str
    task_id: str
    workflow_run_id: str
    data: object  # For dynamic content
    created_at: int


class LocustStats(TypedDict):
    """Locust statistics structure"""

    total_requests: int
    total_failures: int
    avg_response_time: float
    min_response_time: float
    max_response_time: float


class ReportData(TypedDict):
    """JSON report structure"""

    timestamp: str
    duration_seconds: float
    metrics: dict[str, object]  # Metrics as dict for JSON serialization
    locust_stats: LocustStats | None


@dataclass
class StreamMetrics:
    """Metrics for a single stream"""

    stream_duration: float
    events_count: int
    bytes_received: int
    ttfe: float
    inter_event_times: list[float]


@dataclass
class MetricsSnapshot:
    """Snapshot of current metrics state"""

    active_connections: int
    total_connections: int
    total_events: int
    connection_rate: float
    event_rate: float
    overall_conn_rate: float
    overall_event_rate: float
    ttfe_avg: float
    ttfe_min: float
    ttfe_max: float
    ttfe_p50: float
    ttfe_p95: float
    ttfe_samples: int
    ttfe_total_samples: int  # Total TTFE samples collected (not limited by window)
    error_counts: ErrorCounts
    stream_duration_avg: float
    stream_duration_p50: float
    stream_duration_p95: float
    events_per_stream_avg: float
    inter_event_latency_avg: float
    inter_event_latency_p50: float
    inter_event_latency_p95: float


class MetricsTracker:
    def __init__(self) -> None:
        self.lock = threading.Lock()
        self.active_connections = 0
        self.total_connections = 0
        self.total_events = 0
        self.start_time = time.time()
        # Enhanced metrics with memory limits
        self.max_samples = 10000  # Prevent unbounded growth
        self.ttfe_samples: deque[float] = deque(maxlen=self.max_samples)
        self.ttfe_total_count = 0  # Track total TTFE samples collected
        # For rate calculations - no maxlen to avoid artificial limits
        self.connection_times: deque[float] = deque()
        self.event_times: deque[float] = deque()
        self.last_stats_time = time.time()
        self.last_total_connections = 0
        self.last_total_events = 0
        self.stream_metrics: deque[StreamMetrics] = deque(maxlen=self.max_samples)
        self.error_counts: ErrorCounts = ErrorCounts(
            connection_error=0,
            timeout=0,
            invalid_json=0,
            http_4xx=0,
            http_5xx=0,
            early_termination=0,
            invalid_response=0,
        )

    def connection_started(self) -> None:
        with self.lock:
            self.active_connections += 1
            self.total_connections += 1
            self.connection_times.append(time.time())

    def connection_ended(self) -> None:
        with self.lock:
            self.active_connections -= 1

    def event_received(self) -> None:
        with self.lock:
            self.total_events += 1
            self.event_times.append(time.time())

    def record_ttfe(self, ttfe_ms: float) -> None:
        with self.lock:
            self.ttfe_samples.append(ttfe_ms)  # deque handles maxlen
            self.ttfe_total_count += 1  # Increment total counter

    def record_stream_metrics(self, metrics: StreamMetrics) -> None:
        with self.lock:
            self.stream_metrics.append(metrics)  # deque handles maxlen

    def record_error(self, error_type: ErrorType) -> None:
        with self.lock:
            self.error_counts[error_type] += 1

    def get_stats(self) -> MetricsSnapshot:
        with self.lock:
            current_time = time.time()
            time_window = 10.0  # 10 second window for rate calculation

            # Clean up old timestamps outside the window
            cutoff_time = current_time - time_window
            while self.connection_times and self.connection_times[0] < cutoff_time:
                self.connection_times.popleft()
            while self.event_times and self.event_times[0] < cutoff_time:
                self.event_times.popleft()

            # Calculate rates based on actual window or elapsed time
            window_duration = min(time_window, current_time - self.start_time)
            if window_duration > 0:
                conn_rate = len(self.connection_times) / window_duration
                event_rate = len(self.event_times) / window_duration
            else:
                conn_rate = 0
                event_rate = 0

            # Calculate TTFE statistics
            if self.ttfe_samples:
                avg_ttfe = statistics.mean(self.ttfe_samples)
                min_ttfe = min(self.ttfe_samples)
                max_ttfe = max(self.ttfe_samples)
                p50_ttfe = statistics.median(self.ttfe_samples)
                if len(self.ttfe_samples) >= 2:
                    quantiles = statistics.quantiles(self.ttfe_samples, n=20, method="inclusive")
                    p95_ttfe = quantiles[18]  # 19th of 19 quantiles = 95th percentile
                else:
                    p95_ttfe = max_ttfe
            else:
                avg_ttfe = min_ttfe = max_ttfe = p50_ttfe = p95_ttfe = 0

            # Calculate stream metrics
            if self.stream_metrics:
                durations = [m.stream_duration for m in self.stream_metrics]
                events_per_stream = [m.events_count for m in self.stream_metrics]
                stream_duration_avg = statistics.mean(durations)
                stream_duration_p50 = statistics.median(durations)
                stream_duration_p95 = (
                    statistics.quantiles(durations, n=20, method="inclusive")[18]
                    if len(durations) >= 2
                    else max(durations)
                    if durations
                    else 0
                )
                events_per_stream_avg = statistics.mean(events_per_stream) if events_per_stream else 0

                # Calculate inter-event latency statistics
                all_inter_event_times = []
                for m in self.stream_metrics:
                    all_inter_event_times.extend(m.inter_event_times)
                if all_inter_event_times:
                    inter_event_latency_avg = statistics.mean(all_inter_event_times)
                    inter_event_latency_p50 = statistics.median(all_inter_event_times)
                    inter_event_latency_p95 = (
                        statistics.quantiles(all_inter_event_times, n=20, method="inclusive")[18]
                        if len(all_inter_event_times) >= 2
                        else max(all_inter_event_times)
                    )
                else:
                    inter_event_latency_avg = inter_event_latency_p50 = inter_event_latency_p95 = 0
            else:
                stream_duration_avg = stream_duration_p50 = stream_duration_p95 = events_per_stream_avg = 0
                inter_event_latency_avg = inter_event_latency_p50 = inter_event_latency_p95 = 0

            # Also calculate overall average rates
            total_elapsed = current_time - self.start_time
            overall_conn_rate = self.total_connections / total_elapsed if total_elapsed > 0 else 0
            overall_event_rate = self.total_events / total_elapsed if total_elapsed > 0 else 0

            return MetricsSnapshot(
                active_connections=self.active_connections,
                total_connections=self.total_connections,
                total_events=self.total_events,
                connection_rate=conn_rate,
                event_rate=event_rate,
                overall_conn_rate=overall_conn_rate,
                overall_event_rate=overall_event_rate,
                ttfe_avg=avg_ttfe,
                ttfe_min=min_ttfe,
                ttfe_max=max_ttfe,
                ttfe_p50=p50_ttfe,
                ttfe_p95=p95_ttfe,
                ttfe_samples=len(self.ttfe_samples),
                ttfe_total_samples=self.ttfe_total_count,  # Return total count
                error_counts=ErrorCounts(**self.error_counts),
                stream_duration_avg=stream_duration_avg,
                stream_duration_p50=stream_duration_p50,
                stream_duration_p95=stream_duration_p95,
                events_per_stream_avg=events_per_stream_avg,
                inter_event_latency_avg=inter_event_latency_avg,
                inter_event_latency_p50=inter_event_latency_p50,
                inter_event_latency_p95=inter_event_latency_p95,
            )


# Global metrics instance
metrics = MetricsTracker()


class SSEParser:
    """Parser for Server-Sent Events according to W3C spec"""

    def __init__(self) -> None:
        self.data_buffer: list[str] = []
        self.event_type: str | None = None
        self.event_id: str | None = None

    def parse_line(self, line: str) -> SSEEvent | None:
        """Parse a single SSE line and return event if complete"""
        # Empty line signals end of event
        if not line:
            if self.data_buffer:
                event = SSEEvent(
                    data="\n".join(self.data_buffer),
                    event=self.event_type or "message",
                    id=self.event_id,
                )
                self.data_buffer = []
                self.event_type = None
                self.event_id = None
                return event
            return None

        # Comment line
        if line.startswith(":"):
            return None

        # Parse field
        if ":" in line:
            field, value = line.split(":", 1)
            value = value.lstrip()
            if field == "data":
                self.data_buffer.append(value)
            elif field == "event":
                self.event_type = value
            elif field == "id":
                self.event_id = value
        return None
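
# Illustrative usage of SSEParser (not executed; a minimal sketch of how the parser
# is fed in the task below): field lines are buffered, and the blank line that
# terminates an SSE block yields the completed event.
#
#   parser = SSEParser()
#   parser.parse_line("event: message")                          # returns None (buffered)
#   parser.parse_line('data: {"event": "workflow_finished"}')    # returns None (buffered)
#   parser.parse_line("")                                        # returns the completed SSEEvent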


# Note: SSEClient removed - we'll handle SSE parsing directly in the task for better Locust integration
class DifyWorkflowUser(HttpUser):
    """Locust user for testing Dify workflow SSE endpoints"""

    # Use constant wait for streaming workloads
    wait_time = constant(0) if os.getenv("WAIT_TIME", "0") == "0" else between(1, 3)

    def __init__(self, *args: object, **kwargs: object) -> None:
        super().__init__(*args, **kwargs)  # type: ignore[arg-type]
        # Load API configuration
        config_helper = ConfigHelper()
        self.api_token = config_helper.get_api_key()
        if not self.api_token:
            raise ValueError("API key not found. Please run setup_all.py first.")
        # Load questions from file or use defaults
        if QUESTIONS_FILE and os.path.exists(QUESTIONS_FILE):
            with open(QUESTIONS_FILE) as f:
                self.questions = [line.strip() for line in f if line.strip()]
        else:
            self.questions = [
                "What is artificial intelligence?",
                "Explain quantum computing",
                "What is machine learning?",
                "How do neural networks work?",
                "What is renewable energy?",
            ]
        self.user_counter = 0

    def on_start(self) -> None:
        """Called when a user starts"""
        self.user_counter = 0

    @task
    def test_workflow_stream(self) -> None:
        """Test workflow SSE streaming endpoint"""
        question = random.choice(self.questions)
        self.user_counter += 1
        headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Content-Type": "application/json",
            "Accept": "text/event-stream",
            "Cache-Control": "no-cache",
        }
        data = WorkflowRequestData(
            inputs=WorkflowInputs(question=question),
            response_mode="streaming",
            user=f"user_{self.user_counter}",
        )
        start_time = time.time()
        first_event_time = None
        event_count = 0
        inter_event_times: list[float] = []
        last_event_time = None
        ttfe = 0
        request_success = False
        bytes_received = 0
        metrics.connection_started()

        # Use catch_response context manager directly
        with self.client.request(
            method="POST",
            url=WORKFLOW_PATH,
            headers=headers,
            json=data,
            stream=True,
            catch_response=True,
            timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
            name="/v1/workflows/run",  # Name for Locust stats
        ) as response:
            try:
                # Validate response
                if response.status_code >= 400:
                    error_type: ErrorType = "http_4xx" if response.status_code < 500 else "http_5xx"
                    metrics.record_error(error_type)
                    response.failure(f"HTTP {response.status_code}")
                    return

                content_type = response.headers.get("Content-Type", "")
                if "text/event-stream" not in content_type and "application/json" not in content_type:
                    logger.error(f"Expected text/event-stream, got: {content_type}")
                    metrics.record_error("invalid_response")
                    response.failure(f"Invalid content type: {content_type}")
                    return

                # Parse SSE events
                parser = SSEParser()
                for line in response.iter_lines(decode_unicode=True):
                    # Check if runner is stopping
                    if getattr(self.environment.runner, "state", "") in (
                        "stopping",
                        "stopped",
                    ):
                        logger.debug("Runner stopping, breaking streaming loop")
                        break
                    if line is not None:
                        bytes_received += len(line.encode("utf-8"))
                    # Parse SSE line
                    event = parser.parse_line(line if line is not None else "")
                    if event:
                        event_count += 1
                        current_time = time.time()
                        metrics.event_received()
                        # Track inter-event timing
                        if last_event_time:
                            inter_event_times.append((current_time - last_event_time) * 1000)
                        last_event_time = current_time
                        if first_event_time is None:
                            first_event_time = current_time
                            ttfe = (first_event_time - start_time) * 1000
                            metrics.record_ttfe(ttfe)
                        try:
                            # Parse event data
                            event_data = event.get("data", "")
                            if event_data:
                                if event_data == "[DONE]":
                                    logger.debug("Received [DONE] sentinel")
                                    request_success = True
                                    break
                                try:
                                    parsed_event: ParsedEventData = json.loads(event_data)
                                    # Check for terminal events
                                    if parsed_event.get("event") in TERMINAL_EVENTS:
                                        logger.debug(f"Received terminal event: {parsed_event.get('event')}")
                                        request_success = True
                                        break
                                except json.JSONDecodeError as e:
                                    logger.debug(f"JSON decode error: {e} for data: {event_data[:100]}")
                                    metrics.record_error("invalid_json")
                        except Exception as e:
                            logger.error(f"Error processing event: {e}")

                # Mark success only if terminal condition was met or events were received
                if request_success:
                    response.success()
                elif event_count > 0:
                    # Got events but no proper terminal condition
                    metrics.record_error("early_termination")
                    response.failure("Stream ended without terminal event")
                else:
                    response.failure("No events received")
            except (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
            ) as e:
                metrics.record_error("timeout")
                response.failure(f"Timeout: {e}")
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.RequestException,
            ) as e:
                metrics.record_error("connection_error")
                response.failure(f"Connection error: {e}")
            except Exception as e:
                response.failure(str(e))
                raise
            finally:
                metrics.connection_ended()
                # Record stream metrics
                if event_count > 0:
                    stream_duration = (time.time() - start_time) * 1000
                    stream_metrics = StreamMetrics(
                        stream_duration=stream_duration,
                        events_count=event_count,
                        bytes_received=bytes_received,
                        ttfe=ttfe,
                        inter_event_times=inter_event_times,
                    )
                    metrics.record_stream_metrics(stream_metrics)
                    logger.debug(
                        f"Stream completed: {event_count} events, {stream_duration:.1f}ms, success={request_success}"
                    )
                else:
                    logger.warning("No events received in stream")


# Event handlers
@events.test_start.add_listener  # type: ignore[misc]
def on_test_start(environment: object, **kwargs: object) -> None:
    logger.info("=" * 80)
    logger.info(" " * 25 + "DIFY SSE BENCHMARK - REAL-TIME METRICS")
    logger.info("=" * 80)
    logger.info(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info("=" * 80)

    # Periodic stats reporting
    def report_stats() -> None:
        if not hasattr(environment, "runner"):
            return
        runner = environment.runner
        while hasattr(runner, "state") and runner.state not in ["stopped", "stopping"]:
            time.sleep(5)  # Report every 5 seconds
            if hasattr(runner, "state") and runner.state == "running":
                stats = metrics.get_stats()
                # Only log on master node in distributed mode
                is_master = (
                    not getattr(environment.runner, "worker_id", None) if hasattr(environment, "runner") else True
                )
                if is_master:
                    # Clear previous lines and show updated stats
                    logger.info("\n" + "=" * 80)
                    logger.info(
                        f"{'METRIC':<25} {'CURRENT':>15} {'RATE (10s)':>15} {'AVG (overall)':>15} {'TOTAL':>12}"
                    )
                    logger.info("-" * 80)
                    # Active SSE Connections (column widths match the header above)
                    logger.info(
                        f"{'Active SSE Connections':<25} {stats.active_connections:>15,d} {'-':>15} {'-':>15} {'-':>12}"
                    )
                    # New Connection Rate
                    logger.info(
                        f"{'New Connections':<25} {'-':>15} {stats.connection_rate:>13.2f}/s {stats.overall_conn_rate:>13.2f}/s {stats.total_connections:>12,d}"
                    )
                    # Event Throughput
                    logger.info(
                        f"{'Event Throughput':<25} {'-':>15} {stats.event_rate:>13.2f}/s {stats.overall_event_rate:>13.2f}/s {stats.total_events:>12,d}"
                    )
                    logger.info("-" * 80)
                    logger.info(
                        f"{'TIME TO FIRST EVENT':<25} {'AVG':>15} {'P50':>10} {'P95':>10} {'MIN':>10} {'MAX':>10}"
                    )
                    logger.info(
                        f"{'(TTFE in ms)':<25} {stats.ttfe_avg:>15.1f} {stats.ttfe_p50:>10.1f} {stats.ttfe_p95:>10.1f} {stats.ttfe_min:>10.1f} {stats.ttfe_max:>10.1f}"
                    )
                    logger.info(
                        f"{'Window Samples':<25} {stats.ttfe_samples:>15,d} (last {min(10000, stats.ttfe_total_samples):,d} samples)"
                    )
                    logger.info(f"{'Total Samples':<25} {stats.ttfe_total_samples:>15,d}")
                    # Inter-event latency
                    if stats.inter_event_latency_avg > 0:
                        logger.info("-" * 80)
                        logger.info(f"{'INTER-EVENT LATENCY':<25} {'AVG':>15} {'P50':>10} {'P95':>10}")
                        logger.info(
                            f"{'(ms between events)':<25} {stats.inter_event_latency_avg:>15.1f} {stats.inter_event_latency_p50:>10.1f} {stats.inter_event_latency_p95:>10.1f}"
                        )
                    # Error stats
                    if any(stats.error_counts.values()):
                        logger.info("-" * 80)
                        logger.info(f"{'ERROR TYPE':<25} {'COUNT':>15}")
                        for error_type, count in stats.error_counts.items():
                            if isinstance(count, int) and count > 0:
                                logger.info(f"{error_type:<25} {count:>15,d}")
                    logger.info("=" * 80)
                    # Show Locust stats summary
                    if hasattr(environment, "stats") and hasattr(environment.stats, "total"):
                        total = environment.stats.total
                        if hasattr(total, "num_requests") and total.num_requests > 0:
                            logger.info(
                                f"{'LOCUST STATS':<25} {'Requests':>12} {'Fails':>8} {'Avg (ms)':>12} {'Min':>8} {'Max':>8}"
                            )
                            logger.info("-" * 80)
                            logger.info(
                                f"{'Aggregated':<25} {total.num_requests:>12,d} "
                                f"{total.num_failures:>8,d} "
                                f"{total.avg_response_time:>12.1f} "
                                f"{total.min_response_time:>8.0f} "
                                f"{total.max_response_time:>8.0f}"
                            )
                            logger.info("=" * 80)

    threading.Thread(target=report_stats, daemon=True).start()


@events.test_stop.add_listener  # type: ignore[misc]
def on_test_stop(environment: object, **kwargs: object) -> None:
    stats = metrics.get_stats()
    test_duration = time.time() - metrics.start_time

    # Log final results
    logger.info("\n" + "=" * 80)
    logger.info(" " * 30 + "FINAL BENCHMARK RESULTS")
    logger.info("=" * 80)
    logger.info(f"Test Duration: {test_duration:.1f} seconds")
    logger.info("-" * 80)
    logger.info("")
    logger.info("CONNECTIONS")
    logger.info(f"  {'Total Connections:':<30} {stats.total_connections:>10,d}")
    logger.info(f"  {'Final Active:':<30} {stats.active_connections:>10,d}")
    logger.info(f"  {'Average Rate:':<30} {stats.overall_conn_rate:>10.2f} conn/s")
    logger.info("")
    logger.info("EVENTS")
    logger.info(f"  {'Total Events Received:':<30} {stats.total_events:>10,d}")
    logger.info(f"  {'Average Throughput:':<30} {stats.overall_event_rate:>10.2f} events/s")
    logger.info(f"  {'Final Rate (10s window):':<30} {stats.event_rate:>10.2f} events/s")
    logger.info("")
    logger.info("STREAM METRICS")
    logger.info(f"  {'Avg Stream Duration:':<30} {stats.stream_duration_avg:>10.1f} ms")
    logger.info(f"  {'P50 Stream Duration:':<30} {stats.stream_duration_p50:>10.1f} ms")
    logger.info(f"  {'P95 Stream Duration:':<30} {stats.stream_duration_p95:>10.1f} ms")
    logger.info(f"  {'Avg Events per Stream:':<30} {stats.events_per_stream_avg:>10.1f}")
    logger.info("")
    logger.info("INTER-EVENT LATENCY")
    logger.info(f"  {'Average:':<30} {stats.inter_event_latency_avg:>10.1f} ms")
    logger.info(f"  {'Median (P50):':<30} {stats.inter_event_latency_p50:>10.1f} ms")
    logger.info(f"  {'95th Percentile:':<30} {stats.inter_event_latency_p95:>10.1f} ms")
    logger.info("")
    logger.info("TIME TO FIRST EVENT (ms)")
    logger.info(f"  {'Average:':<30} {stats.ttfe_avg:>10.1f} ms")
    logger.info(f"  {'Median (P50):':<30} {stats.ttfe_p50:>10.1f} ms")
    logger.info(f"  {'95th Percentile:':<30} {stats.ttfe_p95:>10.1f} ms")
    logger.info(f"  {'Minimum:':<30} {stats.ttfe_min:>10.1f} ms")
    logger.info(f"  {'Maximum:':<30} {stats.ttfe_max:>10.1f} ms")
    logger.info(
        f"  {'Window Samples:':<30} {stats.ttfe_samples:>10,d} (last {min(10000, stats.ttfe_total_samples):,d})"
    )
    logger.info(f"  {'Total Samples:':<30} {stats.ttfe_total_samples:>10,d}")
    # Error summary
    if any(stats.error_counts.values()):
        logger.info("")
        logger.info("ERRORS")
        for error_type, count in stats.error_counts.items():
            if isinstance(count, int) and count > 0:
                logger.info(f"  {error_type:<30} {count:>10,d}")
    logger.info("=" * 80 + "\n")

    # Export machine-readable report (only on master node)
    is_master = not getattr(environment.runner, "worker_id", None) if hasattr(environment, "runner") else True
    if is_master:
        export_json_report(stats, test_duration, environment)


def export_json_report(stats: MetricsSnapshot, duration: float, environment: object) -> None:
    """Export metrics to JSON file for CI/CD analysis"""
    reports_dir = Path(__file__).parent / "reports"
    reports_dir.mkdir(exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_file = reports_dir / f"sse_metrics_{timestamp}.json"

    # Access environment.stats.total attributes safely
    locust_stats: LocustStats | None = None
    if hasattr(environment, "stats") and hasattr(environment.stats, "total"):
        total = environment.stats.total
        if hasattr(total, "num_requests") and total.num_requests > 0:
            locust_stats = LocustStats(
                total_requests=total.num_requests,
                total_failures=total.num_failures,
                avg_response_time=total.avg_response_time,
                min_response_time=total.min_response_time,
                max_response_time=total.max_response_time,
            )

    report_data = ReportData(
        timestamp=datetime.now().isoformat(),
        duration_seconds=duration,
        metrics=asdict(stats),  # type: ignore[arg-type]
        locust_stats=locust_stats,
    )
    with open(report_file, "w") as f:
        json.dump(report_data, f, indent=2)
    logger.info(f"Exported metrics to {report_file}")
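
# The exported report follows the ReportData shape defined above; roughly (values
# below are placeholders, not real output):
#   {
#     "timestamp": "2024-01-01T00:00:00",
#     "duration_seconds": 60.0,
#     "metrics": {"active_connections": 0, "total_connections": 123, "ttfe_p95": 250.0, ...},
#     "locust_stats": {"total_requests": 123, "total_failures": 0, ...}
#   }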