Content is user-generated and unverified.
#!/usr/bin/env python3 """ Enhanced benchmark to compare performance between if-elif chains and match-case pattern matching for AST node inspection in pytest's assertion rewriter, with hot path analysis. """ import ast import timeit import statistics import time from typing import List, Tuple, Dict import sys import gc import json from datetime import datetime # Sample Python code templates for testing TEST_TEMPLATES = { "simple": ''' """Simple module docstring.""" def test_function(): assert True ''', "with_future": ''' """Module with future imports.""" from __future__ import annotations from __future__ import print_function def test_function(): assert True ''', "no_docstring": ''' from __future__ import annotations import sys def test_function(): assert True ''', "complex": ''' """Complex module with multiple imports and functions.""" from __future__ import annotations from __future__ import division from __future__ import print_function import os import sys from typing import List, Dict class TestClass: def test_method(self): assert True def test_function(): assert 1 + 1 == 2 ''', "rewrite_disabled": ''' """Module with PYTEST_DONT_REWRITE marker.""" from __future__ import annotations def test_function(): assert True ''', "many_imports": ''' """Module with many future imports.""" from __future__ import annotations from __future__ import division from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import from __future__ import generators from __future__ import nested_scopes import os import sys def test_function(): assert True ''' + '\n'.join([f'from __future__ import annotations' for _ in range(10)]) } def original_implementation(mod: ast.Module) -> Tuple[int, bool, bool]: """Original if-elif implementation from pytest.""" if not mod.body: return 0, False, False doc = getattr(mod, "docstring", None) expect_docstring = doc is None found_disabled = False pos = 0 for item in mod.body: if ( expect_docstring and isinstance(item, ast.Expr) and isinstance(item.value, ast.Constant) and isinstance(item.value.value, str) ): doc = item.value.value if "PYTEST_DONT_REWRITE" in doc: found_disabled = True return pos, found_disabled, expect_docstring expect_docstring = False elif ( isinstance(item, ast.ImportFrom) and item.level == 0 and item.module == "__future__" ): pass else: break pos += 1 return pos, found_disabled, expect_docstring def match_case_implementation(mod: ast.Module) -> Tuple[int, bool, bool]: """New match-case implementation.""" if not mod.body: return 0, False, False doc = getattr(mod, "docstring", None) expect_docstring = doc is None found_disabled = False pos = 0 for item in mod.body: match item: case ast.Expr(value=ast.Constant(value=str(doc))) if expect_docstring: if "PYTEST_DONT_REWRITE" in doc: found_disabled = True return pos, found_disabled, expect_docstring expect_docstring = False case ast.ImportFrom(level=0, module="__future__"): pass case _: break pos += 1 return pos, found_disabled, expect_docstring def warmup_function(func, iterations=1000): """Warm up a function to ensure it's in cache and potentially optimized.""" for _ in range(iterations): func() def measure_with_warmup(func, iterations=10000, warmup_iterations=5000): """Measure performance after warming up the function.""" # Warmup phase warmup_function(func, warmup_iterations) # Actual measurement start = time.perf_counter() for _ in range(iterations): func() end = time.perf_counter() return end - start def run_hot_path_analysis(code_template: str, runs: int = 10, iterations_per_run: int = 10000): """Analyze performance over multiple runs to see hot path optimization.""" tree = ast.parse(code_template) original_times = [] match_times = [] for run in range(runs): # Force garbage collection between runs for consistency gc.collect() # Measure original implementation orig_time = measure_with_warmup( lambda: original_implementation(tree), iterations_per_run, warmup_iterations=1000 if run == 0 else 100 ) original_times.append(orig_time) # Measure match-case implementation match_time = measure_with_warmup( lambda: match_case_implementation(tree), iterations_per_run, warmup_iterations=1000 if run == 0 else 100 ) match_times.append(match_time) return original_times, match_times def analyze_convergence(times: List[float], name: str) -> Dict: """Analyze how performance converges over runs.""" analysis = { 'name': name, 'initial': times[0] if times else 0, 'final': times[-1] if times else 0, 'mean': statistics.mean(times) if times else 0, 'stdev': statistics.stdev(times) if len(times) > 1 else 0, 'min': min(times) if times else 0, 'max': max(times) if times else 0, 'convergence_rate': 0, 'stabilized_at_run': 0 } if len(times) > 1: # Calculate convergence rate (how much performance improves from first to best) analysis['convergence_rate'] = ((times[0] - min(times)) / times[0]) * 100 if times[0] > 0 else 0 # Find when performance stabilizes (within 5% of minimum) min_time = min(times) threshold = min_time * 1.05 for i, t in enumerate(times): if t <= threshold: analysis['stabilized_at_run'] = i + 1 break return analysis def plot_ascii_graph(original_times: List[float], match_times: List[float], width: int = 60): """Create an ASCII graph showing performance over runs.""" if not original_times or not match_times: return max_time = max(max(original_times), max(match_times)) min_time = min(min(original_times), min(match_times)) scale = width / (max_time - min_time) if max_time > min_time else 1 print("\n Performance Over Runs (shorter is better)") print(" " + "─" * (width + 2)) for i in range(len(original_times)): orig_bar_len = int((original_times[i] - min_time) * scale) match_bar_len = int((match_times[i] - min_time) * scale) print(f" Run {i+1:2d}:") print(f" Orig │{'█' * orig_bar_len} {original_times[i]:.4f}s") print(f" Match │{'▓' * match_bar_len} {match_times[i]:.4f}s") if i < len(original_times) - 1: print(" │") def verify_correctness(): """Verify both implementations produce same results.""" print("Verifying correctness...") all_correct = True for name, code in TEST_TEMPLATES.items(): tree = ast.parse(code) original_result = original_implementation(tree) match_result = match_case_implementation(tree) if original_result != match_result: print(f" ❌ {name}: Results differ!") print(f" Original: {original_result}") print(f" Match: {match_result}") all_correct = False else: print(f" ✓ {name}: Results match {original_result}") return all_correct def main(): """Main benchmark runner with hot path analysis.""" print("=" * 80) print("AST Pattern Matching Hot Path Performance Analysis") print(f"Python version: {sys.version}") print(f"Timestamp: {datetime.now().isoformat()}") print("=" * 80) # Check Python version if sys.version_info < (3, 10): print("⚠️ Warning: This benchmark requires Python 3.10+ for match-case syntax") return # Configuration num_launches = 5 # Number of separate process launches to simulate runs_per_launch = 10 # Number of runs within each launch iterations_per_run = 50000 print(f"\nConfiguration:") print(f" • Launches: {num_launches}") print(f" • Runs per launch: {runs_per_launch}") print(f" • Iterations per run: {iterations_per_run:,}") print(f" • Total iterations: {num_launches * runs_per_launch * iterations_per_run:,}") # First verify correctness if not verify_correctness(): print("\n⚠️ Correctness check failed! Results may not be comparable.") return all_results = [] # Multiple launches to see cold vs hot performance for launch in range(num_launches): print("\n" + "=" * 80) print(f"Launch {launch + 1}/{num_launches}") print("=" * 80) launch_results = {} for test_name, code in TEST_TEMPLATES.items(): print(f"\n📊 Test case: {test_name}") print(f" Code length: {len(code)} chars") # Run hot path analysis original_times, match_times = run_hot_path_analysis( code, runs_per_launch, iterations_per_run ) # Analyze convergence orig_analysis = analyze_convergence(original_times, "Original") match_analysis = analyze_convergence(match_times, "Match-case") # Store results launch_results[test_name] = { 'original_times': original_times, 'match_times': match_times, 'orig_analysis': orig_analysis, 'match_analysis': match_analysis } # Print run-by-run comparison print(f"\n Run-by-run timing (seconds for {iterations_per_run:,} iterations):") print(f" {'Run':<6} {'Original':<12} {'Match-case':<12} {'Diff %':<10} {'Winner'}") print(f" {'-'*6} {'-'*12} {'-'*12} {'-'*10} {'-'*10}") for run_idx in range(runs_per_launch): orig_t = original_times[run_idx] match_t = match_times[run_idx] diff_pct = ((match_t - orig_t) / orig_t) * 100 winner = "Match 🚀" if match_t < orig_t else "Original 🐢" # Highlight best times orig_marker = " *" if orig_t == min(original_times) else " " match_marker = " *" if match_t == min(match_times) else " " print(f" {run_idx+1:<6} {orig_t:<10.4f}{orig_marker} {match_t:<10.4f}{match_marker} " f"{diff_pct:>+9.2f}% {winner}") # Print convergence analysis print(f"\n Convergence Analysis:") print(f" • Original: Initial={orig_analysis['initial']:.4f}s, " f"Best={orig_analysis['min']:.4f}s, " f"Convergence={orig_analysis['convergence_rate']:.1f}%, " f"Stable at run #{orig_analysis['stabilized_at_run']}") print(f" • Match: Initial={match_analysis['initial']:.4f}s, " f"Best={match_analysis['min']:.4f}s, " f"Convergence={match_analysis['convergence_rate']:.1f}%, " f"Stable at run #{match_analysis['stabilized_at_run']}") # Best performance comparison best_orig = min(original_times) best_match = min(match_times) best_diff = ((best_match - best_orig) / best_orig) * 100 if best_diff < 0: print(f"\n 🏆 Best Performance: Match-case is {abs(best_diff):.2f}% faster") else: print(f"\n 🏆 Best Performance: Original is {best_diff:.2f}% faster") # Plot ASCII graph for this test case if launch == 0: # Only plot for first launch to save space plot_ascii_graph(original_times, match_times, width=40) all_results.append(launch_results) # Overall summary across all launches print("\n" + "=" * 80) print("OVERALL SUMMARY ACROSS ALL LAUNCHES") print("=" * 80) aggregate_results = {} for test_name in TEST_TEMPLATES.keys(): all_orig_times = [] all_match_times = [] for launch_result in all_results: all_orig_times.extend(launch_result[test_name]['original_times']) all_match_times.extend(launch_result[test_name]['match_times']) aggregate_results[test_name] = { 'orig_mean': statistics.mean(all_orig_times), 'orig_stdev': statistics.stdev(all_orig_times), 'orig_min': min(all_orig_times), 'orig_max': max(all_orig_times), 'match_mean': statistics.mean(all_match_times), 'match_stdev': statistics.stdev(all_match_times), 'match_min': min(all_match_times), 'match_max': max(all_match_times), } print("\n📈 Performance Statistics (all times in seconds):") print(f"{'Test Case':<20} {'Implementation':<15} {'Mean ± StdDev':<20} {'Min':<10} {'Max':<10}") print("-" * 75) for test_name, stats in aggregate_results.items(): print(f"{test_name:<20} {'Original':<15} " f"{stats['orig_mean']:.4f} ± {stats['orig_stdev']:.4f} " f"{stats['orig_min']:.4f} {stats['orig_max']:.4f}") print(f"{'':<20} {'Match-case':<15} " f"{stats['match_mean']:.4f} ± {stats['match_stdev']:.4f} " f"{stats['match_min']:.4f} {stats['match_max']:.4f}") diff_pct = ((stats['match_mean'] - stats['orig_mean']) / stats['orig_mean']) * 100 if diff_pct < 0: verdict = f"✅ Match {abs(diff_pct):.1f}% faster" else: verdict = f"⚠️ Match {diff_pct:.1f}% slower" print(f"{'':<20} {'Verdict:':<15} {verdict}") print() # Hot path insights print("\n" + "=" * 80) print("🔥 HOT PATH INSIGHTS") print("=" * 80) print("\nKey Observations:") # Calculate average convergence rates avg_orig_convergence = [] avg_match_convergence = [] for launch_result in all_results: for test_name, data in launch_result.items(): avg_orig_convergence.append(data['orig_analysis']['convergence_rate']) avg_match_convergence.append(data['match_analysis']['convergence_rate']) print(f"\n1. Warm-up Effect:") print(f" • Original avg convergence: {statistics.mean(avg_orig_convergence):.2f}%") print(f" • Match-case avg convergence: {statistics.mean(avg_match_convergence):.2f}%") print(f"\n2. Stability:") stability_comparison = [] for test_name in TEST_TEMPLATES.keys(): orig_stdev = aggregate_results[test_name]['orig_stdev'] match_stdev = aggregate_results[test_name]['match_stdev'] stability_comparison.append((test_name, orig_stdev, match_stdev)) stability_comparison.sort(key=lambda x: abs(x[1] - x[2]), reverse=True) print(" Most variable performance differences:") for name, orig_std, match_std in stability_comparison[:3]: more_stable = "Original" if orig_std < match_std else "Match-case" print(f" • {name}: {more_stable} is more stable " f"(σ_orig={orig_std:.4f}, σ_match={match_std:.4f})") # Final recommendation print("\n" + "=" * 80) print("📋 RECOMMENDATIONS") print("=" * 80) total_orig = sum(stats['orig_mean'] for stats in aggregate_results.values()) total_match = sum(stats['match_mean'] for stats in aggregate_results.values()) overall_diff = ((total_match - total_orig) / total_orig) * 100 print(f"\nOverall Performance: ", end="") if overall_diff < -5: print(f"Match-case is {abs(overall_diff):.1f}% faster ✅") print("\n✅ ADOPT: Match-case shows consistent performance improvement") print(" • Better hot path performance") print(" • More readable and maintainable code") print(" • Future Python versions likely to optimize further") elif overall_diff < 5: print(f"Difference is {abs(overall_diff):.1f}% {'faster' if overall_diff < 0 else 'slower'} 🤔") print("\n🤔 NEUTRAL: Performance difference is negligible") print(" • Decision should be based on code style preferences") print(" • Match-case offers better readability") print(" • Consider team familiarity with pattern matching") else: print(f"Match-case is {overall_diff:.1f}% slower ⚠️") print("\n⚠️ CAUTION: Match-case shows performance regression") print(" • Consider keeping original implementation") print(" • May improve in future Python versions") print(" • Test with your specific Python version and workload") print(f"\nTested on Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}") print("Note: Results vary by Python version, CPU, and system load") if __name__ == "__main__": main()
Content is user-generated and unverified.
    AST Pattern Matching Performance Benchmark | Claude