Coverage for muutils / misc / typing_breakdown.py: 0%
191 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-18 02:51 -0700
1"""Parse type checker outputs and generate detailed breakdown of errors by type and file.
3Usage:
4 python -m muutils.misc.typing_breakdown [OPTIONS]
6Examples:
7 python -m muutils.misc.typing_breakdown
8 python -m muutils.misc.typing_breakdown --error-dir .meta/.type-errors
9 python -m muutils.misc.typing_breakdown --top-n 15 --output .meta/typing-summary.txt
10"""
from __future__ import annotations

import argparse
import bisect
import os
import re
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, Dict, List, Literal, Tuple
def strip_cwd(path: str) -> str:
    """Strip the current working directory from a file path to make it relative.

    Args:
        path: File path (absolute or relative)

    Returns:
        Relative path with CWD stripped, or original path if not under CWD
    """
    # Normalize both sides so the prefix comparison is separator-consistent.
    # NOTE(review): abspath normalizes but does not resolve symlinks.
    normalized: str = os.path.abspath(path)
    prefix: str = os.path.abspath(os.getcwd())

    # Trailing separator prevents false matches on sibling dirs (/cwd-extra)
    if not prefix.endswith(os.sep):
        prefix += os.sep

    return normalized[len(prefix):] if normalized.startswith(prefix) else path
@dataclass
class TypeCheckResult:
    """Aggregated diagnostics parsed from one type checker's output.

    Errors (and, for basedpyright, warnings) are tallied two ways — per
    error code and per file — so the two totals can be cross-checked.
    """

    # which tool produced the parsed output
    type_checker: Literal["mypy", "basedpyright", "ty"]
    # error code -> count
    by_type: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    # file path -> count
    by_file: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    # Separate tracking for warnings (used by basedpyright)
    warnings_by_type: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    warnings_by_file: Dict[str, int] = field(default_factory=lambda: defaultdict(int))

    @staticmethod
    def _validated_total(
        by_type: Dict[str, int],
        by_file: Dict[str, int],
        label: str,
        checker: str,
    ) -> int:
        "sum one pair of count dicts, raising ValueError if the two totals disagree"
        total_by_type: int = sum(by_type.values())
        total_by_file: int = sum(by_file.values())

        if total_by_type != total_by_file:
            err_msg: str = f"{label} count mismatch for {checker}: by_type={total_by_type}, by_file={total_by_file}"
            raise ValueError(err_msg)

        return total_by_type

    @staticmethod
    def _top_counts(counts: Dict[str, int], top_n: int | None) -> Dict[str, int]:
        "sort a count mapping by descending count; keep only the first top_n entries (all if None)"
        ordered: List[Tuple[str, int]] = sorted(
            counts.items(),
            key=lambda kv: kv[1],
            reverse=True,
        )
        if top_n is not None:
            ordered = ordered[:top_n]
        # dicts preserve insertion order (Python 3.7+), so sorted order survives
        return dict(ordered)

    @property
    def total_errors(self) -> int:
        "total number of errors across all types, validates they match between type and file dicts"
        return self._validated_total(self.by_type, self.by_file, "Error", self.type_checker)

    @property
    def total_warnings(self) -> int:
        "total number of warnings across all types"
        return self._validated_total(
            self.warnings_by_type, self.warnings_by_file, "Warning", self.type_checker
        )

    def filter_by(self, top_n: int | None) -> TypeCheckResult:
        "return a copy with errors sorted by count and filtered to top_n items (or all if None)"
        result: TypeCheckResult = TypeCheckResult(type_checker=self.type_checker)
        result.by_type = self._top_counts(self.by_type, top_n)
        result.by_file = self._top_counts(self.by_file, top_n)
        result.warnings_by_type = self._top_counts(self.warnings_by_type, top_n)
        result.warnings_by_file = self._top_counts(self.warnings_by_file, top_n)
        return result

    def to_toml(self) -> str:
        "format as TOML-like output"
        lines: List[str] = []

        # Main section: single validated total, or separate totals on mismatch
        lines.append(f"[type_errors.{self.type_checker}]")
        try:
            lines.append(f"total_errors = {self.total_errors}")
        except ValueError:
            lines.append(f"total_errors_by_type = {sum(self.by_type.values())}")
            lines.append(f"total_errors_by_file = {sum(self.by_file.values())}")
        lines.append("")

        # by_type section
        lines.append(f"[type_errors.{self.type_checker}.by_type]")
        error_type: str
        count: int
        for error_type, count in self.by_type.items():
            # Always quote keys (error codes may contain '-' etc.)
            lines.append(f'"{error_type}" = {count}')

        lines.append("")

        # by_file section
        lines.append(f"[type_errors.{self.type_checker}.by_file]")
        file_path: str
        for file_path, count in self.by_file.items():
            # Always quote file paths
            lines.append(f'"{file_path}" = {count}')

        # Warnings sections are only emitted when any warnings were recorded
        if self.warnings_by_type or self.warnings_by_file:
            lines.append("")
            lines.append(f"[type_warnings.{self.type_checker}]")
            try:
                lines.append(f"total_warnings = {self.total_warnings}")
            except ValueError:
                lines.append(
                    f"total_warnings_by_type = {sum(self.warnings_by_type.values())}"
                )
                lines.append(
                    f"total_warnings_by_file = {sum(self.warnings_by_file.values())}"
                )
            lines.append("")

            # warnings by_type section
            lines.append(f"[type_warnings.{self.type_checker}.by_type]")
            warning_type: str
            for warning_type, count in self.warnings_by_type.items():
                lines.append(f'"{warning_type}" = {count}')

            lines.append("")

            # warnings by_file section
            lines.append(f"[type_warnings.{self.type_checker}.by_file]")
            for file_path, count in self.warnings_by_file.items():
                lines.append(f'"{file_path}" = {count}')

        return "\n".join(lines)
def parse_mypy(content: str) -> TypeCheckResult:
    "parse mypy output: file.py:line: error: message [error-code]"
    parsed: TypeCheckResult = TypeCheckResult(type_checker="mypy")

    # One match per error line; group 1 = file path, group 2 = error code.
    # 'note:' and 'warning:' lines are intentionally not matched.
    line_pattern: re.Pattern[str] = re.compile(
        r"^(.+?):\d+: error: .+ \[(.+?)\]", re.MULTILINE
    )
    hit: re.Match[str]
    for hit in line_pattern.finditer(content):
        parsed.by_file[hit.group(1)] += 1
        parsed.by_type[hit.group(2)] += 1

    return parsed
def parse_basedpyright(content: str) -> TypeCheckResult:
    "parse basedpyright output: path on line, then indented errors with (code)"
    parsed: TypeCheckResult = TypeCheckResult(type_checker="basedpyright")

    # File path lines start with / at column 0; diagnostics are indented.
    # Some diagnostics span multiple lines, with the (reportCode) suffix on
    # a later continuation line — tracked via awaiting_code.
    active_file: str = ""
    awaiting_code: str | None = None  # "error" or "warning" waiting for its code

    raw_line: str
    for raw_line in content.splitlines():
        # New file section: unindented absolute path
        if raw_line and not raw_line.startswith(" ") and raw_line.startswith("/"):
            active_file = strip_cwd(raw_line.strip())
            awaiting_code = None
            continue

        # Skip blank lines and anything before the first file header
        if not (raw_line.strip() and active_file):
            continue

        # Single-line form: " path:line:col - warning: message (reportCode)"
        one_line: re.Match[str] | None = re.search(
            r"\s+.+:\d+:\d+ - (error|warning): .+ \((\w+)\)", raw_line
        )
        if one_line is not None:
            severity: str = one_line.group(1)
            code: str = one_line.group(2)
            if severity == "warning":
                parsed.warnings_by_type[code] += 1
                parsed.warnings_by_file[active_file] += 1
            else:
                parsed.by_type[code] += 1
                parsed.by_file[active_file] += 1
            awaiting_code = None
            continue

        # Multi-line form, part 1: diagnostic header without a code yet
        header: re.Match[str] | None = re.search(
            r"\s+.+:\d+:\d+ - (error|warning): ", raw_line
        )
        if header is not None:
            awaiting_code = header.group(1)
            continue

        # Multi-line form, part 2: continuation line ending in (code);
        # awaiting_code stays set across non-matching continuation lines
        if awaiting_code is not None:
            tail: re.Match[str] | None = re.search(r"\((\w+)\)\s*$", raw_line)
            if tail is not None:
                code = tail.group(1)
                if awaiting_code == "warning":
                    parsed.warnings_by_type[code] += 1
                    parsed.warnings_by_file[active_file] += 1
                else:
                    parsed.by_type[code] += 1
                    parsed.by_file[active_file] += 1
                awaiting_code = None

    return parsed
def parse_ty(content: str) -> TypeCheckResult:
    """Parse ty output: error[error-code]: message then --> file:line:col.

    Each diagnostic header is attributed to the first ``-->`` location that
    appears after it in the output.

    Args:
        content: raw text of ty's output

    Returns:
        TypeCheckResult with per-code and per-file counts.
    """
    result: TypeCheckResult = TypeCheckResult(type_checker="ty")

    # Pattern for diagnostic headers: error[code]: or warning[code]:
    # NOTE(review): warnings are counted into by_type alongside errors here,
    # unlike parse_basedpyright — presumably matching ty's own summary; confirm.
    error_pattern: re.Pattern[str] = re.compile(
        r"^(error|warning)\[(.+?)\]:", re.MULTILINE
    )
    # Pattern for location: --> file:line:col
    location_pattern: re.Pattern[str] = re.compile(
        r"^\s+-->\s+(.+?):\d+:\d+", re.MULTILINE
    )

    errors: List[re.Match[str]] = list(error_pattern.finditer(content))
    locations: List[re.Match[str]] = list(location_pattern.finditer(content))
    # Precompute location start offsets (already ascending) so each error can
    # find its location in O(log n) instead of rescanning the whole list.
    location_starts: List[int] = [loc.start() for loc in locations]

    error_match: re.Match[str]
    for error_match in errors:
        error_code: str = error_match.group(2)
        result.by_type[error_code] += 1

        # First location starting strictly after the end of this header
        idx: int = bisect.bisect_right(location_starts, error_match.end())
        if idx < len(locations):
            file_path: str = locations[idx].group(1)
            result.by_file[file_path] += 1

    return result
def extract_summary_line(file_path: Path) -> str:
    """Extract the last non-empty line from a file (typically the summary line).

    Args:
        file_path: file to read (decoded as UTF-8)

    Returns:
        The last non-blank line, stripped of surrounding whitespace, or an
        empty string if the file has no non-blank lines (previously this
        raised IndexError on an empty file).
    """
    content: str = file_path.read_text(encoding="utf-8")
    lines: List[str] = [line.strip() for line in content.splitlines() if line.strip()]
    return lines[-1] if lines else ""
def main(error_dir: str, output_file: str, top_n: int | None = 10) -> None:
    "parse all type checker outputs and generate breakdown"
    errors_root: Path = Path(error_dir)
    destination: Path = Path(output_file)

    report: List[str] = []

    # Header comment recording how much is shown per category
    if top_n is None:
        report.append("# Showing all errors")
    else:
        report.append(f"# Showing top {top_n} errors per category")
    report.append("")

    # One entry per checker: display name, raw-output filename, parser
    checkers: List[Tuple[str, str, Callable[[str], TypeCheckResult]]] = [
        ("mypy", "mypy.txt", parse_mypy),
        ("basedpyright", "basedpyright.txt", parse_basedpyright),
        ("ty", "ty.txt", parse_ty),
    ]

    # First pass: each checker's own summary line, as comments
    checker_name: str
    checker_file: str
    for checker_name, checker_file, _unused in checkers:
        summary_line: str = extract_summary_line(errors_root / checker_file)
        report.append(f"# {checker_name}: {summary_line}")
    report.append("")

    # Second pass: full TOML-style breakdown per checker
    parse_fn: Callable[[str], TypeCheckResult]
    for checker_name, checker_file, parse_fn in checkers:
        raw_output: str = (errors_root / checker_file).read_text(encoding="utf-8")
        trimmed: TypeCheckResult = parse_fn(raw_output).filter_by(top_n)
        report.append(trimmed.to_toml())
        report.append("")  # blank line between checkers

    # Write to output file, then echo to stdout
    final_output: str = "\n".join(report)
    destination.parent.mkdir(parents=True, exist_ok=True)
    _ = destination.write_text(final_output, encoding="utf-8")
    print(final_output)
if __name__ == "__main__":
    cli: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Parse type checker outputs and generate detailed breakdown of errors by type and file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    _ = cli.add_argument(
        "--error-dir",
        type=str,
        default=".meta/.type-errors",
        help="Directory containing type checker output files (default: .meta/.type-errors)",
    )
    _ = cli.add_argument(
        "--output",
        "-o",
        type=str,
        default=".meta/typing-summary.txt",
        help="Output file to write summary to (default: .meta/typing-summary.txt)",
    )
    _ = cli.add_argument(
        "--top-n",
        "-n",
        type=str,
        default="10",
        help='Number of top items to show in each category (default: 10). Use "all" or negative number for all items.',
    )

    cli_args: argparse.Namespace = cli.parse_args()

    # --top-n is accepted as a string so "all" works alongside integers;
    # "all" (any case) or a negative number both mean "no limit".
    assert isinstance(cli_args.top_n, str)  # pyright: ignore[reportAny]
    limit: int | None
    if cli_args.top_n.lower() == "all":
        limit = None
    else:
        requested: int = int(cli_args.top_n)
        limit = None if requested < 0 else requested

    main(error_dir=cli_args.error_dir, output_file=cli_args.output, top_n=limit)  # pyright: ignore[reportAny]