Coverage for muutils / misc / typing_breakdown.py: 0%

191 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-18 02:51 -0700

1"""Parse type checker outputs and generate detailed breakdown of errors by type and file. 

2 

3Usage: 

4 python -m muutils.misc.typing_breakdown [OPTIONS] 

5 

6Examples: 

7 python -m muutils.misc.typing_breakdown 

8 python -m muutils.misc.typing_breakdown --error-dir .meta/.type-errors 

9 python -m muutils.misc.typing_breakdown --top-n 15 --output .meta/typing-summary.txt 

10""" 

11 

12from __future__ import annotations 

13 

14import argparse 

15import os 

16import re 

17from collections import defaultdict 

18from dataclasses import dataclass, field 

19from pathlib import Path 

20from typing import Callable, Dict, List, Literal, Tuple 

21 

22 

def strip_cwd(path: str) -> str:
    """Strip the current working directory from a file path to make it relative.

    Args:
        path: File path (absolute or relative)

    Returns:
        Relative path with CWD stripped, or original path if not under CWD
    """
    # Normalize both paths so separator differences don't break prefix matching
    resolved: str = os.path.abspath(path)
    cwd_prefix: str = os.path.abspath(os.getcwd())

    # A trailing separator guarantees we only match whole path components
    if not cwd_prefix.endswith(os.sep):
        cwd_prefix += os.sep

    # Strip the CWD prefix when present; otherwise hand back the input untouched
    return resolved[len(cwd_prefix):] if resolved.startswith(cwd_prefix) else path

46 

47 

@dataclass
class TypeCheckResult:
    "results from parsing a type checker output"

    # which tool produced these diagnostics
    type_checker: Literal["mypy", "basedpyright", "ty"]
    # error counts keyed by error code / by file path; defaultdicts so parsers can `+= 1`
    by_type: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    by_file: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    # Separate tracking for warnings (used by basedpyright)
    warnings_by_type: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    warnings_by_file: Dict[str, int] = field(default_factory=lambda: defaultdict(int))

    @staticmethod
    def _sorted_top(counts: Dict[str, int], top_n: int | None) -> Dict[str, int]:
        """Sort a count mapping by descending count, keeping at most `top_n` entries.

        Returns a plain dict; dicts preserve insertion order (Python 3.7+), so
        the descending-count order survives. Ties keep original insertion order
        because `sorted` is stable.
        """
        ordered: List[Tuple[str, int]] = sorted(
            counts.items(),
            key=lambda kv: kv[1],
            reverse=True,
        )
        if top_n is not None:
            ordered = ordered[:top_n]
        return dict(ordered)

    @property
    def total_errors(self) -> int:
        "total number of errors across all types, validates they match between type and file dicts"
        total_by_type: int = sum(self.by_type.values())
        total_by_file: int = sum(self.by_file.values())

        if total_by_type != total_by_file:
            err_msg: str = f"Error count mismatch for {self.type_checker}: by_type={total_by_type}, by_file={total_by_file}"
            raise ValueError(err_msg)

        return total_by_type

    def filter_by(self, top_n: int | None) -> TypeCheckResult:
        "return a copy with errors sorted by count and filtered to top_n items (or all if None)"
        # One helper replaces four copies of identical sort/truncate logic
        result: TypeCheckResult = TypeCheckResult(type_checker=self.type_checker)
        result.by_type = self._sorted_top(self.by_type, top_n)
        result.by_file = self._sorted_top(self.by_file, top_n)
        result.warnings_by_type = self._sorted_top(self.warnings_by_type, top_n)
        result.warnings_by_file = self._sorted_top(self.warnings_by_file, top_n)
        return result

    @property
    def total_warnings(self) -> int:
        "total number of warnings across all types"
        total_by_type: int = sum(self.warnings_by_type.values())
        total_by_file: int = sum(self.warnings_by_file.values())

        if total_by_type != total_by_file:
            err_msg: str = f"Warning count mismatch for {self.type_checker}: by_type={total_by_type}, by_file={total_by_file}"
            raise ValueError(err_msg)

        return total_by_type

    def to_toml(self) -> str:
        "format as TOML-like output"
        lines: List[str] = []

        # Main section with total
        lines.append(f"[type_errors.{self.type_checker}]")
        try:
            lines.append(f"total_errors = {self.total_errors}")
        except ValueError:
            # counts disagree (e.g. a diagnostic missing a location) -> report both
            lines.append(f"total_errors_by_type = {sum(self.by_type.values())}")
            lines.append(f"total_errors_by_file = {sum(self.by_file.values())}")
        lines.append("")

        # by_type section
        lines.append(f"[type_errors.{self.type_checker}.by_type]")
        error_type: str
        count: int
        for error_type, count in self.by_type.items():
            # Always quote keys
            lines.append(f'"{error_type}" = {count}')

        lines.append("")

        # by_file section
        lines.append(f"[type_errors.{self.type_checker}.by_file]")
        file_path: str
        for file_path, count in self.by_file.items():
            # Always quote file paths
            lines.append(f'"{file_path}" = {count}')

        # Add warnings sections if there are any warnings
        if self.warnings_by_type or self.warnings_by_file:
            lines.append("")
            lines.append(f"[type_warnings.{self.type_checker}]")
            try:
                lines.append(f"total_warnings = {self.total_warnings}")
            except ValueError:
                lines.append(
                    f"total_warnings_by_type = {sum(self.warnings_by_type.values())}"
                )
                lines.append(
                    f"total_warnings_by_file = {sum(self.warnings_by_file.values())}"
                )
            lines.append("")

            # warnings by_type section
            lines.append(f"[type_warnings.{self.type_checker}.by_type]")
            warning_type: str
            for warning_type, count in self.warnings_by_type.items():
                lines.append(f'"{warning_type}" = {count}')

            lines.append("")

            # warnings by_file section
            lines.append(f"[type_warnings.{self.type_checker}.by_file]")
            for file_path, count in self.warnings_by_file.items():
                lines.append(f'"{file_path}" = {count}')

        return "\n".join(lines)

182 

183 

def parse_mypy(content: str) -> TypeCheckResult:
    "parse mypy output: file.py:line: error: message [error-code]"
    result: TypeCheckResult = TypeCheckResult(type_checker="mypy")

    # one match per reported error; group 1 = file path, group 2 = error code
    error_line: re.Pattern[str] = re.compile(
        r"^(.+?):\d+: error: .+ \[(.+?)\]",
        re.MULTILINE,
    )
    for hit in error_line.finditer(content):
        result.by_file[hit.group(1)] += 1
        result.by_type[hit.group(2)] += 1

    return result

199 

200 

def parse_basedpyright(content: str) -> TypeCheckResult:
    "parse basedpyright output: path on line, then indented errors with (code)"
    result: TypeCheckResult = TypeCheckResult(type_checker="basedpyright")

    # single-line diagnostic: "  path:line:col - error: message (reportCode)"
    full_diag_re = re.compile(r"\s+.+:\d+:\d+ - (error|warning): .+ \((\w+)\)")
    # diagnostic whose "(reportCode)" continues on a later line
    open_diag_re = re.compile(r"\s+.+:\d+:\d+ - (error|warning): ")
    # trailing "(reportCode)" on a continuation line
    code_tail_re = re.compile(r"\((\w+)\)\s*$")

    current_file: str = ""
    pending_kind: str | None = None  # "error" or "warning" still awaiting its code

    def record(kind: str, code: str) -> None:
        "tally one diagnostic of the given kind under the current file"
        if kind == "warning":
            result.warnings_by_type[code] += 1
            result.warnings_by_file[current_file] += 1
        else:
            result.by_type[code] += 1
            result.by_file[current_file] += 1

    for raw in content.splitlines():
        # An unindented absolute path starts a new file section
        if raw.startswith("/"):
            current_file = strip_cwd(raw.strip())
            pending_kind = None
            continue
        # Skip blank lines and anything seen before the first file header
        if not (raw.strip() and current_file):
            continue

        complete = full_diag_re.search(raw)
        if complete:
            record(complete.group(1), complete.group(2))
            pending_kind = None
            continue

        opened = open_diag_re.search(raw)
        if opened:
            # Multi-line diagnostic: remember the severity until the code appears
            pending_kind = opened.group(1)
        elif pending_kind:
            tail = code_tail_re.search(raw)
            if tail:
                record(pending_kind, tail.group(1))
                pending_kind = None

    return result

254 

255 

def parse_ty(content: str) -> TypeCheckResult:
    """Parse ty output: error[error-code]: message then --> file:line:col.

    Args:
        content: raw text captured from a `ty` run

    Returns:
        TypeCheckResult with per-code and per-file error counts
    """
    result: TypeCheckResult = TypeCheckResult(type_checker="ty")

    # Pattern for error type: error[code]: or warning[code]:
    error_pattern: re.Pattern[str] = re.compile(
        r"^(error|warning)\[(.+?)\]:", re.MULTILINE
    )
    # Pattern for location: --> file:line:col
    location_pattern: re.Pattern[str] = re.compile(
        r"^\s+-->\s+(.+?):\d+:\d+", re.MULTILINE
    )

    # Find all errors and their locations
    errors: List[re.Match[str]] = list(error_pattern.finditer(content))
    locations: List[re.Match[str]] = list(location_pattern.finditer(content))

    # Both lists are in document order, so a single advancing cursor pairs each
    # error with the first unclaimed location that follows it. This is O(n)
    # instead of rescanning the location list per error (O(n^2)), and a
    # location is never counted for two different errors.
    loc_idx: int = 0
    error_match: re.Match[str]
    for error_match in errors:
        error_code: str = error_match.group(2)
        result.by_type[error_code] += 1

        # Skip locations that appear before (or inside) this error header
        error_end: int = error_match.end()
        while loc_idx < len(locations) and locations[loc_idx].start() <= error_end:
            loc_idx += 1
        if loc_idx < len(locations):
            file_path: str = locations[loc_idx].group(1)
            result.by_file[file_path] += 1
            loc_idx += 1

    return result

289 

290 

def extract_summary_line(file_path: Path) -> str:
    """Extract the last non-empty line from a file (typically the summary line).

    Args:
        file_path: file to read (decoded as UTF-8)

    Returns:
        The last line containing non-whitespace text, stripped; an empty string
        if the file has no such line (previously this raised IndexError on an
        empty or whitespace-only file).
    """
    content: str = file_path.read_text(encoding="utf-8")
    lines: List[str] = [line.strip() for line in content.splitlines() if line.strip()]
    return lines[-1] if lines else ""

296 

297 

def main(error_dir: str, output_file: str, top_n: int | None = 10) -> None:
    """Parse all type checker outputs and generate breakdown.

    Reads `<error_dir>/{mypy,basedpyright,ty}.txt`, writes a TOML-like summary
    to `output_file` (creating parent directories), and echoes it to stdout.

    Args:
        error_dir: directory containing the raw type checker output files
        output_file: path the summary is written to
        top_n: max entries per category, or None for all
    """
    error_path: Path = Path(error_dir)
    output_path: Path = Path(output_file)

    output_lines: List[str] = []

    # Add header comment with top_n info
    if top_n is None:
        output_lines.append("# Showing all errors")
    else:
        output_lines.append(f"# Showing top {top_n} errors per category")
    output_lines.append("")

    # Single registry of (name, filename, parser) — previously this was two
    # parallel lists that duplicated the name/filename pairs and could drift.
    checkers: List[Tuple[str, str, Callable[[str], TypeCheckResult]]] = [
        ("mypy", "mypy.txt", parse_mypy),
        ("basedpyright", "basedpyright.txt", parse_basedpyright),
        ("ty", "ty.txt", parse_ty),
    ]

    # Pass 1: one summary comment line per checker
    name: str
    filename: str
    for name, filename, _parser in checkers:
        summary: str = extract_summary_line(error_path / filename)
        output_lines.append(f"# {name}: {summary}")

    output_lines.append("")

    # Pass 2: full per-checker breakdown
    parser_fn: Callable[[str], TypeCheckResult]
    for name, filename, parser_fn in checkers:
        content: str = (error_path / filename).read_text(encoding="utf-8")
        result: TypeCheckResult = parser_fn(content)
        # Sort by count and truncate to top_n, then format as TOML-like text
        breakdown: str = result.filter_by(top_n).to_toml()
        output_lines.append(breakdown)
        output_lines.append("")  # Add blank line between checkers

    # Write to output file
    final_output: str = "\n".join(output_lines)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    _ = output_path.write_text(final_output, encoding="utf-8")

    # Also print to stdout
    print(final_output)

354 

355 

if __name__ == "__main__":
    # CLI wrapper: declare flags, normalize --top-n, then delegate to main()
    arg_parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Parse type checker outputs and generate detailed breakdown of errors by type and file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    _ = arg_parser.add_argument(
        "--error-dir",
        type=str,
        default=".meta/.type-errors",
        help="Directory containing type checker output files (default: .meta/.type-errors)",
    )
    _ = arg_parser.add_argument(
        "--output",
        "-o",
        type=str,
        default=".meta/typing-summary.txt",
        help="Output file to write summary to (default: .meta/typing-summary.txt)",
    )
    _ = arg_parser.add_argument(
        "--top-n",
        "-n",
        type=str,
        default="10",
        help='Number of top items to show in each category (default: 10). Use "all" or negative number for all items.',
    )

    cli_args: argparse.Namespace = arg_parser.parse_args()

    # Normalize --top-n: "all" or any negative value means "no limit"
    assert isinstance(cli_args.top_n, str)  # pyright: ignore[reportAny]
    limit: int | None
    if cli_args.top_n.lower() == "all":
        limit = None
    else:
        requested: int = int(cli_args.top_n)
        limit = requested if requested >= 0 else None

    main(error_dir=cli_args.error_dir, output_file=cli_args.output, top_n=limit)  # pyright: ignore[reportAny]