Coverage for muutils / misc / typing_breakdown.py: 0%

191 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-18 02:51 -0700

1"""Parse type checker outputs and generate detailed breakdown of errors by type and file. 

2 

3Usage: 

4 python -m muutils.misc.typing_breakdown [OPTIONS] 

5 

6Examples: 

7 python -m muutils.misc.typing_breakdown 

8 python -m muutils.misc.typing_breakdown --error-dir .meta/.type-errors 

9 python -m muutils.misc.typing_breakdown --top-n 15 --output .meta/typing-summary.txt 

10""" 

11 

12from __future__ import annotations 

13 

14import argparse 

15import os 

16import re 

17from collections import defaultdict 

18from dataclasses import dataclass, field 

19from pathlib import Path 

20from typing import Callable, Dict, List, Literal, Tuple 

21 

22 

def strip_cwd(path: str) -> str:
    """Strip the current working directory from a file path to make it relative.

    Args:
        path: File path (absolute or relative)

    Returns:
        Relative path with CWD stripped, or original path if not under CWD
    """
    # Normalize both paths so separator differences don't break prefix matching
    resolved: str = os.path.abspath(path)
    cwd_prefix: str = os.path.abspath(os.getcwd())

    # A trailing separator guarantees we only match whole path components
    if not cwd_prefix.endswith(os.sep):
        cwd_prefix += os.sep

    # Strip the CWD prefix when present; otherwise hand back the input untouched
    return resolved[len(cwd_prefix):] if resolved.startswith(cwd_prefix) else path

46 

47 

@dataclass
class TypeCheckResult:
    "results from parsing a type checker output"

    # which tool produced these diagnostics
    type_checker: Literal["mypy", "basedpyright", "ty"]
    # error counts keyed by error code / by file path; defaultdicts so parsers can `+= 1`
    by_type: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    by_file: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    # Separate tracking for warnings (used by basedpyright)
    warnings_by_type: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
    warnings_by_file: Dict[str, int] = field(default_factory=lambda: defaultdict(int))

    @staticmethod
    def _sorted_top(counts: Dict[str, int], top_n: int | None) -> Dict[str, int]:
        """Sort a count mapping by descending count, keeping at most `top_n` entries.

        Returns a plain dict; dicts preserve insertion order (Python 3.7+), so
        the descending-count order survives. Ties keep original insertion order
        because `sorted` is stable.
        """
        ordered: List[Tuple[str, int]] = sorted(
            counts.items(),
            key=lambda kv: kv[1],
            reverse=True,
        )
        if top_n is not None:
            ordered = ordered[:top_n]
        return dict(ordered)

    @property
    def total_errors(self) -> int:
        "total number of errors across all types, validates they match between type and file dicts"
        total_by_type: int = sum(self.by_type.values())
        total_by_file: int = sum(self.by_file.values())

        if total_by_type != total_by_file:
            err_msg: str = f"Error count mismatch for {self.type_checker}: by_type={total_by_type}, by_file={total_by_file}"
            raise ValueError(err_msg)

        return total_by_type

    def filter_by(self, top_n: int | None) -> TypeCheckResult:
        "return a copy with errors sorted by count and filtered to top_n items (or all if None)"
        # One helper replaces four copies of identical sort/truncate logic
        result: TypeCheckResult = TypeCheckResult(type_checker=self.type_checker)
        result.by_type = self._sorted_top(self.by_type, top_n)
        result.by_file = self._sorted_top(self.by_file, top_n)
        result.warnings_by_type = self._sorted_top(self.warnings_by_type, top_n)
        result.warnings_by_file = self._sorted_top(self.warnings_by_file, top_n)
        return result

    @property
    def total_warnings(self) -> int:
        "total number of warnings across all types"
        total_by_type: int = sum(self.warnings_by_type.values())
        total_by_file: int = sum(self.warnings_by_file.values())

        if total_by_type != total_by_file:
            err_msg: str = f"Warning count mismatch for {self.type_checker}: by_type={total_by_type}, by_file={total_by_file}"
            raise ValueError(err_msg)

        return total_by_type

    def to_toml(self) -> str:
        "format as TOML-like output"
        lines: List[str] = []

        # Main section with total
        lines.append(f"[type_errors.{self.type_checker}]")
        try:
            lines.append(f"total_errors = {self.total_errors}")
        except ValueError:
            # counts disagree (e.g. a diagnostic missing a location) -> report both
            lines.append(f"total_errors_by_type = {sum(self.by_type.values())}")
            lines.append(f"total_errors_by_file = {sum(self.by_file.values())}")
        lines.append("")

        # by_type section
        lines.append(f"[type_errors.{self.type_checker}.by_type]")
        error_type: str
        count: int
        for error_type, count in self.by_type.items():
            # Always quote keys
            lines.append(f'"{error_type}" = {count}')

        lines.append("")

        # by_file section
        lines.append(f"[type_errors.{self.type_checker}.by_file]")
        file_path: str
        for file_path, count in self.by_file.items():
            # Always quote file paths
            lines.append(f'"{file_path}" = {count}')

        # Add warnings sections if there are any warnings
        if self.warnings_by_type or self.warnings_by_file:
            lines.append("")
            lines.append(f"[type_warnings.{self.type_checker}]")
            try:
                lines.append(f"total_warnings = {self.total_warnings}")
            except ValueError:
                lines.append(
                    f"total_warnings_by_type = {sum(self.warnings_by_type.values())}"
                )
                lines.append(
                    f"total_warnings_by_file = {sum(self.warnings_by_file.values())}"
                )
            lines.append("")

            # warnings by_type section
            lines.append(f"[type_warnings.{self.type_checker}.by_type]")
            warning_type: str
            for warning_type, count in self.warnings_by_type.items():
                lines.append(f'"{warning_type}" = {count}')

            lines.append("")

            # warnings by_file section
            lines.append(f"[type_warnings.{self.type_checker}.by_file]")
            for file_path, count in self.warnings_by_file.items():
                lines.append(f'"{file_path}" = {count}')

        return "\n".join(lines)

182 

183 

def parse_mypy(content: str) -> TypeCheckResult:
    "parse mypy output: file.py:line: error: message [error-code]"
    result: TypeCheckResult = TypeCheckResult(type_checker="mypy")

    # one match per reported error; group 1 = file path, group 2 = error code
    error_line: re.Pattern[str] = re.compile(
        r"^(.+?):\d+: error: .+ \[(.+?)\]",
        re.MULTILINE,
    )
    for hit in error_line.finditer(content):
        result.by_file[hit.group(1)] += 1
        result.by_type[hit.group(2)] += 1

    return result

199 

200 

def parse_basedpyright(content: str) -> TypeCheckResult:
    "parse basedpyright output: path on line, then indented errors with (code)"
    result: TypeCheckResult = TypeCheckResult(type_checker="basedpyright")

    # single-line diagnostic: "  path:line:col - error: message (reportCode)"
    full_diag_re = re.compile(r"\s+.+:\d+:\d+ - (error|warning): .+ \((\w+)\)")
    # diagnostic whose "(reportCode)" continues on a later line
    open_diag_re = re.compile(r"\s+.+:\d+:\d+ - (error|warning): ")
    # trailing "(reportCode)" on a continuation line
    code_tail_re = re.compile(r"\((\w+)\)\s*$")

    current_file: str = ""
    pending_kind: str | None = None  # "error" or "warning" still awaiting its code

    def record(kind: str, code: str) -> None:
        "tally one diagnostic of the given kind under the current file"
        if kind == "warning":
            result.warnings_by_type[code] += 1
            result.warnings_by_file[current_file] += 1
        else:
            result.by_type[code] += 1
            result.by_file[current_file] += 1

    for raw in content.splitlines():
        # An unindented absolute path starts a new file section
        if raw.startswith("/"):
            current_file = strip_cwd(raw.strip())
            pending_kind = None
            continue
        # Skip blank lines and anything seen before the first file header
        if not (raw.strip() and current_file):
            continue

        complete = full_diag_re.search(raw)
        if complete:
            record(complete.group(1), complete.group(2))
            pending_kind = None
            continue

        opened = open_diag_re.search(raw)
        if opened:
            # Multi-line diagnostic: remember the severity until the code appears
            pending_kind = opened.group(1)
        elif pending_kind:
            tail = code_tail_re.search(raw)
            if tail:
                record(pending_kind, tail.group(1))
                pending_kind = None

    return result

254 

255 

def parse_ty(content: str) -> TypeCheckResult:
    """Parse ty output: error[error-code]: message then --> file:line:col.

    Args:
        content: raw text captured from a `ty` run

    Returns:
        TypeCheckResult with per-code and per-file error counts
    """
    result: TypeCheckResult = TypeCheckResult(type_checker="ty")

    # Pattern for error type: error[code]: or warning[code]:
    error_pattern: re.Pattern[str] = re.compile(
        r"^(error|warning)\[(.+?)\]:", re.MULTILINE
    )
    # Pattern for location: --> file:line:col
    location_pattern: re.Pattern[str] = re.compile(
        r"^\s+-->\s+(.+?):\d+:\d+", re.MULTILINE
    )

    # Find all errors and their locations
    errors: List[re.Match[str]] = list(error_pattern.finditer(content))
    locations: List[re.Match[str]] = list(location_pattern.finditer(content))

    # Both lists are in document order, so a single advancing cursor pairs each
    # error with the first unclaimed location that follows it. This is O(n)
    # instead of rescanning the location list per error (O(n^2)), and a
    # location is never counted for two different errors.
    loc_idx: int = 0
    error_match: re.Match[str]
    for error_match in errors:
        error_code: str = error_match.group(2)
        result.by_type[error_code] += 1

        # Skip locations that appear before (or inside) this error header
        error_end: int = error_match.end()
        while loc_idx < len(locations) and locations[loc_idx].start() <= error_end:
            loc_idx += 1
        if loc_idx < len(locations):
            file_path: str = locations[loc_idx].group(1)
            result.by_file[file_path] += 1
            loc_idx += 1

    return result

289 

290 

def extract_summary_line(file_path: Path) -> str:
    """Extract the last non-empty line from a file (typically the summary line).

    Args:
        file_path: file to read (decoded as UTF-8)

    Returns:
        The last line containing non-whitespace text, stripped; an empty string
        if the file has no such line (previously this raised IndexError on an
        empty or whitespace-only file).
    """
    content: str = file_path.read_text(encoding="utf-8")
    lines: List[str] = [line.strip() for line in content.splitlines() if line.strip()]
    return lines[-1] if lines else ""

296 

297 

def main(error_dir: str, output_file: str, top_n: int | None = 10) -> None:
    """Parse all type checker outputs and generate breakdown.

    Reads `<error_dir>/{mypy,basedpyright,ty}.txt`, writes a TOML-like summary
    to `output_file` (creating parent directories), and echoes it to stdout.

    Args:
        error_dir: directory containing the raw type checker output files
        output_file: path the summary is written to
        top_n: max entries per category, or None for all
    """
    error_path: Path = Path(error_dir)
    output_path: Path = Path(output_file)

    output_lines: List[str] = []

    # Add header comment with top_n info
    if top_n is None:
        output_lines.append("# Showing all errors")
    else:
        output_lines.append(f"# Showing top {top_n} errors per category")
    output_lines.append("")

    # Single registry of (name, filename, parser) — previously this was two
    # parallel lists that duplicated the name/filename pairs and could drift.
    checkers: List[Tuple[str, str, Callable[[str], TypeCheckResult]]] = [
        ("mypy", "mypy.txt", parse_mypy),
        ("basedpyright", "basedpyright.txt", parse_basedpyright),
        ("ty", "ty.txt", parse_ty),
    ]

    # Pass 1: one summary comment line per checker
    name: str
    filename: str
    for name, filename, _parser in checkers:
        summary: str = extract_summary_line(error_path / filename)
        output_lines.append(f"# {name}: {summary}")

    output_lines.append("")

    # Pass 2: full per-checker breakdown
    parser_fn: Callable[[str], TypeCheckResult]
    for name, filename, parser_fn in checkers:
        content: str = (error_path / filename).read_text(encoding="utf-8")
        result: TypeCheckResult = parser_fn(content)
        # Sort by count and truncate to top_n, then format as TOML-like text
        breakdown: str = result.filter_by(top_n).to_toml()
        output_lines.append(breakdown)
        output_lines.append("")  # Add blank line between checkers

    # Write to output file
    final_output: str = "\n".join(output_lines)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    _ = output_path.write_text(final_output, encoding="utf-8")

    # Also print to stdout
    print(final_output)

354 

355 

if __name__ == "__main__":
    # CLI wrapper: declare flags, normalize --top-n, then delegate to main()
    arg_parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Parse type checker outputs and generate detailed breakdown of errors by type and file",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    _ = arg_parser.add_argument(
        "--error-dir",
        type=str,
        default=".meta/.type-errors",
        help="Directory containing type checker output files (default: .meta/.type-errors)",
    )
    _ = arg_parser.add_argument(
        "--output",
        "-o",
        type=str,
        default=".meta/typing-summary.txt",
        help="Output file to write summary to (default: .meta/typing-summary.txt)",
    )
    _ = arg_parser.add_argument(
        "--top-n",
        "-n",
        type=str,
        default="10",
        help='Number of top items to show in each category (default: 10). Use "all" or negative number for all items.',
    )

    cli_args: argparse.Namespace = arg_parser.parse_args()

    # Normalize --top-n: "all" or any negative value means "no limit"
    assert isinstance(cli_args.top_n, str)  # pyright: ignore[reportAny]
    limit: int | None
    if cli_args.top_n.lower() == "all":
        limit = None
    else:
        requested: int = int(cli_args.top_n)
        limit = requested if requested >= 0 else None

    main(error_dir=cli_args.error_dir, output_file=cli_args.output, top_n=limit)  # pyright: ignore[reportAny]