Coverage for muutils / dbg.py: 87%

225 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-22 18:25 -0700

1""" 

2 

3this code is based on an implementation of the Rust builtin `dbg!` for Python, originally from 

4https://github.com/tylerwince/pydbg/blob/master/pydbg.py 

5although it has been significantly modified 

6 

7licensed under MIT: 

8 

9Copyright (c) 2019 Tyler Wince 

10 

11Permission is hereby granted, free of charge, to any person obtaining a copy 

12of this software and associated documentation files (the "Software"), to deal 

13in the Software without restriction, including without limitation the rights 

14to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 

15copies of the Software, and to permit persons to whom the Software is 

16furnished to do so, subject to the following conditions: 

17 

18The above copyright notice and this permission notice shall be included in 

19all copies or substantial portions of the Software. 

20 

21THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

22IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

23FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

24AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

25LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 

26OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 

27THE SOFTWARE. 

28 

29""" 

30 

31from __future__ import annotations 

32 

33import inspect 

34import sys 

35import typing 

36from pathlib import Path 

37import re 

38 

# type defs
# generic passthrough TypeVar: dbg(exp) returns exp with its static type intact
_ExpType = typing.TypeVar("_ExpType")
# dict-bound variant, used by dbg_dict so the input dict type is preserved
_ExpType_dict = typing.TypeVar(
    "_ExpType_dict", bound=typing.Dict[typing.Any, typing.Any]
)
# list-bound variant (currently unused by the dbg_* wrappers below)
_ExpType_list = typing.TypeVar("_ExpType_list", bound=typing.List[typing.Any])

45 

46 

47# TypedDict definitions for configuration dictionaries 

class DBGDictDefaultsType(typing.TypedDict):
    """Schema for `DBG_DICT_DEFAULTS` — options that control `dict_info` output."""

    key_types: bool  # include a key-type summary in the header line
    val_types: bool  # include a value-type summary in the header line
    max_len: int  # only expand individual entries for dicts smaller than this
    indent: str  # indentation unit, repeated once per nesting level
    max_depth: int  # stop recursing into nested dicts at this depth

54 

55 

class DBGListDefaultsType(typing.TypedDict):
    """Schema for `DBG_LIST_DEFAULTS` — options that control `list_info` output."""

    max_len: int  # lists longer than this are summarized instead of shown in full
    summary_show_types: bool  # include element type names in the summary form

59 

60 

class DBGTensorArraySummaryDefaultsType(typing.TypedDict):
    """Schema for `DBG_TENSOR_ARRAY_SUMMARY_DEFAULTS`.

    These are keyword arguments forwarded verbatim to
    `muutils.tensor_info.array_summary` by `tensor_info`; see that function
    for the exact semantics of each option.
    """

    fmt: typing.Literal["unicode", "latex", "ascii"]
    precision: int
    stats: bool
    shape: bool
    dtype: bool
    device: bool
    requires_grad: bool
    sparkline: bool
    sparkline_bins: int
    sparkline_logy: typing.Union[None, bool]  # None means auto-detect (see defaults)
    colored: bool
    eq_char: str

74 

75 

76# Sentinel type for no expression passed 

class _NoExpPassedSentinel:
    """Unique sentinel type used to indicate that no expression was passed.

    Instances carry no state; `dbg` checks identity (`is`) against the
    module-level `_NoExpPassed` singleton below.
    """

    # NOTE: the redundant `pass` after the docstring was removed — the
    # docstring alone is a sufficient class body.


# singleton sentinel instance used as the default for dbg()'s `exp` parameter
_NoExpPassed: _NoExpPassedSentinel = _NoExpPassedSentinel()

84 

85# global variables 

# global variables
_CWD: Path = Path.cwd().absolute()  # cached cwd, used for relative path display
_COUNTER: int = 0  # incremented each time dbg() is called with no expression

# configuration
PATH_MODE: typing.Literal["relative", "absolute"] = "relative"
DEFAULT_VAL_JOINER: str = " = "


# path processing
def _process_path(path: Path) -> str:
    """Render *path* as a posix string according to the global `PATH_MODE`.

    In "relative" mode, paths under the current working directory are shown
    relative to it; anything outside the cwd falls back to the absolute path.
    In "absolute" mode, the absolute path is always used.

    Raises `ValueError` if `PATH_MODE` holds an unrecognized value.
    """
    path_abs: Path = path.absolute()
    fname: Path
    if PATH_MODE == "absolute":
        fname = path_abs
    elif PATH_MODE == "relative":
        try:
            # if it's inside the cwd, print the relative path
            # (fixed: compare the absolute form, so that a *relative* input
            # path inside the cwd is also rendered relative instead of
            # always falling through to the absolute branch)
            fname = path_abs.relative_to(_CWD)
        except ValueError:
            # not under the cwd: use the absolute path
            fname = path_abs
    else:
        # fixed: the original message had an unterminated quote on 'absolute'
        raise ValueError("PATH_MODE must be either 'relative' or 'absolute'")

    return fname.as_posix()

111 

112 

113# actual dbg function 

@typing.overload
def dbg() -> _NoExpPassedSentinel: ...
@typing.overload
def dbg(
    exp: _NoExpPassedSentinel,
    formatter: typing.Optional[typing.Callable[[typing.Any], str]] = None,
    val_joiner: str = DEFAULT_VAL_JOINER,
) -> _NoExpPassedSentinel: ...
@typing.overload
def dbg(
    exp: _ExpType,
    formatter: typing.Optional[typing.Callable[[typing.Any], str]] = None,
    val_joiner: str = DEFAULT_VAL_JOINER,
) -> _ExpType: ...
def dbg(
    exp: typing.Union[_ExpType, _NoExpPassedSentinel] = _NoExpPassed,
    formatter: typing.Optional[typing.Callable[[typing.Any], str]] = None,
    val_joiner: str = DEFAULT_VAL_JOINER,
) -> typing.Union[_ExpType, _NoExpPassedSentinel]:
    """Call dbg with any variable or expression.

    Calling dbg will print to stderr the current filename and lineno,
    as well as the passed expression and what the expression evaluates to:

        from muutils.dbg import dbg

        a = 2
        b = 5

        dbg(a+b)

        def square(x: int) -> int:
            return x * x

        dbg(square(a))

    # Parameters:
    - `exp`: the expression/value to print and return; when omitted, only the
      location and a global call counter are printed
    - `formatter`: callable used to render the value (defaults to `repr`)
    - `val_joiner`: string placed between the expression text and its value

    # Returns:
    the passed expression unchanged, so `dbg(...)` can be inserted inline.
    """
    global _COUNTER

    # get the context: walk outward through the call stack until we find the
    # source line that contains the `dbg` call, and recover the argument text
    # from between its parentheses
    line_exp: str = "unknown"
    current_file: str = "unknown"
    dbg_frame: typing.Optional[inspect.FrameInfo] = None
    for frame in inspect.stack():
        if frame.code_context is None:
            continue
        line: str = frame.code_context[0]
        if "dbg" in line:
            current_file = _process_path(Path(frame.filename))
            dbg_frame = frame
            # slice out the text between the first "(" and the last ")"
            start: int = line.find("(") + 1
            end: int = line.rfind(")")
            if end == -1:
                # call spans multiple lines; take everything after "("
                end = len(line)
            line_exp = line[start:end]
            break

    fname: str = "unknown"
    # jupyter kernels report synthetic filenames like /tmp/ipykernel_*/...;
    # show a call chain of user functions instead of the raw temp path
    if current_file.startswith("/tmp/ipykernel_"):
        stack: list[inspect.FrameInfo] = inspect.stack()
        filtered_functions: list[str] = []
        # this loop will find, in this order:
        # - the dbg function call
        # - the functions we care about displaying
        # - `<module>`
        # - a bunch of jupyter internals we don't care about
        for frame_info in stack:
            if _process_path(Path(frame_info.filename)) != current_file:
                continue
            if frame_info.function == "<module>":
                break
            if frame_info.function.startswith("dbg"):
                continue
            filtered_functions.append(frame_info.function)
        if dbg_frame is not None:
            filtered_functions.append(f"<ipykernel>:{dbg_frame.lineno}")
        else:
            filtered_functions.append(current_file)
        # frames were collected innermost-first; display outermost-first
        filtered_functions.reverse()
        fname = " -> ".join(filtered_functions)
    elif dbg_frame is not None:
        fname = f"{current_file}:{dbg_frame.lineno}"

    # assemble the message
    msg: str
    if exp is _NoExpPassed:
        # if no expression is passed, just show location and counter value
        msg = f"[ {fname} ] <dbg {_COUNTER}>"
        _COUNTER += 1
    else:
        # if expression passed, format its value and show location, expr, and value
        exp_val: str = formatter(exp) if formatter else repr(exp)
        msg = f"[ {fname} ] {line_exp}{val_joiner}{exp_val}"

    # print the message
    print(
        msg,
        file=sys.stderr,
    )

    # return the expression itself
    return exp

216 

217 

218# formatted `dbg_*` functions with their helpers 

219 

# default keyword arguments forwarded to `array_summary` by `tensor_info`
DBG_TENSOR_ARRAY_SUMMARY_DEFAULTS: DBGTensorArraySummaryDefaultsType = {
    "fmt": "unicode",
    "precision": 2,
    "stats": True,
    "shape": True,
    "dtype": True,
    "device": True,
    "requires_grad": True,
    "sparkline": True,
    "sparkline_bins": 7,
    "sparkline_logy": None,  # None means auto-detect
    "colored": True,
    "eq_char": "=",
}


# joiner between expression text and formatted value for the dbg_* wrappers
# (despite the name, it is also used by dbg_dict and dbg_auto)
DBG_TENSOR_VAL_JOINER: str = ": "

237 

238 

def tensor_info(tensor: typing.Any) -> str:
    """Render a one-line summary of a tensor/array via `muutils.tensor_info.array_summary`.

    All formatting options come from `DBG_TENSOR_ARRAY_SUMMARY_DEFAULTS`.
    The import is function-local so the tensor machinery is only loaded when needed.
    """
    from muutils.tensor_info import array_summary

    # TODO: explicitly pass args to avoid type: ignore (mypy can't match overloads with **TypedDict spread)
    return array_summary(tensor, as_list=False, **DBG_TENSOR_ARRAY_SUMMARY_DEFAULTS)  # type: ignore[call-overload]

244 

245 

# options controlling `dict_info`; see DBGDictDefaultsType for field meanings
DBG_DICT_DEFAULTS: DBGDictDefaultsType = {
    "key_types": True,  # show key-type summary in the header
    "val_types": True,  # show value-type summary in the header
    "max_len": 32,  # only expand entries for dicts with fewer items than this
    "indent": "  ",  # indentation unit per nesting level
    "max_depth": 3,  # recursion limit for nested dicts
}

253 

# options controlling `list_info`; see DBGListDefaultsType for field meanings
DBG_LIST_DEFAULTS: DBGListDefaultsType = {
    "max_len": 16,
    "summary_show_types": True,
}


def list_info(
    lst: typing.List[typing.Any],
) -> str:
    """Render a list for debugging.

    Lists of at most `max_len` elements are shown in full (repr of each
    element); longer lists collapse to a `<list of len()=N, types={...}>`
    summary, with the type set included when `summary_show_types` is on.
    """
    n_items: int = len(lst)

    # short enough: show every element
    if n_items <= DBG_LIST_DEFAULTS["max_len"]:
        return "[" + ", ".join(repr(item) for item in lst) + "]"

    # otherwise emit a compact summary instead of the elements
    summary: str = f"<list of len()={n_items}"
    if DBG_LIST_DEFAULTS["summary_show_types"]:
        type_names: typing.List[str] = sorted({type(item).__name__ for item in lst})
        summary += f", types={{{', '.join(type_names)}}}"
    return summary + ">"

275 

276 

# tensors are recognized by their `str(type(x))` spelling, so that neither
# torch nor numpy has to be imported just to check a value's type
TENSOR_STR_TYPES: typing.Set[str] = {
    "<class 'torch.Tensor'>",
    "<class 'numpy.ndarray'>",
}

281 

282 

def dict_info(
    d: typing.Dict[typing.Any, typing.Any],
    depth: int = 0,
) -> str:
    """Render a dict for debugging: a header line plus (optionally) its entries.

    The header shows the size and, per `DBG_DICT_DEFAULTS`, summaries of the
    key and value types. Entries are listed one per line when the dict is
    non-empty, smaller than `max_len`, and `depth` is below `max_depth`;
    nested dicts recurse, tensor-likes and lists use their own formatters.
    """
    size: int = len(d)
    pad: str = DBG_DICT_DEFAULTS["indent"]

    # header line, e.g. `<dict of len()=2, key_types={str}, val_types={int}>`
    header: str = f"{pad * depth}<dict of len()={size}"
    if DBG_DICT_DEFAULTS["key_types"] and size > 0:
        key_names = sorted({type(key).__name__ for key in d.keys()})
        header += ", key_types={" + ", ".join(key_names) + "}"
    if DBG_DICT_DEFAULTS["val_types"] and size > 0:
        val_names = sorted({type(val).__name__ for val in d.values()})
        header += ", val_types={" + ", ".join(val_names) + "}"
    header += ">"
    lines: typing.List[str] = [header]

    # entry lines, unless too deep or too many entries
    if depth < DBG_DICT_DEFAULTS["max_depth"]:
        if 0 < size < DBG_DICT_DEFAULTS["max_len"]:
            for key, val in d.items():
                shown_key: str = key if isinstance(key, str) else repr(key)

                shown_val: str
                if isinstance(val, dict):
                    # nested dict: recurse one level deeper
                    shown_val = dict_info(val, depth + 1)
                elif str(type(val)) in TENSOR_STR_TYPES:
                    shown_val = tensor_info(val)
                elif isinstance(val, list):
                    shown_val = list_info(val)
                else:
                    shown_val = repr(val)

                lines.append(
                    f"{pad * (depth + 1)}{shown_key}{DBG_TENSOR_VAL_JOINER}{shown_val}"
                )

    return "\n".join(lines)

327 

328 

def info_auto(
    obj: typing.Any,
) -> str:
    """Automatically format an object for debugging.

    Dispatches to `dict_info`, `list_info`, or `tensor_info` based on the
    value's runtime type, falling back to `repr` for everything else.
    """
    if isinstance(obj, dict):
        return dict_info(obj)
    if isinstance(obj, list):
        return list_info(obj)
    # tensors are matched by type string so torch/numpy need not be imported
    if str(type(obj)) in TENSOR_STR_TYPES:
        return tensor_info(obj)
    return repr(obj)

341 

342 

def dbg_tensor(
    tensor: _ExpType,  # numpy array or torch tensor
) -> _ExpType:
    """dbg function for tensors, using tensor_info formatter.

    Prints a one-line tensor summary to stderr, then returns `tensor`
    unchanged so the call can be inserted inline.
    """
    return dbg(tensor, formatter=tensor_info, val_joiner=DBG_TENSOR_VAL_JOINER)

352 

353 

def dbg_dict(
    d: _ExpType_dict,
) -> _ExpType_dict:
    """dbg function for dictionaries, using dict_info formatter.

    Prints a structured dict summary to stderr, then returns `d` unchanged.
    Uses the same `": "` joiner constant as the tensor variant.
    """
    return dbg(d, formatter=dict_info, val_joiner=DBG_TENSOR_VAL_JOINER)

363 

364 

def dbg_auto(
    obj: _ExpType,
) -> _ExpType:
    """dbg function for automatic formatting based on type.

    Prints a type-appropriate summary (via `info_auto`) to stderr, then
    returns `obj` unchanged.
    """
    return dbg(obj, formatter=info_auto, val_joiner=DBG_TENSOR_VAL_JOINER)

374 

375 

def _normalize_for_loose(text: str) -> str:
    """Normalize *text* for loose matching.

    Every run of characters outside [a-zA-Z0-9] collapses to a single
    space, and leading/trailing whitespace is dropped.
    """
    tokens = re.sub(r"[^a-zA-Z0-9]+", " ", text).split()
    return " ".join(tokens)

380 

381 

def _compile_pattern(
    pattern: str | re.Pattern[str],
    *,
    cased: bool = False,
    loose: bool = False,
) -> re.Pattern[str]:
    """Compile *pattern*, honoring case-sensitivity and loose-matching options.

    Pre-compiled patterns are returned untouched (their own flags win).
    String patterns default to case-insensitive unless `cased` is set; with
    `loose`, the pattern text is normalized the same way as the search text.
    """
    if isinstance(pattern, re.Pattern):
        # already compiled — the caller's flags take precedence
        return pattern

    source: str = _normalize_for_loose(pattern) if loose else pattern
    flags: int = 0 if cased else re.IGNORECASE
    return re.compile(source, flags)

401 

402 

def grep_repr(
    obj: typing.Any,
    pattern: str | re.Pattern[str],
    *,
    char_context: int | None = 20,
    line_context: int | None = None,
    before_context: int = 0,
    after_context: int = 0,
    context: int | None = None,
    max_count: int | None = None,
    cased: bool = False,
    loose: bool = False,
    line_numbers: bool = False,
    highlight: bool = True,
    color: str = "31",
    separator: str = "--",
    quiet: bool = False,
) -> typing.List[str] | None:
    """grep-like search on ``repr(obj)`` with improved grep-style options.

    By default, string patterns are case-insensitive. Pre-compiled regex
    patterns use their own flags.

    Parameters:
    - obj: Object to search (its repr() string is scanned)
    - pattern: Regular expression pattern (string or pre-compiled)
    - char_context: Characters of context before/after each match (default: 20)
    - line_context: Lines of context before/after; overrides char_context
    - before_context: Lines of context before match (like grep -B)
    - after_context: Lines of context after match (like grep -A)
    - context: Lines of context before AND after (like grep -C)
    - max_count: Stop after this many matches
    - cased: Force case-sensitive search for string patterns
    - loose: Normalize spaces/punctuation for flexible matching
    - line_numbers: Show line numbers in output
    - highlight: Wrap matches with ANSI color codes
    - color: ANSI color code (default: "31" for red)
    - separator: Separator between multiple matches
    - quiet: Return results instead of printing

    Returns:
    - None if quiet=False (prints to stdout)
    - List[str] if quiet=True (returns formatted output lines)
    """
    # Handle context parameter shortcuts (-C implies both -B and -A)
    if context is not None:
        before_context = after_context = context

    # Prepare text and pattern
    text: str = repr(obj)
    if loose:
        # NOTE(review): loose normalization collapses newlines into spaces,
        # so line-based context on a loose search sees one long line
        text = _normalize_for_loose(text)

    regex: re.Pattern[str] = _compile_pattern(pattern, cased=cased, loose=loose)

    def _color_match(segment: str) -> str:
        # re-run the regex over the segment to wrap every hit in ANSI codes
        if not highlight:
            return segment
        return regex.sub(lambda m: f"\033[1;{color}m{m.group(0)}\033[0m", segment)

    output_lines: list[str] = []
    match_count: int = 0

    # Determine if we're using line-based context
    using_line_context = (
        line_context is not None or before_context > 0 or after_context > 0
    )

    if using_line_context:
        # record each line's starting offset so a match offset can be
        # translated back into a line index
        lines: list[str] = text.splitlines()
        line_starts: list[int] = []
        pos: int = 0
        for line in lines:
            line_starts.append(pos)
            pos += len(line) + 1  # +1 for newline

        processed_lines: set[int] = set()

        for match in regex.finditer(text):
            if max_count is not None and match_count >= max_count:
                break

            # Find which line contains this match: the last line whose start
            # offset is at or before the match start
            match_line = max(
                i for i, start in enumerate(line_starts) if start <= match.start()
            )

            # Calculate context range
            ctx_before: int
            ctx_after: int
            if line_context is not None:
                ctx_before = ctx_after = line_context
            else:
                ctx_before, ctx_after = before_context, after_context

            start_line: int = max(0, match_line - ctx_before)
            end_line: int = min(len(lines), match_line + ctx_after + 1)

            # Avoid duplicate output for overlapping contexts
            # NOTE(review): a match whose context overlaps an earlier block is
            # skipped entirely and does NOT count toward max_count
            line_range: set[int] = set(range(start_line, end_line))
            if line_range & processed_lines:
                continue
            processed_lines.update(line_range)

            # Format the context block
            context_lines: list[str] = []
            for i in range(start_line, end_line):
                line_text = lines[i]
                if line_numbers:
                    # NOTE(review): the prefix is added *before* highlighting,
                    # so a digit-matching pattern can also color line numbers
                    line_prefix = f"{i + 1}:"
                    line_text = f"{line_prefix}{line_text}"
                context_lines.append(_color_match(line_text))

            if output_lines and separator:
                output_lines.append(separator)
            output_lines.extend(context_lines)
            match_count += 1

    else:
        # Character-based context: a snippet of up to `ctx` chars on each side
        ctx: int = 0 if char_context is None else char_context

        for match in regex.finditer(text):
            if max_count is not None and match_count >= max_count:
                break

            start: int = max(0, match.start() - ctx)
            end: int = min(len(text), match.end() + ctx)
            snippet: str = text[start:end]

            if output_lines and separator:
                output_lines.append(separator)
            output_lines.append(_color_match(snippet))
            match_count += 1

    if quiet:
        return output_lines
    else:
        for line in output_lines:
            print(line)
        return None