Coverage for tests/unit/web/test_bundle_html.py: 100%

202 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2025-06-06 11:28 -0600

1from __future__ import annotations 

2 

3import io 

4import subprocess 

5import sys 

6import textwrap 

7import urllib.request 

8from pathlib import Path 

9 

10import pytest 

11 

12import muutils.web.bundle_html as bundle_html 

13 

14# ---------------------------------------------------------------- 

15# helper / fixtures 

16# ---------------------------------------------------------------- 

17 

18 

@pytest.fixture()
def site(tmp_path: Path) -> dict[str, Path]:
    """Return paths for a tiny site with four asset types.

    Writes a stylesheet, a script, an SVG icon, a PNG stub and an
    ``index.html`` referencing all four into ``tmp_path``.

    Returns:
        Mapping with the keys ``"root"`` (the temporary directory),
        ``"html"``, ``"css"``, ``"js"``, ``"svg"`` and ``"png"``.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    # Only the 8-byte PNG signature: enough to have a file to base64-encode.
    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")

    # The markup mixes quote styles, has stray whitespace inside tags and
    # splits <use> across two lines.  Child tags are indented so that
    # test_indentation_preserved has leading whitespace to look for.
    # NOTE(review): the nesting depth is reconstructed (4 spaces per level);
    # confirm against the original test file.
    html = tmp_path / "index.html"
    html.write_text(
        textwrap.dedent(
            """\
            <!doctype html>
            <html>
            <head>
                <link rel="stylesheet" href="style.css" >
                <script src='app.js'></script>
            </head>
            <body>
                <img src="pic.png">
                <svg>
                    <use
                        xlink:href = "icon.svg#i" />
                </svg>
            </body>
            </html>
            """
        )
    )

    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }

64 

65 

def _get(cfg_patch: dict | None = None) -> bundle_html.InlineConfig:
    """Build an ``InlineConfig``, applying *cfg_patch* as keyword overrides.

    A missing or empty patch yields ``InlineConfig()`` with no arguments.
    """
    overrides = cfg_patch if cfg_patch else {}
    return bundle_html.InlineConfig(**overrides)

69 

70 

def _inline(
    text: str,
    base: Path,
    cfg_patch: dict | None = None,
) -> str:
    """Inline the assets referenced by *text*, resolving them against *base*.

    *cfg_patch* is forwarded to ``_get`` to build the configuration.
    """
    return bundle_html.inline_html_assets(
        text,
        base_path=base,
        config=_get(cfg_patch),
    )

79 

80 

81def _has_b64_fragment(html: str, mime: str) -> bool: 

82 """Return True if a data URI for *mime* is present.""" 

83 return f"data:{mime};base64," in html 

84 

85 

86# ---------------------------------------------------------------- 

87# core behaviour (regex mode) 

88# ---------------------------------------------------------------- 

89 

90 

def test_all_assets_inlined_regex(site: dict[str, Path]) -> None:
    """With the default config, CSS, JS, PNG and SVG references are all inlined."""
    result = _inline(site["html"].read_text(), site["root"])
    print(result)

    for tag in ("<style>", "</style>", "<script>", "</script>"):
        assert tag in result
    for mime in ("image/png", "image/svg+xml"):
        assert _has_b64_fragment(result, mime)
    # filename comments wrap the inlined content
    for marker in ("<!-- begin 'style.css' -->", "<!-- end 'app.js' -->"):
        assert marker in result

102 

103 

def test_indentation_preserved(site: dict[str, Path]) -> None:
    """An inlined tag starts at the same indentation as the tag it replaced."""
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])
    # original line had 4 spaces indent, so the replacement tag must follow
    # a newline plus exactly those four spaces
    assert "\n    <style>" in out or "\n    <script>" in out

109 

110 

def test_skip_large_file(site: dict[str, Path]) -> None:
    """A script larger than ``max_bytes`` keeps its original ``<script src>`` tag."""
    root, page = site["root"], site["html"]
    (root / "large.js").write_bytes(b"x" * 200_000)

    # append a reference to the oversized script just before </body>
    script_tag = '<script src="large.js"></script>'
    page.write_text(page.read_text().replace("</body>", script_tag + "\n</body>"))

    result = _inline(page.read_text(), root, cfg_patch={"max_bytes": 1024})
    assert script_tag in result

121 

122 

def test_allowed_extensions_filter(site: dict[str, Path]) -> None:
    """Restricting ``allowed_extensions`` to ``.css`` leaves JS and PNG untouched."""
    css_only = {"allowed_extensions": {".css"}}
    result = _inline(site["html"].read_text(), site["root"], cfg_patch=css_only)

    assert "<style>" in result
    assert "<script src='app.js'>" in result
    assert not _has_b64_fragment(result, "image/png")

131 

132 

def test_comment_toggle(site: dict[str, Path]) -> None:
    """Disabling ``include_filename_comments`` drops the begin marker for the CSS."""
    no_comments = {"include_filename_comments": False}
    result = _inline(site["html"].read_text(), site["root"], cfg_patch=no_comments)
    assert "begin 'style.css'" not in result

140 

141 

def test_local_off_remote_off(site: dict[str, Path]) -> None:
    """With both ``local`` and ``remote`` disabled the stylesheet link survives."""
    everything_off = {"local": False, "remote": False}
    result = _inline(site["html"].read_text(), site["root"], cfg_patch=everything_off)
    # the link tag must come through exactly as written in the fixture
    assert '<link rel="stylesheet" href="style.css" >' in result

150 

151 

152# ---------------------------------------------------------------- 

153# remote asset handling 

154# ---------------------------------------------------------------- 

155 

156 

def test_remote_fetch_allowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """With ``remote=True`` a stylesheet URL is fetched and inlined as ``<style>``."""
    cdn_url = "https://cdn/foo.css"
    page = site["html"]
    page.write_text(page.read_text().replace("style.css", cdn_url))

    class _StubResponse(io.BytesIO):
        """In-memory stand-in for the object ``urlopen`` returns."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            return None

    def _stub_urlopen(url: str, *args, **kwargs):  # type: ignore[override]
        # only the rewritten stylesheet URL may be requested
        assert url == cdn_url
        return _StubResponse(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", _stub_urlopen)
    result = _inline(page.read_text(), site["root"], cfg_patch={"remote": True})
    assert "<style>" in result
    assert "background:blue" in result

177 

178 

def test_remote_disallowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """With the default config a remote stylesheet link is left in place."""
    cdn_url = "https://cdn/foo.css"
    page = site["html"]
    page.write_text(page.read_text().replace("style.css", cdn_url))

    def _empty_response(*args, **kwargs):
        # keeps the test off the network whatever the code under test does
        return io.BytesIO(b"")

    monkeypatch.setattr(urllib.request, "urlopen", _empty_response)
    result = _inline(page.read_text(), site["root"])  # default remote=False
    assert f'href="{cdn_url}"' in result  # untouched link remains

187 

188 

189# ---------------------------------------------------------------- 

190# bs4 mode parity checks 

191# ---------------------------------------------------------------- 

192 

193 

def test_bs4_matches_regex(site: dict[str, Path]) -> None:
    """The bs4 backend inlines the same fixture the regex backend does.

    Only spot checks are made; the two outputs are not compared for equality.
    """
    page_text = site["html"].read_text()
    root = site["root"]
    via_regex = _inline(page_text, root)
    via_bs4 = _inline(page_text, root, {"use_bs4": True})

    assert "<style>" in via_bs4
    assert "<style>" in via_regex
    assert _has_b64_fragment(via_bs4, "image/png")
    assert "<!-- begin 'style.css' -->" in via_bs4

201 

202 

def test_prettify_flag_bs4(site: dict[str, Path]) -> None:
    """``prettify=True`` in bs4 mode yields output that begins with the doctype."""
    bs4_cfg = _get({"use_bs4": True})
    page_text = site["html"].read_text()
    rendered = bundle_html.inline_html_assets(
        page_text,
        base_path=site["root"],
        config=bs4_cfg,
        prettify=True,
    )
    # prettified soup always starts with <!DOCTYPE or <html ...> on its own line
    first_chars = rendered.lstrip().lower()
    assert first_chars.startswith("<!doctype")

211 

212 

213# ---------------------------------------------------------------- 

214# tag_attr override 

215# ---------------------------------------------------------------- 

216 

217 

def test_tag_attr_override(site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` get inlined."""
    page = site["html"]
    # rename every href attribute so the default attribute name cannot match
    page.write_text(page.read_text().replace("href", "data-href"))

    custom_cfg = _get({"tag_attr": {"link": "data-href"}})
    result = bundle_html.inline_html_assets(
        page.read_text(),
        base_path=site["root"],
        config=custom_cfg,
    )
    assert "<style>" in result

225 

226 

227# ---------------------------------------------------------------- 

228# CLI integration (subprocess) 

229# ---------------------------------------------------------------- 

230 

231 

def test_cli_smoke(tmp_path: Path, site: dict[str, Path]) -> None:
    """Running the module file as a script rewrites the page with inlined assets."""
    target = tmp_path / "page.html"
    target.write_text(site["html"].read_text())

    script = Path(bundle_html.__file__).resolve()
    # input and --output are the same file, so the page is overwritten in place
    command = [sys.executable, str(script), str(target), "--output", str(target)]
    subprocess.run(command, check=True)

    bundled = target.read_text()
    assert "<style>" in bundled
    assert "data:image/png;base64," in bundled

242 

243 

@pytest.fixture()
def tiny_site(tmp_path: Path) -> dict[str, Path]:
    """Create a minimal web-site with one asset of each supported type.

    Writes a stylesheet, a script, an SVG icon, a PNG stub and an
    ``index.html`` referencing all four into ``tmp_path``.

    Returns:
        Mapping with the keys ``"root"`` (the temporary directory),
        ``"html"``, ``"css"``, ``"js"``, ``"svg"`` and ``"png"``.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")  # PNG header only

    # No line-continuation backslash after the opening quotes, so the page
    # starts with an empty line.  Child tags are indented so the indentation
    # test has leading whitespace to look for.
    # NOTE(review): the nesting depth is reconstructed (4 spaces per level);
    # confirm against the original test file.
    html = tmp_path / "index.html"
    html.write_text(
        textwrap.dedent(
            """
            <!doctype html>
            <html>
            <head>
                <link rel="stylesheet" href="style.css">
                <script src="app.js"></script>
            </head>
            <body>
                <img src="pic.png">
                <svg>
                    <use xlink:href="icon.svg#i"></use>
                </svg>
            </body>
            </html>
            """
        )
    )
    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }

287 

288 

289# utilities ----------------------------------------------------------- 

290 

291 

292def _b64_in(html: str, mime: str) -> bool: 

293 return f"data:{mime};base64," in html 

294 

295 

296# regex-mode tests ---------------------------------------------------- 

297 

298 

def test_inline_everything_regex(tiny_site: dict[str, Path]) -> None:
    """With the default config, CSS, JS, PNG and SVG references are all inlined."""
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"])

    for tag in ("<style>", "<script>"):
        assert tag in result
    for mime in ("image/png", "image/svg+xml"):
        assert _b64_in(result, mime)
    # filename comments wrap the inlined content
    for marker in ("<!-- begin 'style.css' -->", "<!-- end 'app.js' -->"):
        assert marker in result

307 

308 

def test_indentation_preserved_2(tiny_site: dict[str, Path]) -> None:
    """An inlined tag starts at the same indentation as the tag it replaced."""
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    # NOTE(review): the four-space literal is restored by analogy with
    # test_indentation_preserved ("original line had 4 spaces indent");
    # confirm against the original test file.
    assert "\n    <style>" in out or "\n    <script>" in out

312 

313 

def test_max_bytes_limit(tiny_site: dict[str, Path]) -> None:
    """A script larger than ``max_bytes`` keeps its original ``<script src>`` tag."""
    root, page = tiny_site["root"], tiny_site["html"]
    (root / "big.js").write_bytes(b"x" * 200_000)

    # append a reference to the oversized script just before </body>
    script_tag = '<script src="big.js"></script>'
    page.write_text(page.read_text().replace("</body>", script_tag + "\n</body>"))

    result = _inline(page.read_text(), root, {"max_bytes": 1_024})
    assert script_tag in result

328 

329 

def test_allowed_extensions_filter_2(tiny_site: dict[str, Path]) -> None:
    """Restricting ``allowed_extensions`` to ``.css`` leaves JS and PNG untouched."""
    css_only = {"allowed_extensions": {".css"}}
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"], css_only)

    expected_present = ("<style>", '<script src="app.js">')
    assert all(piece in result for piece in expected_present[:1])
    assert all(piece in result for piece in expected_present[1:])
    assert not _b64_in(result, "image/png")

339 

340 

def test_comment_toggle_off(tiny_site: dict[str, Path]) -> None:
    """Disabling ``include_filename_comments`` drops the begin marker for the CSS."""
    no_comments = {"include_filename_comments": False}
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"], no_comments)
    assert "begin 'style.css'" not in result

348 

349 

def test_disable_local_and_remote(tiny_site: dict[str, Path]) -> None:
    """With both ``local`` and ``remote`` disabled the stylesheet href survives."""
    everything_off = {"local": False, "remote": False}
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"], everything_off)
    assert 'href="style.css"' in result

357 

358 

359# remote fetch -------------------------------------------------------- 

360 

361 

def test_remote_asset_inlined(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """With ``remote=True`` the body fetched from a stylesheet URL is inlined."""
    cdn_url = "https://cdn.example.com/remote.css"
    page = tiny_site["html"]
    page.write_text(page.read_text().replace("style.css", cdn_url))

    class _StubResponse(io.BytesIO):
        """In-memory stand-in for the object ``urlopen`` returns."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            return None

    def _stub_urlopen(url: str, *args, **kwargs):  # type: ignore[override]
        # only the rewritten stylesheet URL may be requested
        assert url == cdn_url
        return _StubResponse(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", _stub_urlopen)
    result = _inline(page.read_text(), tiny_site["root"], {"remote": True})
    assert "background:blue" in result

384 

385 

def test_remote_blocked(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """With the default config a remote stylesheet link is left in place."""
    cdn_url = "https://cdn.example.com/remote.css"
    page = tiny_site["html"]
    page.write_text(page.read_text().replace("style.css", cdn_url))

    def _empty_response(*args, **kwargs):
        # keeps the test off the network whatever the code under test does
        return io.BytesIO(b"")

    monkeypatch.setattr(urllib.request, "urlopen", _empty_response)
    result = _inline(page.read_text(), tiny_site["root"])
    assert f'href="{cdn_url}"' in result

396 

397 

398# bs4 parity ---------------------------------------------------------- 

399 

400 

def test_bs4_equals_regex(tiny_site: dict[str, Path]) -> None:
    """The bs4 backend inlines the same fixture the regex backend does.

    Only spot checks are made; the two outputs are not compared for equality.
    """
    page_text = tiny_site["html"].read_text()
    root = tiny_site["root"]
    via_regex = _inline(page_text, root)
    via_bs4 = _inline(page_text, root, {"use_bs4": True})

    assert "<style>" in via_bs4
    assert "<style>" in via_regex
    assert _b64_in(via_bs4, "image/png")
    assert "<!-- begin 'style.css' -->" in via_bs4

408 

409 

def test_bs4_prettify(tiny_site: dict[str, Path]) -> None:
    """``prettify=True`` in bs4 mode yields output that begins with the doctype."""
    page_text = tiny_site["html"].read_text()
    bs4_cfg = bundle_html.InlineConfig(use_bs4=True)
    rendered = bundle_html.inline_html_assets(
        page_text,
        base_path=tiny_site["root"],
        config=bs4_cfg,
        prettify=True,
    )
    # leading whitespace and letter case are ignored in the comparison
    first_chars = rendered.lstrip().lower()
    assert first_chars.startswith("<!doctype")

419 

420 

421# tag-attr override --------------------------------------------------- 

422 

423 

def test_custom_attribute_name(tiny_site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` get inlined."""
    page = tiny_site["html"]
    # rename every href attribute so the default attribute name cannot match
    renamed = page.read_text().replace("href", "data-href")
    page.write_text(renamed)

    override = {"tag_attr": {"link": "data-href"}}
    result = _inline(page.read_text(), tiny_site["root"], override)
    assert "<style>" in result

431 

432 

433# data-uri idempotence ----------------------------------------------- 

434 

435 

def test_data_uri_not_reprocessed(tiny_site: dict[str, Path]) -> None:
    """Inlining already-inlined output returns it unchanged."""
    root = tiny_site["root"]
    # first pass produces the data URIs
    first_pass = _inline(tiny_site["html"].read_text(), root)
    # feeding that output back in must be a no-op
    second_pass = _inline(first_pass, root)
    assert second_pass == first_pass

442 

443 

444# mixed quotes -------------------------------------------------------- 

445 

446 

def test_single_quotes_handled(tiny_site: dict[str, Path]) -> None:
    """A single-quoted ``src`` value is inlined like a double-quoted one."""
    page = tiny_site["html"]
    # mix quote styles: only the script reference switches to single quotes
    requoted = page.read_text().replace('"app.js"', "'app.js'")
    page.write_text(requoted)

    result = _inline(page.read_text(), tiny_site["root"])
    assert "<script>" in result

455 

456 

457# fragment ids -------------------------------------------------------- 

458 

459 

def test_fragment_in_src_kept(tiny_site: dict[str, Path]) -> None:
    """``icon.svg#i`` becomes an SVG data URI and the ``#i`` fragment disappears.

    Despite the "kept" in the test name, the assertions check that the
    fragment is absent from the output.
    """
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    fragment_present = "#i" in result
    assert not fragment_present
    assert _b64_in(result, "image/svg+xml")

465 

466 

467# CLI ----------------------------------------------------------------- 

468 

469 

def test_cli_overwrite(tmp_path: Path, tiny_site: dict[str, Path]) -> None:
    """Running the module file as a script with ``--output`` set to the input
    path overwrites the page with its inlined version."""
    target = tmp_path / "page.html"
    target.write_text(tiny_site["html"].read_text())

    script = Path(bundle_html.__file__).resolve()
    command = [sys.executable, str(script), str(target), "--output", str(target)]
    subprocess.check_call(command)

    bundled = target.read_text()
    assert "<style>" in bundled
    assert _b64_in(bundled, "image/png")