Coverage for tests / unit / web / test_bundle_html.py: 100%

204 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-18 02:51 -0700

1from __future__ import annotations 

2 

3import io 

4import subprocess 

5import sys 

6import textwrap 

7import urllib.request 

8from pathlib import Path 

9 

10import pytest 

11 

12import muutils.web.bundle_html as bundle_html 

13 

14# ---------------------------------------------------------------- 

15# helper / fixtures 

16# ---------------------------------------------------------------- 

17 

18 

@pytest.fixture()
def site(tmp_path: Path) -> dict[str, Path]:
    """Build a tiny site in *tmp_path* and return the paths of its files.

    Writes a stylesheet, a script, an SVG, a PNG (signature bytes only) and
    an ``index.html`` that references all four.

    Returns:
        Mapping with the keys ``root``, ``html``, ``css``, ``js``, ``svg``
        and ``png``; ``root`` is *tmp_path* itself.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")

    html = tmp_path / "index.html"
    # NOTE(review): the nesting of the markup is reconstructed so that the
    # <link>/<script> lines sit at four spaces, which is what
    # test_indentation_preserved documents -- confirm against the real file.
    html.write_text(
        textwrap.dedent(
            """\
            <!doctype html>
            <html>
            <head>
                <link rel="stylesheet" href="style.css" >
                <script src='app.js'></script>
            </head>
            <body>
                <img src="pic.png">
                <svg>
                    <use
                        xlink:href = "icon.svg#i" />
                </svg>
            </body>
            </html>
            """
        )
    )

    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }

64 

65 

def _get(cfg_patch: dict | None = None) -> bundle_html.InlineConfig:
    """Return an ``InlineConfig`` built from the keyword overrides in *cfg_patch*.

    ``None`` (or an empty mapping) yields a config constructed with no
    arguments.
    """
    overrides = cfg_patch or {}
    return bundle_html.InlineConfig(**overrides)

69 

70 

def _inline(
    text: str,
    base: Path,
    cfg_patch: dict | None = None,
) -> str:
    """Inline the assets referenced by *text* and return the resulting HTML.

    Args:
        text: HTML source to process.
        base: Passed to ``inline_html_assets`` as ``base_path``.
        cfg_patch: Optional ``InlineConfig`` keyword overrides (for example
            ``{"remote": True}``).
    """
    cfg = _get(cfg_patch)
    return bundle_html.inline_html_assets(text, base_path=base, config=cfg)

79 

80 

81def _has_b64_fragment(html: str, mime: str) -> bool: 

82 """Return True if a data URI for *mime* is present.""" 

83 return f"data:{mime};base64," in html 

84 

85 

86# ---------------------------------------------------------------- 

87# core behaviour (regex mode) 

88# ---------------------------------------------------------------- 

89 

90 

def test_all_assets_inlined_regex(site: dict[str, Path]) -> None:
    """With the default config, all four fixture assets end up inlined."""
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])

    # One assertion per expectation so a failure names the missing piece.
    assert "<style>" in out
    assert "</style>" in out
    assert "<script>" in out
    assert "</script>" in out
    assert _has_b64_fragment(out, "image/png")
    assert _has_b64_fragment(out, "image/svg+xml")
    assert "<!-- begin 'style.css' -->" in out
    assert "<!-- end 'app.js' -->" in out

102 

103 

def test_indentation_preserved(site: dict[str, Path]) -> None:
    """An inlined tag starts at the indentation of the tag it replaced."""
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])
    # original line had 4 spaces indent
    # NOTE(review): the four-space run inside the literals is restored from
    # the comment above -- confirm against the real test file.
    assert "\n    <style>" in out or "\n    <script>" in out

109 

110 

def test_skip_large_file(site: dict[str, Path]) -> None:
    """A 200 000-byte script is left as a reference when ``max_bytes`` is 1024."""
    big = site["root"] / "large.js"
    big.write_bytes(b"x" * 200_000)

    # Append a reference to the oversized script just before </body>.
    page = site["html"]
    page.write_text(
        page.read_text().replace("</body>", '<script src="large.js"></script>\n</body>')
    )

    out = _inline(page.read_text(), site["root"], cfg_patch={"max_bytes": 1024})
    assert '<script src="large.js"></script>' in out

121 

122 

def test_allowed_extensions_filter(site: dict[str, Path]) -> None:
    """With ``allowed_extensions={".css"}`` only the stylesheet is inlined."""
    out = _inline(
        site["html"].read_text(),
        site["root"],
        cfg_patch={"allowed_extensions": {".css"}},
    )
    assert "<style>" in out
    # The script reference and the image must be left as they were.
    assert "<script src='app.js'>" in out
    assert not _has_b64_fragment(out, "image/png")

131 

132 

def test_comment_toggle(site: dict[str, Path]) -> None:
    """No ``begin '<file>'`` marker is emitted when filename comments are off."""
    out = _inline(
        site["html"].read_text(),
        site["root"],
        cfg_patch={"include_filename_comments": False},
    )
    assert "begin 'style.css'" not in out

140 

141 

def test_local_off_remote_off(site: dict[str, Path]) -> None:
    """With ``local`` and ``remote`` both False the stylesheet link survives."""
    out = _inline(
        site["html"].read_text(),
        site["root"],
        cfg_patch={"local": False, "remote": False},
    )
    # nothing should change
    assert '<link rel="stylesheet" href="style.css" >' in out

150 

151 

152# ---------------------------------------------------------------- 

153# remote asset handling 

154# ---------------------------------------------------------------- 

155 

156 

def test_remote_fetch_allowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """A remote stylesheet is fetched and inlined when ``remote=True``.

    ``urllib.request.urlopen`` is replaced by a stub, so no network access
    takes place.
    """
    remote_css = "https://cdn/foo.css"
    site["html"].write_text(site["html"].read_text().replace("style.css", remote_css))

    class FakeResp(io.BytesIO):
        """In-memory response usable in a ``with`` block; exit is a no-op."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            pass

    def fake_open(url: str, *a, **k):  # type: ignore[override]
        # Only the rewritten stylesheet URL is expected to be requested.
        assert url == remote_css
        return FakeResp(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_open)
    out = _inline(site["html"].read_text(), site["root"], cfg_patch={"remote": True})
    assert "<style>" in out
    assert "background:blue" in out

177 

178 

def test_remote_disallowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """A remote stylesheet link is left alone under the default config."""
    remote_css = "https://cdn/foo.css"
    site["html"].write_text(site["html"].read_text().replace("style.css", remote_css))
    # Stub urlopen so that the test can never touch the network.
    monkeypatch.setattr(urllib.request, "urlopen", lambda *a, **k: io.BytesIO(b""))
    out = _inline(site["html"].read_text(), site["root"])  # default remote=False
    assert f'href="{remote_css}"' in out  # untouched link remains

187 

188 

189# ---------------------------------------------------------------- 

190# bs4 mode parity checks 

191# ---------------------------------------------------------------- 

192 

193 

def test_bs4_matches_regex(site: dict[str, Path]) -> None:
    """The bs4 and regex back ends produce the same inlining markers.

    The two outputs are not compared byte for byte; each is checked for the
    same set of markers.
    """
    raw = site["html"].read_text()
    out_regex = _inline(raw, site["root"])
    out_bs4 = _inline(raw, site["root"], {"use_bs4": True})

    # Apply every check to both outputs so the test really is about parity.
    for out in (out_bs4, out_regex):
        assert "<style>" in out
        assert _has_b64_fragment(out, "image/png")
        assert "<!-- begin 'style.css' -->" in out

201 

202 

def test_prettify_flag_bs4(site: dict[str, Path]) -> None:
    """With ``use_bs4`` and ``prettify=True`` the output still opens with the doctype."""
    raw = site["html"].read_text()
    cfg = _get({"use_bs4": True})
    pretty = bundle_html.inline_html_assets(
        raw, base_path=site["root"], config=cfg, prettify=True
    )
    # prettified soup always starts with <!DOCTYPE or <html ...> on its own line
    assert pretty.lstrip().lower().startswith("<!doctype")

211 

212 

213# ---------------------------------------------------------------- 

214# tag_attr override 

215# ---------------------------------------------------------------- 

216 

217 

def test_tag_attr_override(site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` get inlined."""
    # Every "href" in the page is renamed, including the one in xlink:href.
    site["html"].write_text(site["html"].read_text().replace("href", "data-href"))
    cfg = _get({"tag_attr": {"link": "data-href"}})
    out = bundle_html.inline_html_assets(
        site["html"].read_text(), base_path=site["root"], config=cfg
    )
    assert "<style>" in out

225 

226 

227# ---------------------------------------------------------------- 

228# CLI integration (subprocess) 

229# ---------------------------------------------------------------- 

230 

231 

def test_cli_smoke(tmp_path: Path, site: dict[str, Path]) -> None:
    """Running the module file as a script rewrites the page in place."""
    # The copy lives in tmp_path, i.e. next to the assets the fixture wrote.
    html_copy = tmp_path / "page.html"
    html_copy.write_text(site["html"].read_text())

    assert bundle_html.__file__ is not None
    exe = Path(bundle_html.__file__).resolve()
    subprocess.check_call(
        [sys.executable, str(exe), str(html_copy), "--output", str(html_copy)]
    )

    text = html_copy.read_text()
    assert "<style>" in text
    assert "data:image/png;base64," in text

243 

244 

@pytest.fixture()
def tiny_site(tmp_path: Path) -> dict[str, Path]:
    """Create a minimal site in *tmp_path* with one asset of each kind.

    Writes a stylesheet, a script, an SVG, a PNG (header bytes only) and an
    ``index.html`` that references all four.

    Returns:
        Mapping with the keys ``root``, ``html``, ``css``, ``js``, ``svg``
        and ``png``; ``root`` is *tmp_path* itself.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")  # PNG header only

    html = tmp_path / "index.html"
    # The literal has no trailing backslash after the opening quotes, so the
    # page starts with a blank line.
    # NOTE(review): the nesting of the markup is reconstructed (<link> and
    # <script> at four spaces) -- confirm against the real file.
    html.write_text(
        textwrap.dedent(
            """
            <!doctype html>
            <html>
            <head>
                <link rel="stylesheet" href="style.css">
                <script src="app.js"></script>
            </head>
            <body>
                <img src="pic.png">
                <svg>
                    <use xlink:href="icon.svg#i"></use>
                </svg>
            </body>
            </html>
            """
        )
    )
    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }

288 

289 

290# utilities ----------------------------------------------------------- 

291 

292 

293def _b64_in(html: str, mime: str) -> bool: 

294 return f"data:{mime};base64," in html 

295 

296 

297# regex-mode tests ---------------------------------------------------- 

298 

299 

def test_inline_everything_regex(tiny_site: dict[str, Path]) -> None:
    """With the default config, all four fixture assets end up inlined."""
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert "<style>" in out
    assert "<script>" in out
    assert _b64_in(out, "image/png")
    assert _b64_in(out, "image/svg+xml")
    assert "<!-- begin 'style.css' -->" in out
    assert "<!-- end 'app.js' -->" in out

308 

309 

def test_indentation_preserved_2(tiny_site: dict[str, Path]) -> None:
    """An inlined tag starts at the indentation of the tag it replaced."""
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    # NOTE(review): the four-space run inside the literals is restored by
    # analogy with test_indentation_preserved -- confirm against the real file.
    assert "\n    <style>" in out or "\n    <script>" in out

313 

314 

def test_max_bytes_limit(tiny_site: dict[str, Path]) -> None:
    """A 200 000-byte script is left as a reference when ``max_bytes`` is 1024."""
    big_js = tiny_site["root"] / "big.js"
    big_js.write_bytes(b"x" * 200_000)

    # Append a reference to the oversized script just before </body>.
    page = tiny_site["html"]
    page.write_text(
        page.read_text().replace("</body>", '<script src="big.js"></script>\n</body>')
    )

    out = _inline(
        page.read_text(),
        tiny_site["root"],
        {"max_bytes": 1_024},
    )
    assert '<script src="big.js"></script>' in out

329 

330 

def test_allowed_extensions_filter_2(tiny_site: dict[str, Path]) -> None:
    """With ``allowed_extensions={".css"}`` only the stylesheet is inlined."""
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"allowed_extensions": {".css"}},
    )
    assert "<style>" in out
    # The script reference and the image must be left as they were.
    assert '<script src="app.js">' in out
    assert not _b64_in(out, "image/png")

340 

341 

def test_comment_toggle_off(tiny_site: dict[str, Path]) -> None:
    """No ``begin '<file>'`` marker is emitted when filename comments are off."""
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"include_filename_comments": False},
    )
    assert "begin 'style.css'" not in out

349 

350 

def test_disable_local_and_remote(tiny_site: dict[str, Path]) -> None:
    """With ``local`` and ``remote`` both False the stylesheet href survives."""
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"local": False, "remote": False},
    )
    assert 'href="style.css"' in out

358 

359 

360# remote fetch -------------------------------------------------------- 

361 

362 

def test_remote_asset_inlined(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """A remote stylesheet's content is inlined when ``remote=True``.

    ``urllib.request.urlopen`` is replaced by a stub, so no network access
    takes place.
    """
    remote_css = "https://cdn.example.com/remote.css"
    tiny_site["html"].write_text(
        tiny_site["html"].read_text().replace("style.css", remote_css)
    )

    class _Resp(io.BytesIO):
        """In-memory response usable in a ``with`` block; exit is a no-op."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            pass

    def fake_open(url: str, *a, **k):  # type: ignore[override]
        # Only the rewritten stylesheet URL is expected to be requested.
        assert url == remote_css
        return _Resp(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_open)
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"], {"remote": True})
    assert "background:blue" in out

385 

386 

def test_remote_blocked(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """A remote stylesheet link is left alone under the default config."""
    remote_css = "https://cdn.example.com/remote.css"
    tiny_site["html"].write_text(
        tiny_site["html"].read_text().replace("style.css", remote_css)
    )
    # Stub urlopen so that the test can never touch the network.
    monkeypatch.setattr(urllib.request, "urlopen", lambda *a, **k: io.BytesIO(b""))
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert f'href="{remote_css}"' in out

397 

398 

399# bs4 parity ---------------------------------------------------------- 

400 

401 

def test_bs4_equals_regex(tiny_site: dict[str, Path]) -> None:
    """The bs4 and regex back ends produce the same inlining markers.

    The two outputs are not compared byte for byte; each is checked for the
    same set of markers.
    """
    raw = tiny_site["html"].read_text()
    out_regex = _inline(raw, tiny_site["root"])
    out_bs4 = _inline(raw, tiny_site["root"], {"use_bs4": True})

    # Apply every check to both outputs so the test really is about parity.
    for out in (out_bs4, out_regex):
        assert "<style>" in out
        assert _b64_in(out, "image/png")
        assert "<!-- begin 'style.css' -->" in out

409 

410 

def test_bs4_prettify(tiny_site: dict[str, Path]) -> None:
    """With ``use_bs4`` and ``prettify=True`` the output still opens with the doctype."""
    cfg = bundle_html.InlineConfig(use_bs4=True)
    pretty = bundle_html.inline_html_assets(
        tiny_site["html"].read_text(),
        base_path=tiny_site["root"],
        config=cfg,
        prettify=True,
    )
    # Leading whitespace and letter case are ignored by the check.
    assert pretty.lstrip().lower().startswith("<!doctype")

420 

421 

422# tag-attr override --------------------------------------------------- 

423 

424 

def test_custom_attribute_name(tiny_site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` get inlined."""
    # Every "href" in the page is renamed, including the one in xlink:href.
    tiny_site["html"].write_text(
        tiny_site["html"].read_text().replace("href", "data-href")
    )
    cfg = {"tag_attr": {"link": "data-href"}}
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"], cfg)
    assert "<style>" in out

432 

433 

434# data-uri idempotence ----------------------------------------------- 

435 

436 

def test_data_uri_not_reprocessed(tiny_site: dict[str, Path]) -> None:
    """Running the inliner on its own output leaves that output unchanged."""
    # first inline to get data URIs
    once = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    # second pass should leave them unchanged
    twice = _inline(once, tiny_site["root"])
    assert twice == once

443 

444 

445# mixed quotes -------------------------------------------------------- 

446 

447 

def test_single_quotes_handled(tiny_site: dict[str, Path]) -> None:
    """A single-quoted ``src`` value is inlined like a double-quoted one."""
    page = tiny_site["html"]
    # mix quote styles: only the script reference switches to single quotes
    page.write_text(page.read_text().replace('"app.js"', "'app.js'"))
    out = _inline(page.read_text(), tiny_site["root"])
    assert "<script>" in out

456 

457 

458# fragment ids -------------------------------------------------------- 

459 

460 

def test_fragment_in_src_kept(tiny_site: dict[str, Path]) -> None:
    """``icon.svg#i`` becomes a data URI and the ``#i`` fragment disappears.

    NOTE(review): the test name says "kept", but the assertions check that
    the fragment is dropped -- the name looks stale.
    """
    # icon.svg#i should be replaced by a data URI but the #i fragment removed
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert "#i" not in out
    assert _b64_in(out, "image/svg+xml")

466 

467 

468# CLI ----------------------------------------------------------------- 

469 

470 

def test_cli_overwrite(tmp_path: Path, tiny_site: dict[str, Path]) -> None:
    """Running the module file with ``--output`` set to the input rewrites it in place."""
    # The copy lives in tmp_path, i.e. next to the assets the fixture wrote.
    copy = tmp_path / "page.html"
    copy.write_text(tiny_site["html"].read_text())

    assert bundle_html.__file__ is not None
    exe = Path(bundle_html.__file__).resolve()
    subprocess.check_call([sys.executable, str(exe), str(copy), "--output", str(copy)])

    res = copy.read_text()
    assert "<style>" in res
    assert _b64_in(res, "image/png")