Coverage for tests/unit/web/test_bundle_html.py: 100%
202 statements
« prev ^ index » next coverage.py v7.6.1, created at 2025-06-06 11:28 -0600
« prev ^ index » next coverage.py v7.6.1, created at 2025-06-06 11:28 -0600
1from __future__ import annotations
3import io
4import subprocess
5import sys
6import textwrap
7import urllib.request
8from pathlib import Path
10import pytest
12import muutils.web.bundle_html as bundle_html
# ----------------------------------------------------------------
# helper / fixtures
# ----------------------------------------------------------------
@pytest.fixture()
def site(tmp_path: Path) -> dict[str, Path]:
    """Return paths for a tiny site with four asset types.

    Writes ``style.css``, ``app.js``, ``icon.svg``, ``pic.png`` and an
    ``index.html`` referencing all four into *tmp_path*.

    Returns a mapping with the keys ``root``, ``html``, ``css``, ``js``,
    ``svg`` and ``png``.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")

    html = tmp_path / "index.html"
    # The markup deliberately mixes quote styles, a stray space before ``>``
    # and an attribute split over two lines.
    # NOTE(review): nesting whitespace inside this literal was reconstructed
    # (the asset tags sit at 4 spaces) -- confirm against the real test file.
    html.write_text(
        textwrap.dedent(
            """\
            <!doctype html>
            <html>
              <head>
                <link rel="stylesheet" href="style.css" >
                <script src='app.js'></script>
              </head>
              <body>
                <img src="pic.png">
                <svg>
                  <use
                      xlink:href = "icon.svg#i" />
                </svg>
              </body>
            </html>
            """
        )
    )

    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }
def _get(cfg_patch: dict | None = None) -> bundle_html.InlineConfig:
    """Return an ``InlineConfig`` built from the keyword overrides in *cfg_patch*.

    ``None`` (or an empty dict) yields a config constructed with no arguments.
    """
    return bundle_html.InlineConfig(**(cfg_patch or {}))
def _inline(
    text: str,
    base: Path,
    cfg_patch: dict | None = None,
) -> str:
    """Run ``bundle_html.inline_html_assets`` on *text* and return its result.

    *base* is passed as ``base_path``; *cfg_patch* holds optional
    ``InlineConfig`` keyword overrides (e.g. the local/remote switches).
    """
    cfg = _get(cfg_patch)
    return bundle_html.inline_html_assets(text, base_path=base, config=cfg)
81def _has_b64_fragment(html: str, mime: str) -> bool:
82 """Return True if a data URI for *mime* is present."""
83 return f"data:{mime};base64," in html
# ----------------------------------------------------------------
# core behaviour (regex mode)
# ----------------------------------------------------------------
def test_all_assets_inlined_regex(site: dict[str, Path]) -> None:
    """With the default config, CSS, JS, PNG and SVG are all inlined."""
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])
    print(out)  # debugging aid: dump the bundled HTML

    assert "<style>" in out and "</style>" in out
    assert "<script>" in out and "</script>" in out
    assert _has_b64_fragment(out, "image/png")
    assert _has_b64_fragment(out, "image/svg+xml")
    # filename marker comments surround the inlined assets
    assert "<!-- begin 'style.css' -->" in out
    assert "<!-- end 'app.js' -->" in out
def test_indentation_preserved(site: dict[str, Path]) -> None:
    """An inlined tag keeps the indentation of the tag it replaced."""
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])
    # original line had 4 spaces indent
    # NOTE(review): the indent width inside these literals was restored from
    # the comment above -- confirm it matches the fixture markup.
    assert "\n    <style>" in out or "\n    <script>" in out
def test_skip_large_file(site: dict[str, Path]) -> None:
    """A script larger than ``max_bytes`` stays an external reference."""
    big = site["root"] / "large.js"
    big.write_bytes(b"x" * 200_000)
    # append a reference to the oversized script just before </body>
    site["html"].write_text(
        site["html"]
        .read_text()
        .replace("</body>", '<script src="large.js"></script>\n</body>')
    )
    out = _inline(site["html"].read_text(), site["root"], cfg_patch={"max_bytes": 1024})
    assert '<script src="large.js"></script>' in out
def test_allowed_extensions_filter(site: dict[str, Path]) -> None:
    """Only assets whose extension is in ``allowed_extensions`` are inlined."""
    out = _inline(
        site["html"].read_text(),
        site["root"],
        cfg_patch={"allowed_extensions": {".css"}},
    )
    # CSS is inlined; the script tag and the image are left alone
    assert "<style>" in out and "<script src='app.js'>" in out
    assert not _has_b64_fragment(out, "image/png")
def test_comment_toggle(site: dict[str, Path]) -> None:
    """``include_filename_comments=False`` suppresses the begin/end markers."""
    out = _inline(
        site["html"].read_text(),
        site["root"],
        cfg_patch={"include_filename_comments": False},
    )
    assert "begin 'style.css'" not in out
def test_local_off_remote_off(site: dict[str, Path]) -> None:
    """With both ``local`` and ``remote`` disabled the stylesheet link survives."""
    out = _inline(
        site["html"].read_text(),
        site["root"],
        cfg_patch={"local": False, "remote": False},
    )
    # nothing should change
    assert '<link rel="stylesheet" href="style.css" >' in out
# ----------------------------------------------------------------
# remote asset handling
# ----------------------------------------------------------------
def test_remote_fetch_allowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """With ``remote=True`` a remote stylesheet is fetched and inlined.

    ``urllib.request.urlopen`` is patched, so no network access happens.
    """
    remote_css = "https://cdn/foo.css"
    site["html"].write_text(site["html"].read_text().replace("style.css", remote_css))

    class FakeResp(io.BytesIO):
        """In-memory stand-in for the object returned by ``urlopen``."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            pass

    def fake_open(url: str, *a, **k):  # type: ignore[override]
        # the only URL requested must be the rewritten stylesheet link
        assert url == remote_css
        return FakeResp(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_open)
    out = _inline(site["html"].read_text(), site["root"], cfg_patch={"remote": True})
    assert "<style>" in out and "background:blue" in out
def test_remote_disallowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """With the default config a remote stylesheet link is left in place."""
    remote_css = "https://cdn/foo.css"
    site["html"].write_text(site["html"].read_text().replace("style.css", remote_css))
    # stub out urlopen so the test can never reach the network
    monkeypatch.setattr(urllib.request, "urlopen", lambda *a, **k: io.BytesIO(b""))
    out = _inline(site["html"].read_text(), site["root"])  # default remote=False
    assert f'href="{remote_css}"' in out  # untouched link remains
# ----------------------------------------------------------------
# bs4 mode parity checks
# ----------------------------------------------------------------
def test_bs4_matches_regex(site: dict[str, Path]) -> None:
    """``use_bs4=True`` inlines the same kinds of assets as the default mode."""
    raw = site["html"].read_text()
    out_regex = _inline(raw, site["root"])
    out_bs4 = _inline(raw, site["root"], {"use_bs4": True})
    assert "<style>" in out_bs4 and "<style>" in out_regex
    assert _has_b64_fragment(out_bs4, "image/png")
    assert "<!-- begin 'style.css' -->" in out_bs4
def test_prettify_flag_bs4(site: dict[str, Path]) -> None:
    """``prettify=True`` with ``use_bs4=True`` still yields a doctype-led document."""
    raw = site["html"].read_text()
    cfg = _get({"use_bs4": True})
    pretty = bundle_html.inline_html_assets(
        raw, base_path=site["root"], config=cfg, prettify=True
    )
    # prettified soup always starts with <!DOCTYPE or <html ...> on its own line
    assert pretty.lstrip().lower().startswith("<!doctype")
# ----------------------------------------------------------------
# tag_attr override
# ----------------------------------------------------------------
def test_tag_attr_override(site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` inlinable."""
    # rename every "href" in the page (including xlink:href) to "data-href"
    site["html"].write_text(site["html"].read_text().replace("href", "data-href"))
    cfg = _get({"tag_attr": {"link": "data-href"}})
    out = bundle_html.inline_html_assets(
        site["html"].read_text(), base_path=site["root"], config=cfg
    )
    assert "<style>" in out
# ----------------------------------------------------------------
# CLI integration (subprocess)
# ----------------------------------------------------------------
def test_cli_smoke(tmp_path: Path, site: dict[str, Path]) -> None:
    """Running the module file as a script with ``--output`` bundles the page."""
    html_copy = tmp_path / "page.html"
    html_copy.write_text(site["html"].read_text())
    exe = Path(bundle_html.__file__).resolve()
    # input and output are the same path, so the copy is overwritten in place
    subprocess.check_call(
        [sys.executable, str(exe), str(html_copy), "--output", str(html_copy)]
    )

    text = html_copy.read_text()
    assert "<style>" in text and "data:image/png;base64," in text
@pytest.fixture()
def tiny_site(tmp_path: Path) -> dict[str, Path]:
    """Create a minimal web-site with one asset of each supported type.

    Writes ``style.css``, ``app.js``, ``icon.svg``, ``pic.png`` and an
    ``index.html`` referencing all four into *tmp_path*.

    Returns a mapping with the keys ``root``, ``html``, ``css``, ``js``,
    ``svg`` and ``png``.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")  # PNG header only

    html = tmp_path / "index.html"
    # NOTE(review): nesting whitespace inside this literal was reconstructed
    # (the asset tags sit at 4 spaces) -- confirm against the real test file.
    html.write_text(
        textwrap.dedent(
            """
            <!doctype html>
            <html>
              <head>
                <link rel="stylesheet" href="style.css">
                <script src="app.js"></script>
              </head>
              <body>
                <img src="pic.png">
                <svg>
                  <use xlink:href="icon.svg#i"></use>
                </svg>
              </body>
            </html>
            """
        )
    )
    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }
# utilities -----------------------------------------------------------
292def _b64_in(html: str, mime: str) -> bool:
293 return f"data:{mime};base64," in html
# regex-mode tests ----------------------------------------------------
def test_inline_everything_regex(tiny_site: dict[str, Path]) -> None:
    """With the default config, CSS, JS, PNG and SVG are all inlined."""
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert "<style>" in out
    assert "<script>" in out
    assert _b64_in(out, "image/png")
    assert _b64_in(out, "image/svg+xml")
    # filename marker comments surround the inlined assets
    assert "<!-- begin 'style.css' -->" in out
    assert "<!-- end 'app.js' -->" in out
def test_indentation_preserved_2(tiny_site: dict[str, Path]) -> None:
    """An inlined tag keeps the indentation of the tag it replaced."""
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    # NOTE(review): the indent width inside these literals was reconstructed
    # as 4 spaces -- confirm it matches the fixture markup.
    assert "\n    <style>" in out or "\n    <script>" in out
def test_max_bytes_limit(tiny_site: dict[str, Path]) -> None:
    """A script larger than ``max_bytes`` stays an external reference."""
    big_js = tiny_site["root"] / "big.js"
    big_js.write_bytes(b"x" * 200_000)
    # append a reference to the oversized script just before </body>
    tiny_site["html"].write_text(
        tiny_site["html"]
        .read_text()
        .replace("</body>", '<script src="big.js"></script>\n</body>')
    )
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"max_bytes": 1_024},
    )
    assert '<script src="big.js"></script>' in out
def test_allowed_extensions_filter_2(tiny_site: dict[str, Path]) -> None:
    """Only assets whose extension is in ``allowed_extensions`` are inlined."""
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"allowed_extensions": {".css"}},
    )
    assert "<style>" in out
    # the script tag and the image are left alone
    assert '<script src="app.js">' in out
    assert not _b64_in(out, "image/png")
def test_comment_toggle_off(tiny_site: dict[str, Path]) -> None:
    """``include_filename_comments=False`` suppresses the begin/end markers."""
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"include_filename_comments": False},
    )
    assert "begin 'style.css'" not in out
def test_disable_local_and_remote(tiny_site: dict[str, Path]) -> None:
    """With both ``local`` and ``remote`` disabled the stylesheet link survives."""
    out = _inline(
        tiny_site["html"].read_text(),
        tiny_site["root"],
        {"local": False, "remote": False},
    )
    assert 'href="style.css"' in out
# remote fetch --------------------------------------------------------
def test_remote_asset_inlined(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """With ``remote=True`` a remote stylesheet is fetched and inlined.

    ``urllib.request.urlopen`` is patched, so no network access happens.
    """
    remote_css = "https://cdn.example.com/remote.css"
    tiny_site["html"].write_text(
        tiny_site["html"].read_text().replace("style.css", remote_css)
    )

    class _Resp(io.BytesIO):
        """In-memory stand-in for the object returned by ``urlopen``."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            pass

    def fake_open(url: str, *a, **k):  # type: ignore[override]
        # the only URL requested must be the rewritten stylesheet link
        assert url == remote_css
        return _Resp(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_open)
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"], {"remote": True})
    assert "background:blue" in out
def test_remote_blocked(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """With the default config a remote stylesheet link is left in place."""
    remote_css = "https://cdn.example.com/remote.css"
    tiny_site["html"].write_text(
        tiny_site["html"].read_text().replace("style.css", remote_css)
    )
    # stub out urlopen so the test can never reach the network
    monkeypatch.setattr(urllib.request, "urlopen", lambda *a, **k: io.BytesIO(b""))
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert f'href="{remote_css}"' in out
# bs4 parity ----------------------------------------------------------
def test_bs4_equals_regex(tiny_site: dict[str, Path]) -> None:
    """``use_bs4=True`` inlines the same kinds of assets as the default mode."""
    raw = tiny_site["html"].read_text()
    out_regex = _inline(raw, tiny_site["root"])
    out_bs4 = _inline(raw, tiny_site["root"], {"use_bs4": True})
    assert "<style>" in out_bs4 and "<style>" in out_regex
    assert _b64_in(out_bs4, "image/png")
    assert "<!-- begin 'style.css' -->" in out_bs4
def test_bs4_prettify(tiny_site: dict[str, Path]) -> None:
    """``prettify=True`` with ``use_bs4=True`` still yields a doctype-led document."""
    cfg = bundle_html.InlineConfig(use_bs4=True)
    pretty = bundle_html.inline_html_assets(
        tiny_site["html"].read_text(),
        base_path=tiny_site["root"],
        config=cfg,
        prettify=True,
    )
    # lstrip() because the fixture markup begins with a newline
    assert pretty.lstrip().lower().startswith("<!doctype")
# tag-attr override ---------------------------------------------------
def test_custom_attribute_name(tiny_site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` inlinable."""
    # rename every "href" in the page (including xlink:href) to "data-href"
    tiny_site["html"].write_text(
        tiny_site["html"].read_text().replace("href", "data-href")
    )
    cfg = {"tag_attr": {"link": "data-href"}}
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"], cfg)
    assert "<style>" in out
# data-uri idempotence -----------------------------------------------
def test_data_uri_not_reprocessed(tiny_site: dict[str, Path]) -> None:
    """Inlining already-inlined output is a no-op."""
    # first inline to get data URIs
    once = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    # second pass should leave them unchanged
    twice = _inline(once, tiny_site["root"])
    assert twice == once
# mixed quotes --------------------------------------------------------
def test_single_quotes_handled(tiny_site: dict[str, Path]) -> None:
    """A single-quoted ``src`` attribute is inlined like a double-quoted one."""
    tiny_site["html"].write_text(
        tiny_site["html"]
        .read_text()
        .replace('"app.js"', "'app.js'")  # mix quote styles
    )
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert "<script>" in out
# fragment ids --------------------------------------------------------
def test_fragment_in_src_kept(tiny_site: dict[str, Path]) -> None:
    """``icon.svg#i`` becomes a data URI and the ``#i`` fragment is dropped.

    NOTE(review): the test name says "kept", but the assertions require the
    fragment to be absent from the output; the name is left unchanged.
    """
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert "#i" not in out
    assert _b64_in(out, "image/svg+xml")
# CLI -----------------------------------------------------------------
def test_cli_overwrite(tmp_path: Path, tiny_site: dict[str, Path]) -> None:
    """Running the module file as a script can overwrite its own input file."""
    copy = tmp_path / "page.html"
    copy.write_text(tiny_site["html"].read_text())
    exe = Path(bundle_html.__file__).resolve()
    # input and --output are the same path
    subprocess.check_call([sys.executable, str(exe), str(copy), "--output", str(copy)])
    res = copy.read_text()
    assert "<style>" in res
    assert _b64_in(res, "image/png")