Coverage for tests / unit / web / test_bundle_html.py: 100%
204 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-18 02:51 -0700
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-18 02:51 -0700
1from __future__ import annotations
3import io
4import subprocess
5import sys
6import textwrap
7import urllib.request
8from pathlib import Path
10import pytest
12import muutils.web.bundle_html as bundle_html
14# ----------------------------------------------------------------
15# helper / fixtures
16# ----------------------------------------------------------------
@pytest.fixture()
def site(tmp_path: Path) -> dict[str, Path]:
    """Return paths for a tiny site with four asset types.

    Writes ``style.css``, ``app.js``, ``icon.svg``, ``pic.png`` and an
    ``index.html`` that references all four into *tmp_path*.

    Returns:
        Mapping with the keys ``root``, ``html``, ``css``, ``js``, ``svg``
        and ``png``; ``root`` is *tmp_path* itself.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")

    html = tmp_path / "index.html"
    # The markup mixes quote styles, has stray spaces around attributes and
    # splits one tag across two lines.  Two-space nesting puts <link> and
    # <script> at four spaces of indent, matching the "4 spaces indent"
    # expectation of the indentation test.
    # NOTE(review): nesting depth was reconstructed -- confirm against the
    # upstream fixture.
    html.write_text(
        textwrap.dedent(
            """\
            <!doctype html>
            <html>
              <head>
                <link rel="stylesheet" href="style.css" >
                <script src='app.js'></script>
              </head>
              <body>
                <img src="pic.png">
                <svg>
                  <use
                    xlink:href = "icon.svg#i" />
                </svg>
              </body>
            </html>
            """
        )
    )

    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }
def _get(cfg_patch: dict | None = None) -> bundle_html.InlineConfig:
    """Build an ``InlineConfig``, passing *cfg_patch* as keyword overrides.

    A missing or empty *cfg_patch* yields the default configuration.
    """
    overrides = cfg_patch if cfg_patch else {}
    return bundle_html.InlineConfig(**overrides)
def _inline(
    text: str,
    base: Path,
    cfg_patch: dict | None = None,
) -> str:
    """Inline the assets referenced by *text*, using *base* as ``base_path``.

    *cfg_patch* holds optional ``InlineConfig`` overrides, for example the
    ``local`` / ``remote`` switches.
    """
    return bundle_html.inline_html_assets(
        text,
        base_path=base,
        config=_get(cfg_patch),
    )
81def _has_b64_fragment(html: str, mime: str) -> bool:
82 """Return True if a data URI for *mime* is present."""
83 return f"data:{mime};base64," in html
86# ----------------------------------------------------------------
87# core behaviour (regex mode)
88# ----------------------------------------------------------------
def test_all_assets_inlined_regex(site: dict[str, Path]) -> None:
    """With the default config every asset of the ``site`` page is inlined.

    Checks for ``<style>``/``<script>`` elements, base64 data URIs for the
    PNG and the SVG, and the begin/end filename comments.
    """
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])

    assert "<style>" in out and "</style>" in out
    assert "<script>" in out and "</script>" in out
    assert _has_b64_fragment(out, "image/png")
    assert _has_b64_fragment(out, "image/svg+xml")
    assert "<!-- begin 'style.css' -->" in out
    assert "<!-- end 'app.js' -->" in out
def test_indentation_preserved(site: dict[str, Path]) -> None:
    """An inlined ``<style>`` or ``<script>`` keeps the replaced tag's indent."""
    html_raw = site["html"].read_text()
    out = _inline(html_raw, site["root"])
    # original line had 4 spaces indent, so the replacement must start with
    # a newline followed by exactly four spaces
    assert "\n    <style>" in out or "\n    <script>" in out
def test_skip_large_file(site: dict[str, Path]) -> None:
    """A 200 000-byte script is left untouched when ``max_bytes`` is 1024."""
    oversized = site["root"] / "large.js"
    oversized.write_bytes(b"x" * 200_000)

    # Append a reference to the oversized script just before </body>.
    page = site["html"]
    tag = '<script src="large.js"></script>'
    page.write_text(page.read_text().replace("</body>", tag + "\n</body>"))

    result = _inline(page.read_text(), site["root"], cfg_patch={"max_bytes": 1024})
    assert tag in result
def test_allowed_extensions_filter(site: dict[str, Path]) -> None:
    """Only ``.css`` is inlined when it is the sole allowed extension."""
    overrides = {"allowed_extensions": {".css"}}
    result = _inline(site["html"].read_text(), site["root"], cfg_patch=overrides)

    assert "<style>" in result
    # The script reference and the image must survive unmodified.
    assert "<script src='app.js'>" in result
    assert not _has_b64_fragment(result, "image/png")
def test_comment_toggle(site: dict[str, Path]) -> None:
    """No begin-comment is emitted when ``include_filename_comments`` is off."""
    overrides = {"include_filename_comments": False}
    result = _inline(site["html"].read_text(), site["root"], cfg_patch=overrides)
    assert "begin 'style.css'" not in result
def test_local_off_remote_off(site: dict[str, Path]) -> None:
    """With ``local`` and ``remote`` both off the stylesheet link survives."""
    overrides = {"local": False, "remote": False}
    result = _inline(site["html"].read_text(), site["root"], cfg_patch=overrides)
    # nothing should change, so the original tag is still there verbatim
    assert '<link rel="stylesheet" href="style.css" >' in result
152# ----------------------------------------------------------------
153# remote asset handling
154# ----------------------------------------------------------------
def test_remote_fetch_allowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """With ``remote=True`` a remote stylesheet is fetched and inlined."""
    remote_css = "https://cdn/foo.css"
    page = site["html"]
    page.write_text(page.read_text().replace("style.css", remote_css))

    class FakeResp(io.BytesIO):
        """In-memory response usable in a ``with`` block; exit is a no-op."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            return None

    def fake_open(url: str, *a, **k):
        # Only the rewritten stylesheet URL is expected to be requested.
        assert url == remote_css
        return FakeResp(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_open)
    result = _inline(page.read_text(), site["root"], cfg_patch={"remote": True})
    assert "<style>" in result
    assert "background:blue" in result
def test_remote_disallowed(
    monkeypatch: pytest.MonkeyPatch, site: dict[str, Path]
) -> None:
    """A remote stylesheet link stays as-is under the default config."""
    remote_css = "https://cdn/foo.css"
    page = site["html"]
    page.write_text(page.read_text().replace("style.css", remote_css))

    # Stub the network call so the test never leaves the machine.
    monkeypatch.setattr(urllib.request, "urlopen", lambda *a, **k: io.BytesIO(b""))

    result = _inline(page.read_text(), site["root"])  # default remote=False
    assert f'href="{remote_css}"' in result  # untouched link remains
189# ----------------------------------------------------------------
190# bs4 mode parity checks
191# ----------------------------------------------------------------
def test_bs4_matches_regex(site: dict[str, Path]) -> None:
    """bs4 mode yields the same key markers as the default mode.

    Only a handful of markers are compared, not the full output.
    """
    source = site["html"].read_text()
    via_regex = _inline(source, site["root"])
    via_bs4 = _inline(source, site["root"], {"use_bs4": True})

    assert "<style>" in via_bs4
    assert "<style>" in via_regex
    assert _has_b64_fragment(via_bs4, "image/png")
    assert "<!-- begin 'style.css' -->" in via_bs4
def test_prettify_flag_bs4(site: dict[str, Path]) -> None:
    """``prettify=True`` in bs4 mode returns output that opens with a doctype."""
    pretty = bundle_html.inline_html_assets(
        site["html"].read_text(),
        base_path=site["root"],
        config=_get({"use_bs4": True}),
        prettify=True,
    )
    # prettified soup is expected to start with <!DOCTYPE ...>; compare
    # case-insensitively and ignore leading whitespace
    leading = pretty.lstrip().lower()
    assert leading.startswith("<!doctype")
213# ----------------------------------------------------------------
214# tag_attr override
215# ----------------------------------------------------------------
def test_tag_attr_override(site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` get inlined."""
    page = site["html"]
    # The blanket replace renames every "href", including the one inside
    # "xlink:href"; only the <link> result is asserted below.
    page.write_text(page.read_text().replace("href", "data-href"))

    result = _inline(
        page.read_text(),
        site["root"],
        {"tag_attr": {"link": "data-href"}},
    )
    assert "<style>" in result
227# ----------------------------------------------------------------
228# CLI integration (subprocess)
229# ----------------------------------------------------------------
def test_cli_smoke(tmp_path: Path, site: dict[str, Path]) -> None:
    """Run the module file as a script and check the page is bundled in place."""
    page = tmp_path / "page.html"
    page.write_text(site["html"].read_text())

    assert bundle_html.__file__ is not None
    script = Path(bundle_html.__file__).resolve()

    # Input and --output point at the same file, so it is overwritten.
    cmd = [sys.executable, str(script), str(page), "--output", str(page)]
    subprocess.check_call(cmd)

    bundled = page.read_text()
    assert "<style>" in bundled
    assert "data:image/png;base64," in bundled
@pytest.fixture()
def tiny_site(tmp_path: Path) -> dict[str, Path]:
    """Create a minimal web-site with one asset of each supported type.

    Writes ``style.css``, ``app.js``, ``icon.svg``, ``pic.png`` and an
    ``index.html`` that references all four into *tmp_path*.

    Returns:
        Mapping with the keys ``root``, ``html``, ``css``, ``js``, ``svg``
        and ``png``; ``root`` is *tmp_path* itself.
    """
    css = tmp_path / "style.css"
    css.write_text("body { color: red; }")

    js = tmp_path / "app.js"
    js.write_text("console.log('hi');")

    svg = tmp_path / "icon.svg"
    svg.write_text("<svg><rect/></svg>")

    png = tmp_path / "pic.png"
    png.write_bytes(b"\x89PNG\r\n\x1a\n")  # PNG header only

    html = tmp_path / "index.html"
    # The literal starts with a newline (no line-continuation backslash), so
    # the written page begins with an empty line.  Two-space nesting puts
    # <link> and <script> at four spaces of indent.
    # NOTE(review): nesting depth was reconstructed -- confirm against the
    # upstream fixture.
    html.write_text(
        textwrap.dedent(
            """
            <!doctype html>
            <html>
              <head>
                <link rel="stylesheet" href="style.css">
                <script src="app.js"></script>
              </head>
              <body>
                <img src="pic.png">
                <svg>
                  <use xlink:href="icon.svg#i"></use>
                </svg>
              </body>
            </html>
            """
        )
    )
    return {
        "root": tmp_path,
        "html": html,
        "css": css,
        "js": js,
        "svg": svg,
        "png": png,
    }
290# utilities -----------------------------------------------------------
293def _b64_in(html: str, mime: str) -> bool:
294 return f"data:{mime};base64," in html
297# regex-mode tests ----------------------------------------------------
def test_inline_everything_regex(tiny_site: dict[str, Path]) -> None:
    """With the default config every asset of ``tiny_site`` is inlined."""
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"])

    for tag in ("<style>", "<script>"):
        assert tag in result
    for mime in ("image/png", "image/svg+xml"):
        assert _b64_in(result, mime)
    # Filename comments bracket the inlined content.
    assert "<!-- begin 'style.css' -->" in result
    assert "<!-- end 'app.js' -->" in result
def test_indentation_preserved_2(tiny_site: dict[str, Path]) -> None:
    """An inlined ``<style>`` or ``<script>`` keeps the replaced tag's indent."""
    out = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    # NOTE(review): assumes the <link>/<script> lines of the fixture page are
    # indented by four spaces -- confirm against the fixture.
    assert "\n    <style>" in out or "\n    <script>" in out
def test_max_bytes_limit(tiny_site: dict[str, Path]) -> None:
    """A 200 000-byte script is left untouched when ``max_bytes`` is 1024."""
    (tiny_site["root"] / "big.js").write_bytes(b"x" * 200_000)

    # Append a reference to the oversized script just before </body>.
    page = tiny_site["html"]
    tag = '<script src="big.js"></script>'
    page.write_text(page.read_text().replace("</body>", tag + "\n</body>"))

    result = _inline(page.read_text(), tiny_site["root"], {"max_bytes": 1_024})
    assert tag in result
def test_allowed_extensions_filter_2(tiny_site: dict[str, Path]) -> None:
    """Only ``.css`` is inlined when it is the sole allowed extension."""
    overrides = {"allowed_extensions": {".css"}}
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"], overrides)

    assert "<style>" in result
    # The script reference and the image must survive unmodified.
    assert '<script src="app.js">' in result
    assert not _b64_in(result, "image/png")
def test_comment_toggle_off(tiny_site: dict[str, Path]) -> None:
    """No begin-comment is emitted when ``include_filename_comments`` is off."""
    overrides = {"include_filename_comments": False}
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"], overrides)
    assert "begin 'style.css'" not in result
def test_disable_local_and_remote(tiny_site: dict[str, Path]) -> None:
    """With ``local`` and ``remote`` both off the stylesheet href survives."""
    overrides = {"local": False, "remote": False}
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"], overrides)
    assert 'href="style.css"' in result
360# remote fetch --------------------------------------------------------
def test_remote_asset_inlined(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """With ``remote=True`` the fetched stylesheet body appears in the output."""
    remote_css = "https://cdn.example.com/remote.css"
    page = tiny_site["html"]
    page.write_text(page.read_text().replace("style.css", remote_css))

    class _Resp(io.BytesIO):
        """In-memory response usable in a ``with`` block; exit is a no-op."""

        def __enter__(self):  # type: ignore[override]
            return self

        def __exit__(self, *exc):  # type: ignore[override]
            return None

    def fake_open(url: str, *a, **k):
        # Only the rewritten stylesheet URL is expected to be requested.
        assert url == remote_css
        return _Resp(b"body{background:blue;}")

    monkeypatch.setattr(urllib.request, "urlopen", fake_open)
    result = _inline(page.read_text(), tiny_site["root"], {"remote": True})
    assert "background:blue" in result
def test_remote_blocked(
    monkeypatch: pytest.MonkeyPatch, tiny_site: dict[str, Path]
) -> None:
    """A remote stylesheet link stays as-is under the default config."""
    remote_css = "https://cdn.example.com/remote.css"
    page = tiny_site["html"]
    page.write_text(page.read_text().replace("style.css", remote_css))

    # Stub the network call so the test never leaves the machine.
    monkeypatch.setattr(urllib.request, "urlopen", lambda *a, **k: io.BytesIO(b""))

    result = _inline(page.read_text(), tiny_site["root"])
    expected_attr = f'href="{remote_css}"'
    assert expected_attr in result
399# bs4 parity ----------------------------------------------------------
def test_bs4_equals_regex(tiny_site: dict[str, Path]) -> None:
    """bs4 mode yields the same key markers as the default mode.

    Only a handful of markers are compared, not the full output.
    """
    source = tiny_site["html"].read_text()
    root = tiny_site["root"]
    via_regex = _inline(source, root)
    via_bs4 = _inline(source, root, {"use_bs4": True})

    assert "<style>" in via_bs4
    assert "<style>" in via_regex
    assert _b64_in(via_bs4, "image/png")
    assert "<!-- begin 'style.css' -->" in via_bs4
def test_bs4_prettify(tiny_site: dict[str, Path]) -> None:
    """``prettify=True`` in bs4 mode returns output that opens with a doctype."""
    call_kwargs = {
        "base_path": tiny_site["root"],
        "config": bundle_html.InlineConfig(use_bs4=True),
        "prettify": True,
    }
    pretty = bundle_html.inline_html_assets(
        tiny_site["html"].read_text(), **call_kwargs
    )
    # Compare case-insensitively and ignore leading whitespace.
    leading = pretty.lstrip().lower()
    assert leading.startswith("<!doctype")
422# tag-attr override ---------------------------------------------------
def test_custom_attribute_name(tiny_site: dict[str, Path]) -> None:
    """A ``tag_attr`` override makes ``<link data-href=...>`` get inlined."""
    page = tiny_site["html"]
    # The blanket replace renames every "href", including the one inside
    # "xlink:href"; only the <link> result is asserted below.
    renamed = page.read_text().replace("href", "data-href")
    page.write_text(renamed)

    result = _inline(
        page.read_text(),
        tiny_site["root"],
        {"tag_attr": {"link": "data-href"}},
    )
    assert "<style>" in result
434# data-uri idempotence -----------------------------------------------
def test_data_uri_not_reprocessed(tiny_site: dict[str, Path]) -> None:
    """Inlining already-inlined output must return it unchanged."""
    root = tiny_site["root"]
    # The first pass produces the data URIs ...
    first_pass = _inline(tiny_site["html"].read_text(), root)
    # ... and a second pass over that output should leave them alone.
    second_pass = _inline(first_pass, root)
    assert second_pass == first_pass
445# mixed quotes --------------------------------------------------------
def test_single_quotes_handled(tiny_site: dict[str, Path]) -> None:
    """A single-quoted ``src`` attribute is still recognised and inlined."""
    page = tiny_site["html"]
    # Switch just the script reference to single quotes to mix quote styles.
    requoted = page.read_text().replace('"app.js"', "'app.js'")
    page.write_text(requoted)

    result = _inline(page.read_text(), tiny_site["root"])
    assert "<script>" in result
458# fragment ids --------------------------------------------------------
def test_fragment_in_src_kept(tiny_site: dict[str, Path]) -> None:
    """``icon.svg#i`` becomes an SVG data URI and the ``#i`` fragment is gone.

    Despite the "kept" in the name, the assertions require the fragment to
    be absent from the output.
    """
    result = _inline(tiny_site["html"].read_text(), tiny_site["root"])
    assert "#i" not in result
    assert _b64_in(result, "image/svg+xml")
468# CLI -----------------------------------------------------------------
def test_cli_overwrite(tmp_path: Path, tiny_site: dict[str, Path]) -> None:
    """Run the module file as a script with input and ``--output`` identical."""
    page = tmp_path / "page.html"
    page.write_text(tiny_site["html"].read_text())

    assert bundle_html.__file__ is not None
    script = Path(bundle_html.__file__).resolve()

    # The same path is passed as input and output, so the file is overwritten.
    cmd = [sys.executable, str(script), str(page), "--output", str(page)]
    subprocess.check_call(cmd)

    bundled = page.read_text()
    assert "<style>" in bundled
    assert _b64_in(bundled, "image/png")