Coverage for muutils/web/inline_html.py: 81%

52 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2025-05-30 22:10 -0600

1"Inline local CSS/JS files into an HTML document" 

2 

3from __future__ import annotations 

4 

5from typing import Literal 

6from pathlib import Path 

7import warnings 

8 

9AssetType = Literal["script", "style"] 

10 

11 

def inline_html_assets(
    html: str,
    assets: list[tuple[AssetType, Path]],
    base_path: Path,
    include_filename_comments: bool = True,
    prettify: bool = False,
) -> str:
    """Inline specified local CSS/JS files into the text of an HTML document.

    Each entry in `assets` should be a tuple like `("script", Path("app.js"))` or
    `("style", Path("style.css"))`; plain string filenames are accepted as well.
    The referencing tag must appear in `html` exactly once, written exactly as
    `<script src="NAME"></script>` or `<link rel="stylesheet" href="NAME">`,
    with nothing but whitespace before it on its line.

    # Parameters:
    - `html : str`
        input HTML content.
    - `assets : list[tuple[AssetType, Path]]`
        List of (tag_type, filename) tuples to inline.
    - `base_path : Path`
        directory that each filename is resolved against when reading its content.
    - `include_filename_comments : bool`
        wrap each inlined asset in `<!-- begin 'NAME' -->` / `<!-- end 'NAME' -->` comments.
        (defaults to `True`)
    - `prettify : bool`
        run the result through BeautifulSoup's `prettify()`; if `bs4` cannot be
        imported a warning is emitted and the HTML is returned un-prettified.
        (defaults to `False`)

    # Returns:
    `str` : Modified HTML content with inlined assets.

    # Raises:
    - `ValueError` : if a tag type is neither `"script"` nor `"style"`.
    - `AssertionError` : if the referencing tag is not found exactly once, or is
        preceded by non-whitespace text on its line.
    """
    for tag_type, filename in assets:
        # Path() is a no-op for Path inputs and lets plain strings through too
        fname_str: str = Path(filename).as_posix()

        # build the exact tag text we expect to find for this asset
        pattern: str
        if tag_type == "script":
            pattern = rf'<script src="{fname_str}"></script>'
        elif tag_type == "style":
            pattern = rf'<link rel="stylesheet" href="{fname_str}">'
        else:
            err_msg: str = f"Unsupported tag type: {tag_type}"
            raise ValueError(err_msg)

        # the tag must be in the text exactly once; raised explicitly (rather
        # than via `assert`) so the check survives `python -O`
        n_found: int = html.count(pattern)
        if n_found != 1:
            raise AssertionError(
                f"Pattern {pattern} should be in the html exactly once, found html.count(pattern) = {n_found}"
            )

        # figure out the indentation level of the pattern in the html.
        # a sentinel character is appended before splitting so that an empty
        # prefix (tag at the very start) or a prefix ending in a line break
        # (tag at column 0) yields an empty last line instead of an IndexError
        # or the previous line's text.
        prefix: str = html.partition(pattern)[0]
        indentation: str = (prefix + "x").splitlines()[-1][:-1]
        if indentation.strip() != "":
            raise AssertionError(
                f"Pattern '{pattern}' should be alone in its line, found indentation = {indentation!r}"
            )

        # read the content and create the replacement
        content: str = (base_path / filename).read_text()
        replacement: str = f"<{tag_type}>\n{content}\n</{tag_type}>"
        if include_filename_comments:
            replacement = f"<!-- begin '{fname_str}' -->\n{replacement}\n<!-- end '{fname_str}' -->"

        # indent every line of the replacement one tab deeper than the tag was;
        # the tag's own leading whitespace stays in the document, since only
        # the tag text itself is substituted
        replacement = "\n".join(
            [f"{indentation}\t{line}" for line in replacement.splitlines()]
        )

        # perform the replacement
        html = html.replace(pattern, replacement)

    if prettify:
        # only the import is guarded, so an unrelated error during
        # prettification is not mistaken for a missing dependency
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            warnings.warn(
                "BeautifulSoup is not installed, skipping prettification of HTML."
            )
        else:
            soup: BeautifulSoup = BeautifulSoup(html, "html.parser")
            # TYPING: .prettify() might return a str or bytes, but we want str
            html = str(soup.prettify())

    return html

79 

80 

def inline_html_file(
    html_path: Path,
    output_path: Path,
    include_filename_comments: bool = True,
    prettify: bool = False,
) -> None:
    """Inline the local CSS/JS files next to an HTML file and save the result.

    Every `*.js` and `*.css` file found directly in the directory containing
    `html_path` is passed to `inline_html_assets` (scripts first, then styles),
    and the resulting HTML is written to `output_path`.

    # Parameters:
    - `html_path : Path`
        HTML file to read; its parent directory is searched for assets.
    - `output_path : Path`
        file the modified HTML is written to.
    - `include_filename_comments : bool`
        forwarded to `inline_html_assets`.
        (defaults to `True`)
    - `prettify : bool`
        forwarded to `inline_html_assets`.
        (defaults to `False`)
    """
    source_dir: Path = html_path.parent
    document: str = html_path.read_text()

    # collect assets by bare file name, since they are resolved against source_dir
    scripts: list[tuple[AssetType, Path]] = [
        ("script", Path(js_file.name)) for js_file in source_dir.glob("*.js")
    ]
    styles: list[tuple[AssetType, Path]] = [
        ("style", Path(css_file.name)) for css_file in source_dir.glob("*.css")
    ]

    inlined: str = inline_html_assets(
        document,
        scripts + styles,
        source_dir,
        include_filename_comments=include_filename_comments,
        prettify=prettify,
    )
    output_path.write_text(inlined)

107 

108 

if __name__ == "__main__":
    # command-line entry point: inline the assets of one HTML file
    import argparse

    parser: argparse.ArgumentParser = argparse.ArgumentParser(
        description="Inline local CSS/JS files into an HTML document."
    )
    # both paths are required: without them there is nothing to process, and
    # argparse should report a usage error rather than letting `None` reach Path()
    parser.add_argument(
        "-i",
        "--input-path",
        type=Path,
        required=True,
        help="Path to the HTML file to process.",
    )
    parser.add_argument(
        "-o",
        "--output-path",
        type=Path,
        required=True,
        help="Path to save the modified HTML file.",
    )

    # both features are on by default; these flags switch them off
    parser.add_argument(
        "-c",
        "--no-filename-comments",
        action="store_true",
        help="don't include comments with the filename in the inlined assets",
    )

    parser.add_argument(
        "-p",
        "--no-prettify",
        action="store_true",
        help="don't prettify the HTML file",
    )

    args: argparse.Namespace = parser.parse_args()

    inline_html_file(
        html_path=Path(args.input_path),
        output_path=Path(args.output_path),
        include_filename_comments=not args.no_filename_comments,
        prettify=not args.no_prettify,
    )