Coverage for tests/unit/test_jsonlines.py: 100%

1from __future__ import annotations

3import gzip

4import json

5from pathlib import Path

7import pytest

9from muutils.json_serialize import JSONitem

10from muutils.jsonlines import jsonl_load, jsonl_load_log, jsonl_write

# Scratch directory (relative to the current working directory) that holds the
# jsonlines files written by the tests in this module.
TEMP_PATH: Path = Path("tests/_temp/jsonl")

def test_jsonl_load():
    """Check that ``jsonl_load`` reads back a hand-written jsonlines file.

    The fixture file is produced with plain ``json.dumps`` (one document per
    line) rather than ``jsonl_write``, so only the loader is exercised here.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)
    jsonl_path = TEMP_PATH / "test_load.jsonl"

    expected = [
        {"id": 1, "name": "Alice", "value": 42.5},
        {"id": 2, "name": "Bob", "value": 17.3},
        {"id": 3, "name": "Charlie", "value": None},
        {"list": [1, 2, 3], "nested": {"a": 1, "b": 2}},
    ]

    # Serialize by hand: one JSON document per line, newline-terminated.
    with open(jsonl_path, "w", encoding="UTF-8") as fh:
        fh.writelines(json.dumps(record) + "\n" for record in expected)

    result = jsonl_load(str(jsonl_path))

    # Whole-payload equality first, then spot checks on individual records.
    assert result == expected
    assert len(result) == 4

    first = result[0]
    assert isinstance(first, dict)
    assert first["name"] == "Alice"  # ty: ignore[invalid-argument-type]

    last = result[3]
    assert isinstance(last, dict)
    nested = last["nested"]  # ty: ignore[invalid-argument-type]
    assert isinstance(nested, dict)
    assert nested["b"] == 2

def test_jsonl_write():
    """Check the on-disk format produced by ``jsonl_write``.

    The output is read back as raw text (not through ``jsonl_load``) and each
    line is verified to be newline-terminated JSON equal to the matching input.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)
    out_path = TEMP_PATH / "test_write.jsonl"

    records: list[JSONitem] = [
        {"id": 1, "status": "active"},
        {"id": 2, "status": "inactive"},
        {"id": 3, "status": "pending", "metadata": {"priority": "high"}},
    ]

    jsonl_write(str(out_path), records)

    with open(out_path, "r", encoding="UTF-8") as fh:
        raw_lines = fh.readlines()

    # One output line per input record.
    assert len(raw_lines) == 3

    # The length check above guarantees zip pairs every line with its record.
    for raw, expected in zip(raw_lines, records):
        assert raw.endswith("\n")
        assert json.loads(raw) == expected

    # Spot checks on concrete content, including a nested value.
    assert json.loads(raw_lines[0]) == {"id": 1, "status": "active"}
    assert json.loads(raw_lines[2])["metadata"]["priority"] == "high"

def test_gzip_support():
    """Check gzip handling in ``jsonl_write`` / ``jsonl_load``.

    Three scenarios are covered: a ``.gz`` filename, a ``.gzip`` filename, and
    an uncompressed-looking filename combined with an explicit
    ``use_gzip=True``.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)

    gz_path = TEMP_PATH / "test_gzip.jsonl.gz"
    gzip_path = TEMP_PATH / "test_gzip2.jsonl.gzip"

    payload: list[JSONitem] = [
        {"compressed": True, "value": 123},
        {"compressed": True, "value": 456},
    ]

    # --- ".gz" suffix: no use_gzip argument is passed ---
    jsonl_write(str(gz_path), payload)

    # Opening the file directly with gzip proves it really was compressed.
    with gzip.open(gz_path, "rt", encoding="UTF-8") as fh:
        gz_line_count = len(fh.readlines())
    assert gz_line_count == 2

    assert jsonl_load(str(gz_path)) == payload

    # --- ".gzip" suffix ---
    jsonl_write(str(gzip_path), payload)
    assert jsonl_load(str(gzip_path)) == payload

    # --- explicit use_gzip=True on a plain ".jsonl" name ---
    explicit_path = TEMP_PATH / "test_explicit.jsonl"
    jsonl_write(str(explicit_path), payload, use_gzip=True)

    # The suffix does not say gzip, yet the contents must still be gzipped.
    with gzip.open(explicit_path, "rt", encoding="UTF-8") as fh:
        explicit_line_count = len(fh.readlines())
    assert explicit_line_count == 2

    assert jsonl_load(str(explicit_path), use_gzip=True) == payload

128

129

def test_jsonl_load_log():
    """Check that ``jsonl_load_log`` accepts all-dict files and rejects others.

    A file whose lines are all JSON objects must load unchanged. A file with a
    non-dict entry (a string or a list) at index 1 must raise
    ``AssertionError`` with a message mentioning ``idx = 1`` and
    ``is not a dict``.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)

    # --- happy path: every line is a JSON object ---
    valid_path = TEMP_PATH / "test_log_valid.jsonl"
    valid_records: list[JSONitem] = [
        {"level": "INFO", "message": "Starting process"},
        {"level": "WARNING", "message": "Low memory"},
        {"level": "ERROR", "message": "Connection failed"},
    ]

    jsonl_write(str(valid_path), valid_records)
    loaded = jsonl_load_log(str(valid_path))

    assert loaded == valid_records
    assert all(isinstance(entry, dict) for entry in loaded)

    # --- rejection paths: the offending entry is always at index 1 ---
    rejected_cases: list[tuple[str, list[JSONitem]]] = [
        (
            "test_log_invalid.jsonl",
            [
                {"level": "INFO", "message": "Valid entry"},
                "not a dict",  # bare string instead of an object
                {"level": "ERROR", "message": "Another valid entry"},
            ],
        ),
        (
            "test_log_list.jsonl",
            [
                {"level": "INFO"},
                [1, 2, 3],  # list instead of an object
            ],
        ),
    ]

    for filename, records in rejected_cases:
        bad_path = TEMP_PATH / filename
        jsonl_write(str(bad_path), records)

        with pytest.raises(AssertionError) as exc_info:
            jsonl_load_log(str(bad_path))

        # The message must identify the offending index and the problem.
        error_msg = str(exc_info.value)
        assert "idx = 1" in error_msg
        assert "is not a dict" in error_msg

182

183

def test_gzip_compresslevel():
    """Check that ``jsonl_write`` accepts ``gzip_compresslevel`` and round-trips.

    The same file is written at compression level 1 and then level 9; after
    each write the data loaded back must equal the input.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)
    archive_path = TEMP_PATH / "test_compresslevel.jsonl.gz"

    records: list[JSONitem] = [{"value": i, "data": "content"} for i in range(10)]

    # Lowest level first, then highest; each write overwrites the same file.
    for level in (1, 9):
        jsonl_write(str(archive_path), records, gzip_compresslevel=level)
        assert jsonl_load(str(archive_path)) == records

Coverage for tests/unit/test_jsonlines.py: 100%

95 statements