Coverage for tests/unit/test_jsonlines.py: 100%
95 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-18 02:51 -0700
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-18 02:51 -0700
1from __future__ import annotations
3import gzip
4import json
5from pathlib import Path
7import pytest
9from muutils.json_serialize import JSONitem
10from muutils.jsonlines import jsonl_load, jsonl_load_log, jsonl_write
# Scratch directory (relative to the working directory) that every test in
# this module creates on demand and writes its jsonlines files into.
TEMP_PATH: Path = Path("tests") / "_temp" / "jsonl"
def test_jsonl_load():
    """Check that `jsonl_load` reads back a hand-written jsonlines file.

    The file is produced with plain `json.dumps` (one record per line) rather
    than `jsonl_write`, so only the loader is exercised here.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)
    jsonl_path = TEMP_PATH / "test_load.jsonl"

    records = [
        {"id": 1, "name": "Alice", "value": 42.5},
        {"id": 2, "name": "Bob", "value": 17.3},
        {"id": 3, "name": "Charlie", "value": None},
        {"list": [1, 2, 3], "nested": {"a": 1, "b": 2}},
    ]

    # Serialize by hand: one JSON document per newline-terminated line.
    with open(jsonl_path, "w", encoding="UTF-8") as handle:
        handle.writelines(json.dumps(record) + "\n" for record in records)

    result = jsonl_load(str(jsonl_path))

    # Whole-payload equality first, then a few targeted spot checks.
    assert result == records
    assert len(result) == 4

    first = result[0]
    assert isinstance(first, dict)
    assert first["name"] == "Alice"  # ty: ignore[invalid-argument-type]

    last = result[3]
    assert isinstance(last, dict)
    nested = last["nested"]  # ty: ignore[invalid-argument-type]
    assert isinstance(nested, dict)
    assert nested["b"] == 2
def test_jsonl_write():
    """Check the on-disk format produced by `jsonl_write`.

    The output is read back as raw text (not via `jsonl_load`) so the test
    pins the format itself: one newline-terminated JSON document per item.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)
    out_path = TEMP_PATH / "test_write.jsonl"

    records: list[JSONitem] = [
        {"id": 1, "status": "active"},
        {"id": 2, "status": "inactive"},
        {"id": 3, "status": "pending", "metadata": {"priority": "high"}},
    ]

    jsonl_write(str(out_path), records)

    with open(out_path, "r", encoding="UTF-8") as handle:
        raw_lines = handle.readlines()

    # One line per record; checked before pairing lines with records below.
    assert len(raw_lines) == 3

    for raw, expected in zip(raw_lines, records):
        assert raw.endswith("\n")
        assert json.loads(raw) == expected

    # Spot checks on specific lines, including a nested value.
    assert json.loads(raw_lines[0]) == {"id": 1, "status": "active"}
    assert json.loads(raw_lines[2])["metadata"]["priority"] == "high"
def test_gzip_support():
    """Check gzip handling in `jsonl_write` / `jsonl_load`.

    Three scenarios are covered: a ``.gz`` path, a ``.gzip`` path, and a plain
    ``.jsonl`` path combined with an explicit ``use_gzip=True``.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)

    records: list[JSONitem] = [
        {"compressed": True, "value": 123},
        {"compressed": True, "value": 456},
    ]

    # --- ".gz" suffix: no use_gzip argument is passed on write or load ---
    gz_path = TEMP_PATH / "test_gzip.jsonl.gz"
    jsonl_write(str(gz_path), records)
    # Reading through the gzip module proves the file really is compressed.
    with gzip.open(gz_path, "rt", encoding="UTF-8") as handle:
        assert len(handle.readlines()) == 2
    assert jsonl_load(str(gz_path)) == records

    # --- ".gzip" suffix: round trip only ---
    gzip_path = TEMP_PATH / "test_gzip2.jsonl.gzip"
    jsonl_write(str(gzip_path), records)
    assert jsonl_load(str(gzip_path)) == records

    # --- explicit use_gzip=True on a path without a gzip suffix ---
    explicit_path = TEMP_PATH / "test_explicit.jsonl"
    jsonl_write(str(explicit_path), records, use_gzip=True)
    with gzip.open(explicit_path, "rt", encoding="UTF-8") as handle:
        assert len(handle.readlines()) == 2
    assert jsonl_load(str(explicit_path), use_gzip=True) == records
def test_jsonl_load_log():
    """Check that `jsonl_load_log` accepts all-dict files and rejects others.

    A file whose every line is a JSON object must load unchanged. A file that
    contains a string or a list must raise `AssertionError`, and the message
    must name the offending index and say the item is not a dict.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)

    # --- accepted: every entry is a dict ---
    good_entries: list[JSONitem] = [
        {"level": "INFO", "message": "Starting process"},
        {"level": "WARNING", "message": "Low memory"},
        {"level": "ERROR", "message": "Connection failed"},
    ]
    good_path = TEMP_PATH / "test_log_valid.jsonl"
    jsonl_write(str(good_path), good_entries)

    loaded = jsonl_load_log(str(good_path))
    assert loaded == good_entries
    assert all(isinstance(entry, dict) for entry in loaded)

    # --- rejected: the entry at index 1 is not a dict in either file ---
    bad_cases: list[tuple[str, list[JSONitem]]] = [
        (
            "test_log_invalid.jsonl",
            [
                {"level": "INFO", "message": "Valid entry"},
                "not a dict",  # a bare string
                {"level": "ERROR", "message": "Another valid entry"},
            ],
        ),
        (
            "test_log_list.jsonl",
            [
                {"level": "INFO"},
                [1, 2, 3],  # a list
            ],
        ),
    ]

    for filename, entries in bad_cases:
        bad_path = TEMP_PATH / filename
        jsonl_write(str(bad_path), entries)

        with pytest.raises(AssertionError) as exc_info:
            jsonl_load_log(str(bad_path))

        # Inspect the message only after the context manager has captured it.
        message = str(exc_info.value)
        assert "idx = 1" in message
        assert "is not a dict" in message
def test_gzip_compresslevel():
    """Check that `jsonl_write` accepts `gzip_compresslevel` and round-trips.

    The same ``.gz`` file is written at compression levels 1 and 9; after each
    write the data loaded back must equal what was written.
    """
    TEMP_PATH.mkdir(parents=True, exist_ok=True)
    gz_path = TEMP_PATH / "test_compresslevel.jsonl.gz"

    records: list[JSONitem] = [{"value": i, "data": "content"} for i in range(10)]

    # Each pass overwrites the file written by the previous one.
    for level in (1, 9):
        jsonl_write(str(gz_path), records, gzip_compresslevel=level)
        assert jsonl_load(str(gz_path)) == records