Coverage for muutils/jsonlines.py: 0%
32 statements
« prev ^ index » next coverage.py v7.6.1, created at 2025-04-04 03:33 -0600
« prev ^ index » next coverage.py v7.6.1, created at 2025-04-04 03:33 -0600
1"utilities for reading and writing jsonlines files, including gzip support"
3from __future__ import annotations
5import gzip
6import json
7from typing import Callable, Sequence
9from muutils.json_serialize import JSONitem
# File-name suffixes that mark a path as gzip-compressed.
_GZIP_EXTENSIONS: tuple = (".gz", ".gzip")


def _file_is_gzip(path: str) -> bool:
    """Report whether ``path`` ends with one of the suffixes in ``_GZIP_EXTENSIONS``.

    ``path`` is passed through ``str()`` before the comparison, so the check
    is purely textual and case-sensitive.
    """
    # `str.endswith` accepts a tuple of candidate suffixes directly
    return str(path).endswith(_GZIP_EXTENSIONS)
def _get_opener(
    path: str,
    use_gzip: bool | None = None,
) -> Callable:
    """Pick the callable used to open ``path``.

    When ``use_gzip`` is ``None`` the decision is made from the file name via
    ``_file_is_gzip``; otherwise the flag is taken as given.

    Returns ``gzip.open`` when gzip is wanted and the builtin ``open``
    otherwise.
    """
    gzip_wanted = _file_is_gzip(path) if use_gzip is None else use_gzip
    if gzip_wanted:
        return gzip.open
    return open
def jsonl_load(
    path: str,
    /,
    *,
    use_gzip: bool | None = None,
) -> list[JSONitem]:
    """Read a jsonlines file and return its parsed lines as a list.

    The file is opened in text mode as UTF-8, through ``gzip.open`` or the
    builtin ``open`` as chosen by ``_get_opener``. Every line is handed to
    ``json.loads``; a line that is not valid JSON propagates that call's error.

    # Parameters:
     - `path : str`
        file to read (positional-only)
     - `use_gzip : bool | None`
        force gzip on/off; `None` lets `_get_opener` decide from the extension
        (defaults to `None`)

    # Returns:
     - `list[JSONitem]`
        one parsed item per line, in file order
    """
    open_file: Callable = _get_opener(path, use_gzip)
    with open_file(path, "rt", encoding="UTF-8") as stream:
        return [json.loads(raw_line) for raw_line in stream]
def jsonl_load_log(
    path: str,
    /,
    *,
    use_gzip: bool | None = None,
) -> list[dict]:
    """Load a jsonlines file in which every line is expected to be a JSON object.

    Delegates the reading to ``jsonl_load`` and then asserts that each loaded
    item is a ``dict``. The check is a plain ``assert``, so it raises
    ``AssertionError`` (naming the offending index and item) and is skipped
    when Python runs with assertions disabled.
    """
    entries: list[JSONitem] = jsonl_load(path, use_gzip=use_gzip)
    for idx, item in enumerate(entries):
        assert isinstance(item, dict), (
            f"item {idx = } from file {path} is not a dict: {type(item) = }\t{item = }"
        )

    # the per-item assertions are invisible to mypy, which still sees
    # list[JSONitem] here instead of the declared list[dict]
    return entries  # type: ignore
def jsonl_write(
    path: str,
    items: Sequence[JSONitem],
    use_gzip: bool | None = None,
    gzip_compresslevel: int = 2,
) -> None:
    """Write ``items`` to ``path`` as jsonlines, one JSON document per line.

    The file is opened in text mode as UTF-8. Each item is serialized with
    ``json.dumps`` and followed by a newline.

    # Parameters:
     - `path : str`
        destination file
     - `items : Sequence[JSONitem]`
        items to serialize, written in order
     - `use_gzip : bool | None`
        force gzip on/off; `None` decides from the file extension via
        `_file_is_gzip`
        (defaults to `None`)
     - `gzip_compresslevel : int`
        compression level passed to `gzip.open` whenever gzip is used,
        whether requested explicitly or detected from the extension
        (defaults to `2`)
    """
    # Resolve the auto-detect case up front. Previously the compression level
    # was only forwarded when the caller passed `use_gzip=True` explicitly, so
    # a ".gz" path with `use_gzip=None` silently fell back to gzip's own
    # default level and ignored `gzip_compresslevel`.
    if use_gzip is None:
        use_gzip = _file_is_gzip(path)

    opener: Callable = _get_opener(path, use_gzip)

    # the builtin `open` does not accept `compresslevel`, so only pass it
    # along when the gzip opener is in use
    opener_kwargs: dict = {"compresslevel": gzip_compresslevel} if use_gzip else {}

    with opener(path, "wt", encoding="UTF-8", **opener_kwargs) as f:
        for item in items:
            f.write(json.dumps(item) + "\n")