fix: parse raw json markdown arrays

This commit is contained in:
ZeroIce 2026-06-23 23:16:13 +08:00
parent 50b3228bc7
commit 586c9704e5
2 changed files with 23 additions and 15 deletions

View File

@@ -7,23 +7,26 @@ def parse_json_markdown(json_string: str):
# Get json from the backticks/braces
json_string = json_string.strip()
starts = ["```json", "```", "``", "`", "{", "["]
ends = ["```", "``", "`", "}", "]"]
fence_ends = {"```json": "```", "```": "```", "``": "``", "`": "`"}
end_index = -1
start_index = 0
start_index = -1
end_marker = ""
parsed: dict = {}
for s in starts:
start_index = json_string.find(s)
if start_index != -1:
if json_string[start_index] not in ("{", "["):
start_index += len(s)
break
if start_index != -1:
for e in ends:
end_index = json_string.rfind(e, start_index)
if end_index != -1:
if json_string[end_index] in ("}", "]"):
end_index += 1
break
start_matches = [(json_string.find(s), s) for s in starts]
start_matches = [(index, marker) for index, marker in start_matches if index != -1]
if start_matches:
start_index, start_marker = min(start_matches, key=lambda match: match[0])
if start_marker in fence_ends:
start_index += len(start_marker)
end_marker = fence_ends[start_marker]
else:
end_marker = "}" if start_marker == "{" else "]"
if start_index != -1 and end_marker:
end_index = json_string.rfind(end_marker, start_index)
if end_index != -1 and end_marker in ("}", "]"):
end_index += len(end_marker)
if start_index != -1 and end_index != -1 and start_index < end_index:
extracted_content = json_string[start_index:end_index].strip()
parsed = json.loads(extracted_content)

View File

@@ -35,6 +35,11 @@ def test_parse_json_markdown_braces_only():
assert parse_json_markdown(src) == {"ok": "yes"}
def test_parse_json_markdown_raw_top_level_array():
    """A bare top-level JSON array, padded with spaces, parses to a list."""
    expected = [{"name": "first"}, {"name": "second"}]
    # No code fence here: the input is just the array with surrounding spaces.
    padded_source = ' [{"name": "first"}, {"name": "second"}] '
    result = parse_json_markdown(padded_source)
    assert result == expected
def test_parse_json_markdown_not_found():
    """Input with no backtick, brace, or bracket is expected to raise ValueError."""
    unparseable = "no json here"
    with pytest.raises(ValueError):
        parse_json_markdown(unparseable)