This is version v0.2 of jed.
Jed is a command-line tool that aims to be the spiritual successor of sed but specialized in JSON data manipulation.
I have written about this project before: https://camilo.matajira.com/?p=635 https://camilo.matajira.com/?p=638
In this release I added the following features:
- Key substitution: You can now substitute keys in JSON objects using regex patterns.
- Value substitution: You can now substitute values in JSON objects using regex patterns.
- Output with Colors! Using pygments
The speed of the tool is remarkable for a tool written in Python. It shows the power of the json and regex modules,
both written in C.
Below is the code of the project, with the unit tests and examples. I still haven’t uploaded it to GitHub; I am waiting for v1.0.
Examples
# With S we apply substitution only to the keys
» echo '{"old_name":"value"}' | ./jed.py 'S/old_/new_/g'
{
"new_name": "value"
}
# With 's' we apply substitution only to the values
» echo '{"old_name":"value"}' | ./jed.py 's/value/new_value/g'
{
"old_name": "new_value"
}
# Jed supports regular expressions
» echo '{"old_name":"value"}' | ./jed.py 's/val.*/new_value/g'
{
"old_name": "new_value"
}
Code
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.12"
# dependencies = ["lark>=1.3.1", "pygments>=2.15.1"]
# ///
import argparse
import sys
import re
import json
from numbers import Number
from lark import Lark, Transformer
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import Terminal256Formatter
# Lark grammar describing a jed script.
#
# A script is one or more commands. Each command is an optional prefix
# (any number of JREGEX terminals followed by at most one JVALUE terminal)
# and then a sed-like substitution:
#   s/OLD/NEW/FLAGS -> rule alias jed_substitute_value_regex (values)
#   S/OLD/NEW/FLAGS -> rule alias jed_substitute_key_regex   (keys)
#
# REGEX only admits letters, digits, spaces and the characters
# "[ ] + . ? * _ ^ -", and must be non-empty, so neither OLD_PATTERN nor
# NEW_PATTERN can be empty. FLAGS is one or more letters. Whitespace between
# tokens is ignored.
#
# NOTE(review): the SedTransformer callbacks in this file unpack exactly three
# tokens (old, new, flags); the JREGEX/JVALUE prefixes look like they are
# accepted by the grammar but not yet consumed -- confirm before relying on
# them.
grammar = r"""
start: command+
command: JREGEX* JVALUE? "s/" OLD_PATTERN "/" NEW_PATTERN "/" FLAGS -> jed_substitute_value_regex
| JREGEX* JVALUE? "S/" OLD_PATTERN "/" NEW_PATTERN "/" FLAGS -> jed_substitute_key_regex
REGEX: /[a-zA-Z0-9 \[\]+.?*_^-]+/
JREGEX: "/"REGEX"/""."?
JVALUE: ":/"REGEX"/"
NEW_PATTERN: REGEX
OLD_PATTERN: REGEX
FLAGS: LETTER+
%import common.LETTER
%import common.WS
%ignore WS
"""
class SedTransformer(Transformer):
    """Apply the commands of a parsed jed script to a JSON document.

    The document is kept in ``self.text`` and replaced by the result of each
    command callback, so after ``transform(tree)`` the attribute holds the
    final document.
    """

    def __init__(self, text):
        super().__init__()
        # ``text`` is whatever the caller wants transformed; the script entry
        # point in this file passes the already-decoded JSON document.
        self.text = text

    def jed_substitute_value_regex(self, args):
        """Handle ``s/old/new/flags``: rewrite values in the document."""
        # The flags token is unpacked but not used.
        pattern, replacement, _flags = args
        self.text = value_substitute(self.text, pattern, replacement)

    def jed_substitute_key_regex(self, args):
        """Handle ``S/old/new/flags``: rewrite keys in the document."""
        # The flags token is unpacked but not used.
        pattern, replacement, _flags = args
        self.text = key_substitute(self.text, pattern, replacement)
def key_substitute(data: dict, old_regex: str, new: str) -> dict:
data_copy = data.copy()
compiled_regex = re.compile(old_regex)
for i in data.keys():
if type(data[i]) is dict:
data_copy[i] = key_substitute(data[i], old_regex, new)
if compiled_regex.match(i):
data_copy[compiled_regex.sub(new, i)] = data_copy[i]
del data_copy[i]
elif type(data[i]) is list:
result = []
for j in data[i]:
result.append(key_substitute(j, old_regex, new))
data_copy[i] = result
elif compiled_regex.search(i):
data_copy[compiled_regex.sub(new, i)] = data[i]
del data_copy[i]
return data_copy
def value_substitute(data: dict | list, old_regex: str, new: str) -> dict:
compiled_regex = re.compile(old_regex, re.DOTALL)
if isinstance(data, list):
result = []
for j in data:
result.append(value_substitute(j, old_regex, new))
return result
elif isinstance(data, dict):
data_copy = data.copy()
for i in data.keys():
data_copy[i] = value_substitute(data[i], old_regex, new)
return data_copy
elif isinstance(data, bool):
data_copy = compiled_regex.sub(new, str(data))
if re.match("^[Tt]rue$", data_copy):
return True
if re.match("^[Ff]alse$", data_copy):
return False
return data_copy
elif isinstance(data, Number):
data_copy = compiled_regex.sub(new, str(data))
try:
return float(data_copy) if "." in data_copy else int(data_copy)
except ValueError:
return data_copy
elif data is None:
data_copy = compiled_regex.sub(new, "")
if data_copy == "":
return None
return data_copy
else:
data_copy = compiled_regex.sub(new, data)
return data_copy
def pretty_print_dictionary(data: dict):
    """Print *data* to stdout as indented, syntax-highlighted JSON.

    The document is serialized with a 4-space indent and colored by pygments
    using the 256-color terminal formatter and the "dracula" style.
    """
    rendered = json.dumps(data, indent=4)
    colored = highlight(
        rendered,
        JsonLexer(),
        Terminal256Formatter(style="dracula"),
    )
    print(colored)
if __name__ == "__main__":
    # Command-line entry point: read a jed script from argv and a JSON
    # document from stdin, apply the script, and pretty-print the result.
    cli = argparse.ArgumentParser(prog="jed", description="Sed for json!")
    cli.add_argument("jed_script")
    options = cli.parse_args()

    # The script is parsed before stdin is read.
    script_tree = Lark(grammar).parse(options.jed_script)

    document = json.loads(sys.stdin.read())
    transformer = SedTransformer(document)
    # The transformer callbacks update transformer.text as a side effect.
    transformer.transform(script_tree)
    pretty_print_dictionary(transformer.text)
# vim: set syntax=python filetype=python:
Unit tests
import unittest
import json
from jed import key_substitute, value_substitute
class TestInitial(unittest.TestCase):
    """Unit tests for ``key_substitute`` and ``value_substitute``.

    The checks use ``unittest`` assertion methods rather than bare ``assert``
    statements so they are not stripped under ``python -O`` and report both
    operands on failure.
    """

    def test_key_substitute_1(self):
        """A top-level key matching the regex is renamed."""
        some_json = '{"sha": "0eb3da11ed489189963045a3d4eb21ba343736cb", "node_id": "C_kwDOAE3WVdoAKDBlYjNkYTExZWQ0ODkxODk5NjMwNDVhM2Q0ZWIyMWJhMzQzNzM2Y2I"}'
        data = json.loads(some_json)
        data = key_substitute(data, "sha", "new_sha")
        self.assertIn("new_sha", data)

    def test_key_substitute_recursivity(self):
        """Keys of nested objects are renamed as well."""
        some_json = """
{
"commit": {
"author": {
"name": "bigmoonbit"
}
}
}
"""
        data = json.loads(some_json)
        data = key_substitute(data, "a", "o")
        self.assertEqual(data["commit"]["outhor"]["nome"], "bigmoonbit")

    def test_key_substitute_repeated_keys_keeps_last(self):
        """When a rename collides with an existing key, the renamed one wins."""
        some_json = """
{
"commit": {
"author": {
"name": "bigmoonbit",
"nombre": "hola"
}
}
}
"""
        data = json.loads(some_json)
        data = key_substitute(data, "nombre", "name")
        # Repeated keys will keep the last one
        self.assertEqual(data["commit"]["author"]["name"], "hola")

    def test_key_substitute_recursivity_inside_lists(self):
        """Keys of objects stored inside a list are renamed."""
        some_json = """
{
"commit": [
{ "author": "camilo" },
{ "author": "andres" }
]
}
"""
        data = json.loads(some_json)
        data = key_substitute(data, "author", "autor")
        # Every object in the list gets its key renamed
        self.assertEqual(data["commit"][0]["autor"], "camilo")
        self.assertEqual(data["commit"][1]["autor"], "andres")

    def test_value_substitute(self):
        """A string value nested in objects is substituted."""
        some_json = """
{
"commit": {
"author": {
"name": "bigmoonbit"
}
}
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, "oo", "AAA")
        self.assertEqual(data["commit"]["author"]["name"], "bigmAAAnbit")

    def test_value_substitute_recursivity_inside_lists(self):
        """Values of objects stored inside a list are substituted."""
        some_json = """
{
"commit": [
{ "author": "camilo" },
{ "author": "andres" }
]
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, "andres", "mata")
        self.assertEqual(data["commit"][1]["author"], "mata")

    def test_value_substitute_recursivity_with_list_in_the_root(self):
        """A document whose root is a list is supported."""
        some_json = """
[
{ "author": "camilo" },
{ "author": "andres" }
]
"""
        data = json.loads(some_json)
        data = value_substitute(data, "andres", "mata")
        self.assertTrue(data[1])
        self.assertEqual(data[1]["author"], "mata")

    def test_value_substitute_numbers_can_be_replaced(self):
        """A numeric value is substituted and stays a number."""
        some_json = """
{
"commit": {
"author": {
"name": 5
}
}
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, "5", "6")
        self.assertEqual(data["commit"]["author"]["name"], 6)

    def test_value_substitute_booleans_can_be_modified(self):
        """A boolean value can be flipped through its Python spelling."""
        some_json = """
{
"commit": {
"author": {
"name": true
}
}
}
"""
        # TODO
        # Problem, true is interpreted as True, and I create the text "True"
        # So it's hard for the user to know how to replace booleans
        data = json.loads(some_json)
        data = value_substitute(data, "True", "False")
        self.assertIs(data["commit"]["author"]["name"], False)

    def test_value_substitute_random_bug(self):
        """Regression: ``.+`` replaces a whole hexadecimal string."""
        some_json = """
{
"sha": "03cb1e19da91f0df728914d4c8717f7490df04e4"
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".+", "hola")
        self.assertEqual(data["sha"], "hola")

    def test_value_substitute_numbers_can_be_replaced_2(self):
        """A number replaced by non-numeric text becomes a string."""
        some_json = """
{
"sha": 0
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".+", "hola")
        self.assertEqual(data["sha"], "hola")

    def test_value_substitute_nulls_can_be_replaced(self):
        """A null value is replaced when the pattern matches the empty string."""
        some_json = """
{
"sha": null
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".*", "hola")
        self.assertEqual(data["sha"], "hola")

    def test_value_substitute_new_lines_are_replaced(self):
        """``.`` also matches newlines inside string values."""
        some_json = """
{
"sha": "a\\nb"
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".+", "hola")
        self.assertEqual(data["sha"], "hola")
