jed v0.2

Camilo MATAJIRA Avatar

This is version v0.2 of jed.
Jed is a command-line tool that aims to be the spiritual successor of sed but specialized in JSON data manipulation.
I have written about this project before: https://camilo.matajira.com/?p=635 https://camilo.matajira.com/?p=638

In this release I added the following features:

  • Key substitution: You can now substitute keys in JSON objects using regex patterns.
  • Value substitution: You can now substitute values in JSON objects using regex patterns.
  • Output with Colors! Using pygments

The speed of the tool is remarkable for a tool written in Python. It shows the power of the json and re modules,
both of which are backed by C implementations.

Below is the code of the project with the unit tests and examples. I still haven’t uploaded it to GitHub; I am waiting for v1.0.

Examples

# With S we apply substitution only to the keys
» echo '{"old_name":"value"}' | ./jed.py 'S/old_/new_/g'
{
    "new_name": "value"
}

# With 's' we apply substitution only to the values
» echo '{"old_name":"value"}' | ./jed.py 's/value/new_value/g'
{
    "old_name": "new_value"
}
# Jed supports regular expressions
» echo '{"old_name":"value"}' | ./jed.py 's/val.*/new_value/g'
{
    "old_name": "new_value"
}

Code

#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.12"
# dependencies = ["lark>=1.3.1", "pygments>=2.15.1"]
# ///


import argparse
import sys
import re
import json
from numbers import Number
from lark import Lark, Transformer
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import Terminal256Formatter


# Lark grammar for jed scripts.
#
# A script is one or more commands. Each command is a sed-style substitution:
#   s/OLD/NEW/FLAGS  -> jed_substitute_value_regex  (handled on JSON values)
#   S/OLD/NEW/FLAGS  -> jed_substitute_key_regex    (handled on JSON keys)
# A command may be prefixed by any number of JREGEX selectors ("/re/" with an
# optional trailing ".") and at most one JVALUE selector (":/re/").
# NOTE(review): SedTransformer unpacks exactly three tokens per command, so
# these selector prefixes look parsed-but-not-yet-supported -- confirm.
#
# REGEX only admits letters, digits, spaces and the characters
# [ ] + . ? * _ ^ -  so patterns containing e.g. "/", "\", "(" or "|" do not
# parse. FLAGS is one or more letters. Whitespace between tokens is ignored.
grammar = r"""
start: command+

command: JREGEX* JVALUE? "s/" OLD_PATTERN "/" NEW_PATTERN "/" FLAGS   -> jed_substitute_value_regex
       | JREGEX* JVALUE? "S/" OLD_PATTERN "/" NEW_PATTERN "/" FLAGS   -> jed_substitute_key_regex

REGEX: /[a-zA-Z0-9 \[\]+.?*_^-]+/
JREGEX: "/"REGEX"/""."?
JVALUE: ":/"REGEX"/"
NEW_PATTERN: REGEX
OLD_PATTERN: REGEX
FLAGS: LETTER+

%import common.LETTER
%import common.WS
%ignore WS
"""


class SedTransformer(Transformer):
    """Lark transformer that runs each parsed jed command against JSON data.

    The document being edited is kept in ``self.text``. Every command
    callback replaces it with the result of the corresponding substitution,
    so after ``transform()`` the final document is read back from
    ``self.text``.
    """

    def __init__(self, text):
        """Remember ``text``, the decoded JSON data the commands operate on."""
        super().__init__()
        self.text = text

    def jed_substitute_value_regex(self, args):
        """Handle an ``s/old/new/flags`` command: substitute inside values."""
        # The flags token is unpacked but not used by the substitution.
        pattern, replacement, _flags = args
        updated = value_substitute(self.text, pattern, replacement)
        self.text = updated

    def jed_substitute_key_regex(self, args):
        """Handle an ``S/old/new/flags`` command: substitute inside keys."""
        # The flags token is unpacked but not used by the substitution.
        pattern, replacement, _flags = args
        updated = key_substitute(self.text, pattern, replacement)
        self.text = updated


def key_substitute(data: dict, old_regex: str, new: str) -> dict:
    data_copy = data.copy()
    compiled_regex = re.compile(old_regex)
    for i in data.keys():
        if type(data[i]) is dict:
            data_copy[i] = key_substitute(data[i], old_regex, new)
            if compiled_regex.match(i):
                data_copy[compiled_regex.sub(new, i)] = data_copy[i]
                del data_copy[i]
        elif type(data[i]) is list:
            result = []
            for j in data[i]:
                result.append(key_substitute(j, old_regex, new))
            data_copy[i] = result
        elif compiled_regex.search(i):
            data_copy[compiled_regex.sub(new, i)] = data[i]
            del data_copy[i]
    return data_copy


def value_substitute(data: dict | list, old_regex: str, new: str) -> dict:
    compiled_regex = re.compile(old_regex, re.DOTALL)
    if isinstance(data, list):
        result = []
        for j in data:
            result.append(value_substitute(j, old_regex, new))
        return result
    elif isinstance(data, dict):
        data_copy = data.copy()
        for i in data.keys():
            data_copy[i] = value_substitute(data[i], old_regex, new)
        return data_copy
    elif isinstance(data, bool):
        data_copy = compiled_regex.sub(new, str(data))
        if re.match("^[Tt]rue$", data_copy):
            return True
        if re.match("^[Ff]alse$", data_copy):
            return False
        return data_copy
    elif isinstance(data, Number):
        data_copy = compiled_regex.sub(new, str(data))
        try:
            return float(data_copy) if "." in data_copy else int(data_copy)
        except ValueError:
            return data_copy
    elif data is None:
        data_copy = compiled_regex.sub(new, "")
        if data_copy == "":
            return None
        return data_copy
    else:
        data_copy = compiled_regex.sub(new, data)
        return data_copy


def pretty_print_dictionary(data: dict):
    """Print ``data`` to stdout as 4-space-indented, syntax-highlighted JSON.

    The JSON text is coloured with pygments' ``Terminal256Formatter`` using
    the ``dracula`` style.
    """
    rendered = json.dumps(data, indent=4)
    lexer = JsonLexer()
    formatter = Terminal256Formatter(style="dracula")
    coloured = highlight(rendered, lexer, formatter)
    print(coloured)


if __name__ == "__main__":
    # Command line: one positional argument holding the jed script.
    cli = argparse.ArgumentParser(prog="jed", description="Sed for json!")
    cli.add_argument("jed_script")
    options = cli.parse_args()

    # Parse the script before touching stdin, so a malformed script fails
    # before any input is consumed.
    script_tree = Lark(grammar).parse(options.jed_script)

    # The JSON document to edit arrives on stdin.
    document = json.loads(sys.stdin.read())

    # Run every command of the script, then print the resulting document.
    transformer = SedTransformer(document)
    transformer.transform(script_tree)
    pretty_print_dictionary(transformer.text)


# vim: set syntax=python filetype=python:

Unit tests

import unittest
import json
from jed import key_substitute, value_substitute


class TestInitial(unittest.TestCase):
    """Unit tests for jed's ``key_substitute`` and ``value_substitute``.

    The checks use ``unittest`` assertion methods rather than bare ``assert``
    statements so they still run under ``python -O`` and report the compared
    values on failure.
    """

    def test_key_substitute_1(self):
        """A top-level key matching the pattern is renamed."""
        some_json = '{"sha": "0eb3da11ed489189963045a3d4eb21ba343736cb", "node_id": "C_kwDOAE3WVdoAKDBlYjNkYTExZWQ0ODkxODk5NjMwNDVhM2Q0ZWIyMWJhMzQzNzM2Y2I"}'
        data = json.loads(some_json)
        data = key_substitute(data, "sha", "new_sha")
        self.assertIn("new_sha", data)

    def test_key_substitute_recursivity(self):
        """Keys of nested objects are renamed as well."""
        some_json = """
{
  "commit": {
    "author": {
      "name": "bigmoonbit"
    }
}
}
"""
        data = json.loads(some_json)
        data = key_substitute(data, "a", "o")
        self.assertEqual(data["commit"]["outhor"]["nome"], "bigmoonbit")

    def test_key_substitute_repeated_keys_keeps_last(self):
        """When a rename collides with an existing key, the last one wins."""
        some_json = """
{
  "commit": {
    "author": {
      "name": "bigmoonbit",
      "nombre": "hola"
    }
}
}
"""
        data = json.loads(some_json)
        data = key_substitute(data, "nombre", "name")
        # Repeated keys will keep the last one
        self.assertEqual(data["commit"]["author"]["name"], "hola")

    def test_key_substitute_recursivity_inside_lists(self):
        """Objects stored inside a list have their keys renamed."""
        some_json = """
{
  "commit": [
    { "author": "camilo" },
    { "author": "andres" }
    ]
}
"""
        data = json.loads(some_json)
        data = key_substitute(data, "author", "autor")
        # Every object inside the list gets its key renamed
        self.assertEqual(data["commit"][0]["autor"], "camilo")
        self.assertEqual(data["commit"][1]["autor"], "andres")

    def test_value_substitute(self):
        """A string value nested in objects is substituted."""
        some_json = """
{
  "commit": {
    "author": {
      "name": "bigmoonbit"
    }
}
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, "oo", "AAA")
        self.assertEqual(data["commit"]["author"]["name"], "bigmAAAnbit")

    def test_value_substitute_recursivity_inside_lists(self):
        """Values of objects stored inside a list are substituted."""
        some_json = """
{
  "commit": [
    { "author": "camilo" },
    { "author": "andres" }
    ]
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, "andres", "mata")
        self.assertEqual(data["commit"][1]["author"], "mata")

    def test_value_substitute_recursivity_with_list_in_the_root(self):
        """A JSON document whose root is a list is supported."""
        some_json = """
[
    { "author": "camilo" },
    { "author": "andres" }
]
"""
        data = json.loads(some_json)
        data = value_substitute(data, "andres", "mata")
        self.assertTrue(data[1])
        self.assertEqual(data[1]["author"], "mata")

    def test_value_substitute_numbers_can_be_replaced(self):
        """A number replaced by another number stays a number."""
        some_json = """
{
  "commit": {
    "author": {
      "name": 5
    }
}
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, "5", "6")
        self.assertEqual(data["commit"]["author"]["name"], 6)

    def test_value_substitute_booleans_can_be_modified(self):
        """A boolean can be flipped using its Python spelling."""
        some_json = """
{
  "commit": {
    "author": {
      "name": true
    }
}
}
"""
        # TODO
        # Problem, true is interpreted as True, and I create the text "True"
        # So it's hard for the user to know how to replace booleans
        data = json.loads(some_json)
        data = value_substitute(data, "True", "False")
        self.assertIs(data["commit"]["author"]["name"], False)

    def test_value_substitute_random_bug(self):
        """``.+`` replaces a whole string value exactly once."""
        some_json = """ 
{
"sha": "03cb1e19da91f0df728914d4c8717f7490df04e4"
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".+", "hola")
        self.assertEqual(data["sha"], "hola")

    def test_value_substitute_numbers_can_be_replaced_2(self):
        """A number replaced by non-numeric text becomes a string."""
        some_json = """ 
{
"sha": 0
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".+", "hola")
        self.assertEqual(data["sha"], "hola")

    def test_value_substitute_nulls_can_be_replaced(self):
        """``null`` is treated as empty text, so ``.*`` can replace it."""
        some_json = """ 
{
"sha": null
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".*", "hola")
        self.assertEqual(data["sha"], "hola")

    def test_value_substitute_new_lines_are_replaced(self):
        """``.`` also matches newlines inside string values."""
        some_json = """ 
{
"sha": "a\\nb"
}
"""
        data = json.loads(some_json)
        data = value_substitute(data, ".+", "hola")
        self.assertEqual(data["sha"], "hola")

Tagged in :

Camilo MATAJIRA Avatar