Jmespath - query language for JSON#

Keywords: JSON

用命令行处理过 JSON 的开发者几乎都知道大名鼎鼎的 JSON 命令行处理工具 JQ <https://github.com/jqlang/jq>. JQ 是 2013 年发布的.

而 Jmespath 是 2013 年发布的一个和 JQ 类似的 JSON 处理工具, 功能也非常强大, 只是没有 JQ 那么有名气. 它的优势在于在主流编程语言中几乎都有实现, 其中以 Python 的实现最佳, 最流行. 换言之, 你可以在 Python 中用 Jmespath 直接处理原生的字典和列表数据结构, 其他语言也类似. Jmespath 的作者是 AWS 的资深工程师 James Saryerwinnie (越南裔), 他同时也是 AWS Lambda 框架 Chalice 的作者. Jmespath 在 AWS 内部也很流行, 同时也是 AWS CLI 内部使用的 JSON 查询语言 (例如 --query 参数).

如果你要对 JSON 数据进行处理, 可能 JQ 会更强. 而如果你主要是从 JSON 中提取数据, 那么 Jmespath 非常适合.

Reference:

  • PyPI: https://pypi.org/project/jmespath/

  • GitHub: https://github.com/jmespath/jmespath.py

  • Doc: JmesPath.org 官网, 上面还自带一个交互式的输入输出实验小工具.

  • Tutorial: 快速入门必读.

  • Examples: 通过研究进阶的例子来学习高阶技巧.

  • Specifications: 语法的详细规则, 函数功能说明等. 相当于 API 文档.

Basic Usage#

[1]:
import jmespath
[2]:
from rich import print as rprint
[3]:
def search(expr, data):
    """Evaluate a JMESPath expression against ``data`` and pretty-print the result.

    Args:
        expr: JMESPath expression string, e.g. ``"Profile.Name"``.
        data: The Python object (dict, list, scalar) to query.

    The result is only printed via ``rich``; nothing is returned, so a
    notebook cell ending in ``search(...)`` shows the output exactly once.
    """
    result = jmespath.search(expr, data)
    rprint(result)

Access Node#

[4]:
data = {"Name": "Alice"}
expr = "Name"
search(expr, data)
Alice
[5]:
data = {"Profile": {"Name": "Alice"}}
expr = "Profile.Name"
search(expr, data)
Alice
[6]:
data = {"Profile": {"Name": "Alice"}}
# "NotExists" is not a key of Profile; the lookup prints None for the
# missing node.
expr = "Profile.NotExists"
search(expr, data)
None

Root Node#

[7]:
data = "Alice"
expr = "@"
search(expr, data)
Alice
[8]:
data = {"Name": "Alice"}
expr = "@"
search(expr, data)
{'Name': 'Alice'}
[9]:
data = [1, 2, 3, 4, 5]
expr = "@"
search(expr, data)
[1, 2, 3, 4, 5]

List, Index and Slicing#

[10]:
data = [1, 2, 3, 4, 5]
expr = "[]"
search(expr, data)
[1, 2, 3, 4, 5]
[11]:
data = [1, 2, 3, 4, 5]
expr = "[0]"
search(expr, data)
1
[12]:
data = [1, 2, 3, 4, 5]
expr = "[1]"
search(expr, data)
2
[13]:
data = [1, 2, 3, 4, 5]
expr = "[-1]"
search(expr, data)
5
[14]:
data = [1, 2, 3, 4, 5]
expr = "[-2]"
search(expr, data)
4
[15]:
data = [1, 2, 3, 4, 5]
expr = "[:3]"
search(expr, data)
[1, 2, 3]
[16]:
data = [1, 2, 3, 4, 5]
expr = "[-3:]"
search(expr, data)
[3, 4, 5]
[17]:
data = [1, 2, 3, 4, 5]
expr = "[::-1]"
search(expr, data)
[5, 4, 3, 2, 1]
[18]:
data = [1, 2, 3, 4, 5]
expr = "[1:4]"
search(expr, data)
[2, 3, 4]

List Projections#

[19]:
data = {
    "people": [
        {"first": "James", "last": "d"},
        {"first": "Jacob", "last": "e"},
        {"first": "Jayden", "last": "f"},
        {"missing": "different"}
    ],
    "foo": {"bar": "baz"}
}
expr = "people[*]"
search(expr, data)
[
    {'first': 'James', 'last': 'd'},
    {'first': 'Jacob', 'last': 'e'},
    {'first': 'Jayden', 'last': 'f'},
    {'missing': 'different'}
]
[20]:
data = {
    "people": [
        {"first": "James", "last": "d"},
        {"first": "Jacob", "last": "e"},
        {"first": "Jayden", "last": "f"},
        {"missing": "different"}
    ],
    "foo": {"bar": "baz"}
}
expr = "people[:2].first"
search(expr, data)
['James', 'Jacob']

Object Projections#

[21]:
data = {
    "ops": {
        "functionA": {"numArgs": 2},
        "functionB": {"numArgs": 3},
        "functionC": {"variadic": True}
    }
}
# "*" projects over the values of "ops". functionC has no "numArgs", so it
# contributes nothing to the resulting list.
expr = "ops.*.numArgs"
search(expr, data)
[2, 3]

Flatten Projections#

[22]:
data = {
    "reservations": [
        {
            "instances": [
                {"state": "running"},
                {"state": "stopped"},
            ]
        },
        {
            "instances": [
                {"state": "terminated"},
                {"state": "running"},
            ]
        },
    ]
}
expr = "reservations[*].instances[*]"
search(expr, data)
[[{'state': 'running'}, {'state': 'stopped'}], [{'state': 'terminated'}, {'state': 'running'}]]
[23]:
data = {
    "reservations": [
        {
            "instances": [
                {"state": "running"},
                {"state": "stopped"},
            ]
        },
        {
            "instances": [
                {"state": "terminated"},
                {"state": "running"},
            ]
        },
    ]
}
# "[]" flattens: the per-reservation instance lists are merged into a single
# flat list, whereas "reservations[*].instances[*]" keeps one inner list per
# reservation.
expr = "reservations[].instances[]"
search(expr, data)
[{'state': 'running'}, {'state': 'stopped'}, {'state': 'terminated'}, {'state': 'running'}]

Filter Projections#

[24]:
data = {
    "machines": [
        {"name": "a", "state": "running"},
        {"name": "b", "state": "stopped"},
        {"name": "c", "state": "running"},
    ]
}
expr = "machines[?state=='running'].name"
search(expr, data)
['a', 'c']

Pipe Expressions#

[25]:
data = {
    "people": [
        {"first": "James", "last": "d"},
        {"first": "Jacob", "last": "e"},
        {"first": "Jayden", "last": "f"},
        {"missing": "different"},
    ],
    "foo": {"bar": "baz"},
}
expr = "people[*].first | [0]"
search(expr, data)
James
[26]:
data = {"Profile": {"Name": "Alice"}}
# NOTE: "||" is an or-expression, not a pipe ("|"). Profile.NotExists finds
# nothing, so the right-hand literal 'Unknown' is used as the result.
expr = "Profile.NotExists || 'Unknown'"
search(expr, data)
Unknown

MultiSelect#

[27]:
data = {
    "people": [
        {
            "name": "a",
            "state": {"name": "up"},
        },
        {
            "name": "b",
            "state": {"name": "down"},
        },
        {
            "name": "c",
            "state": {"name": "up"},
        },
    ]
}
expr = "people[].[name, state.name]"
search(expr, data)
[['a', 'up'], ['b', 'down'], ['c', 'up']]
[28]:
data = {
    "people": [
        {
            "name": "a",
            "state": {"name": "up"},
        },
        {
            "name": "b",
            "state": {"name": "down"},
        },
        {
            "name": "c",
            "state": {"name": "up"},
        },
    ]
}
expr = "people[].{Name: name, State: state.name}"
search(expr, data)
[{'Name': 'a', 'State': 'up'}, {'Name': 'b', 'State': 'down'}, {'Name': 'c', 'State': 'up'}]

Functions#

[29]:
data = {
    "people": [
        {
            "name": "b",
            "age": 30,
            "state": {"name": "up"},
        },
        {
            "name": "a",
            "age": 50,
            "state": {"name": "down"},
        },
        {
            "name": "c",
            "age": 40,
            "state": {"name": "up"},
        },
    ]
}
expr = "length(people)"
search(expr, data)
3
[30]:
data = {
    "people": [
        {"name": "b", "age": 30},
        {"name": "a", "age": 50},
        {"name": "c", "age": 40},
    ]
}
expr = "max_by(people, &age).name"
search(expr, data)
a
[31]:
data = {
    "myarray": [
        "foo",
        "foobar",
        "barfoo",
        "bar",
        "baz",
        "barbaz",
        "barfoobaz",
    ]
}
expr = "myarray[?contains(@, 'foo') == `true`]"
search(expr, data)
['foo', 'foobar', 'barfoo', 'barfoobaz']

Examples from My Personal Experience#

Filter Tag Value#

[32]:
# Each tag is a {"Key": ..., "Value": ...} pair. The filter keeps the tag
# whose Key equals 'Name' and projects its Value.
data = {
    "Tags": [
        {"Key": "Name", "Value": "alice"},
        {"Key": "Age", "Value": "bob"},
    ]
}
expr = "Tags[?Key=='Name'].Value"
search(expr, data)
['alice']

Extract List of Attribute#

[33]:
data = {
    "Buckets": [
        {"Bucket": "my-bucket-1"},
        {"Bucket": "my-bucket-2"},
    ]
}
expr = "Buckets[].Bucket"
search(expr, data)
['my-bucket-1', 'my-bucket-2']

Literal Value#

[34]:
data = {}
expr = "'Unknown'"
search(expr, data)
Unknown
[35]:
data = {}
expr = "['a', 'b', 'c']"
search(expr, data)
['a', 'b', 'c']

Default Value if Node not Exists#

[36]:
data = {"Profile": {"Name": "Alice"}}
expr = "Profile.NotExists || 'Unknown'"
search(expr, data)
Unknown
[37]:
data = {
    "name": [1, 2, 3]
}
expr = "name || `[4, 5, 6]`"
search(expr, data)
[1, 2, 3]
[38]:
data = {
    "name": [1, 2, 3]
}
expr = "unknown || `[4, 5, 6]`"
search(expr, data)
[4, 5, 6]

Get Keys or Values#

[39]:
data = {
    "key1": "value1",
    "key2": "value2",
}
expr = "keys(@)"
search(expr, data)
['key1', 'key2']
[40]:
data = {
    "key1": "value1",
    "key2": "value2",
}
expr = "values(@)"
search(expr, data)
['value1', 'value2']

MultiSelect#

[41]:
data = {
    "key1": "value1",
    "key2": "value2",
}
expr = "[key1, key2]"
search(expr, data)
['value1', 'value2']

Join Function#

[42]:
# ref: https://jmespath.org/specification.html#join
data = {
    "key1": "value1",
    "key2": "value2",
}
expr = "join(', ', @.[key1, key2])"
search(expr, data)
value1, value2