Amazon ION - Richly Typed Self Describing Data Serialization Format#

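Amazon Ion is a richly typed, self-describing data serialization format. This notebook uses the `amazon.ion` Python package to encode Python data as Ion binary and Ion text, load it back, and convert the loaded values for use with Polars.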

[1]:
from pathlib import Path
import amazon.ion.simpleion as ion
from amazon.ion.simple_types import IonPyDict
from rich import print as rprint

# Location of the sample Ion file, resolved against the current working
# directory of the notebook.
dir_here = Path.cwd()
path = dir_here.joinpath("data.ion")
[2]:
# Sample record used by every example below: scalar fields, a nested
# struct containing a null value, and a list of structs.
py_data = dict(
    id=1,
    name="Alice",
    age=25,
    weight=94.7,
    bio=dict(
        dob="1990-01-01",
        address="123 Main St.",
        hometown=None,
    ),
    relationships=[
        dict(name="Bob", relation="friend"),
        dict(name="Charlie", relation="father"),
    ],
)

Encode as Bytes#

[3]:
# Serialize the sample record with ion.dumps' default settings; the printed
# result is a bytes object.
print("--- Dump one record to bytes ---")
print(ion.dumps(py_data))
--- Dump one record to bytes ---
b'\xe0\x01\x00\xea\xee\xc5\x81\x83\xde\xc1\x87\xbe\xbe\x82id\x83age\x86weight\x83bio\x83dob\x87address\x88hometown\x8drelationships\x88relation\xde\xdb\x8a!\x01\x84\x85Alice\x8b!\x19\x8cH@W\xac\xcc\xcc\xcc\xcc\xcd\x8d\xde\x9c\x8e\x8a1990-01-01\x8f\x8c123 Main St.\x90\x0f\x91\xbe\xa2\xde\x8d\x84\x83Bob\x92\x86friend\xde\x91\x84\x87Charlie\x92\x86father'
[4]:
# Same call, but the record is wrapped in a list so that several records
# can be passed to ion.dumps together.
print("--- Dump many records to bytes ---")
print(ion.dumps([py_data]))
--- Dump many records to bytes ---
b'\xe0\x01\x00\xea\xee\xc5\x81\x83\xde\xc1\x87\xbe\xbe\x82id\x83age\x86weight\x83bio\x83dob\x87address\x88hometown\x8drelationships\x88relation\xbe\xdd\xde\xdb\x8a!\x01\x84\x85Alice\x8b!\x19\x8cH@W\xac\xcc\xcc\xcc\xcc\xcd\x8d\xde\x9c\x8e\x8a1990-01-01\x8f\x8c123 Main St.\x90\x0f\x91\xbe\xa2\xde\x8d\x84\x83Bob\x92\x86friend\xde\x91\x84\x87Charlie\x92\x86father'
[5]:
# ion.dump writes to a file object instead of returning the payload; the
# file is opened in binary write mode ("wb").
print("--- Dump one record to file ---")
with path.open("wb") as f:
    ion.dump(py_data, f)
--- Dump one record to file ---
[6]:
# Read the file back in binary mode. The loaded value is an IonPyDict
# rather than a plain dict (see the printed type).
print("--- Load one record from file ---")
with path.open("rb") as f:
    data = ion.load(f)
    print(f"{type(data) = }")
--- Load one record from file ---
type(data) = <class 'amazon.ion.simple_types.IonPyDict'>
[7]:
# Show the loaded record; the null "hometown" comes back as an IonPyNull
# object, not as None.
print(f"{data = }")
data = {'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1043130a0>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[8]:
# Overwrite the same file ("wb") with a list containing the record.
print("--- Dump many records to file ---")
with path.open("wb") as f:
    ion.dump([py_data], f)
--- Dump many records to file ---
[9]:
# A dumped list loads back as an IonPyList (see the printed type).
print("--- Load many records from file ---")
with path.open("rb") as f:
    data = ion.load(f)
    print(f"{type(data) = }")
--- Load many records from file ---
type(data) = <class 'amazon.ion.simple_types.IonPyList'>
[10]:
# Show the loaded list of records.
print(f"{data = }")
data = [{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x104308cd0>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}]

Encode as String#

[11]:
# binary=False makes ion.dumps produce Ion text instead of bytes; the
# printed output starts with the $ion_1_0 marker.
print("--- Dump one record to string ---")
print(ion.dumps(py_data, binary=False))
--- Dump one record to string ---
$ion_1_0 {id:1,name:"Alice",age:25,weight:94.7e0,bio:{dob:"1990-01-01",address:"123 Main St.",hometown:null},relationships:[{name:"Bob",relation:"friend"},{name:"Charlie",relation:"father"}]}
[12]:
# Text form of a list of records: the printed value is wrapped in [...].
print("--- Dump many records to string ---")
print(ion.dumps([py_data], binary=False))
--- Dump many records to string ---
$ion_1_0 [{id:1,name:"Alice",age:25,weight:94.7e0,bio:{dob:"1990-01-01",address:"123 Main St.",hometown:null},relationships:[{name:"Bob",relation:"friend"},{name:"Charlie",relation:"father"}]}]
[13]:
# Round-trip through the text form: dump to Ion text, then parse it with
# ion.loads. The result is an IonPyDict (see the printed type).
print("--- Read one record from string ---")
text = ion.dumps(py_data, binary=False)
data = ion.loads(text)
print(f"{type(data) = }")
--- Read one record from string ---
type(data) = <class 'amazon.ion.simple_types.IonPyDict'>
[14]:
# Show the record parsed from Ion text.
print(f"{data = }")
data = {'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1043259a0>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[15]:
# The loaded IonPyDict supports .items() iteration like a regular dict.
for k, v in data.items():
    print(f"{k} = {v}")
id = 1
name = Alice
age = 25
weight = 94.7
bio = {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1043259a0>}
relationships = [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]

Ion Dict to Regular Python Dict#

[16]:
# Ion text of the sample record; parsed by each value-model example below.
text = ion.dumps(py_data, binary=False)

MAY_BE_BARE#

[17]:
# Parse the text with the MAY_BE_BARE value model. The printed type shows
# the top-level value is an IonPyDict. rprint is rich's print.
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.MAY_BE_BARE)
print(f"{type(ion_dict) = }")
rprint(ion_dict)
type(ion_dict) = <class 'amazon.ion.simple_types.IonPyDict'>
{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.',
'hometown': <amazon.ion.simple_types.IonPyNull object at 0x10432b430>}, 'relationships': [{'name': 'Bob',
'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[18]:
# dict() only copies the top level into a plain dict; the values it holds
# are the same objects as before (their types are checked in the next cells).
py_dict = dict(ion_dict)
rprint(py_dict)
{
    'id': 1,
    'name': 'Alice',
    'age': 25,
    'weight': 94.7,
    'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object 
at 0x10432b430>},
    'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
}
[19]:
# Integer field: the printed type is IonPyInt; it displays as 25.
print(f"{type(py_dict['age']) = }")
rprint(py_dict["age"])
type(py_dict['age']) = <class 'amazon.ion.simple_types.IonPyInt'>
25
[20]:
# Float field: the printed type is IonPyFloat; it displays as 94.7.
print(f"{type(py_dict['weight']) = }")
rprint(py_dict["weight"])
type(py_dict['weight']) = <class 'amazon.ion.simple_types.IonPyFloat'>
94.7
[21]:
# The nested struct is still an IonPyDict: dict() above did not convert it.
print(f"{type(py_dict['bio']) = }")
rprint(py_dict["bio"])
type(py_dict['bio']) = <class 'amazon.ion.simple_types.IonPyDict'>
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 
0x10432b430>}
[22]:
# Likewise, the nested list is still an IonPyList.
print(f"{type(py_dict['relationships']) = }")
rprint(py_dict["relationships"])
type(py_dict['relationships']) = <class 'amazon.ion.simple_types.IonPyList'>
[{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]

SYMBOL_AS_TEXT#

[23]:
# Parse the same text with the SYMBOL_AS_TEXT value model. The printed type
# shows the top-level value is an IonPyDict.
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.SYMBOL_AS_TEXT)
print(f"{type(ion_dict) = }")
rprint(ion_dict)
type(ion_dict) = <class 'amazon.ion.simple_types.IonPyDict'>
{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.',
'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045a1d00>}, 'relationships': [{'name': 'Bob',
'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[24]:
# Shallow copy into a plain dict; nested values are not converted.
py_dict = dict(ion_dict)
rprint(py_dict)
{
    'id': 1,
    'name': 'Alice',
    'age': 25,
    'weight': 94.7,
    'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object 
at 0x1045a1d00>},
    'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
}
[25]:
# Integer field: the printed type is IonPyInt; it displays as 25.
print(f"{type(py_dict['age']) = }")
rprint(py_dict["age"])
type(py_dict['age']) = <class 'amazon.ion.simple_types.IonPyInt'>
25
[26]:
# Float field: the printed type is IonPyFloat; it displays as 94.7.
print(f"{type(py_dict['weight']) = }")
rprint(py_dict["weight"])
type(py_dict['weight']) = <class 'amazon.ion.simple_types.IonPyFloat'>
94.7
[27]:
# The nested struct is still an IonPyDict: dict() above did not convert it.
print(f"{type(py_dict['bio']) = }")
rprint(py_dict["bio"])
type(py_dict['bio']) = <class 'amazon.ion.simple_types.IonPyDict'>
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 
0x1045a1d00>}
[28]:
# Likewise, the nested list is still an IonPyList.
print(f"{type(py_dict['relationships']) = }")
rprint(py_dict["relationships"])
type(py_dict['relationships']) = <class 'amazon.ion.simple_types.IonPyList'>
[{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]

STRUCT_AS_STD_DICT#

[29]:
# Parse the same text with the STRUCT_AS_STD_DICT value model.
# NOTE(review): in the recorded run the printed type is still IonPyDict,
# not a plain dict — confirm whether this value model took effect in the
# environment the notebook was executed in.
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.STRUCT_AS_STD_DICT)
print(f"{type(ion_dict) = }")
rprint(ion_dict)
type(ion_dict) = <class 'amazon.ion.simple_types.IonPyDict'>
{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.',
'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045b0100>}, 'relationships': [{'name': 'Bob',
'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[30]:
# Shallow copy into a plain dict; nested values are not converted.
py_dict = dict(ion_dict)
rprint(py_dict)
{
    'id': 1,
    'name': 'Alice',
    'age': 25,
    'weight': 94.7,
    'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object 
at 0x1045b0100>},
    'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
}
[31]:
# Integer field: the printed type is IonPyInt; it displays as 25.
print(f"{type(py_dict['age']) = }")
rprint(py_dict["age"])
type(py_dict['age']) = <class 'amazon.ion.simple_types.IonPyInt'>
25
[32]:
# Float field: the printed type is IonPyFloat; it displays as 94.7.
print(f"{type(py_dict['weight']) = }")
rprint(py_dict["weight"])
type(py_dict['weight']) = <class 'amazon.ion.simple_types.IonPyFloat'>
94.7
[33]:
# The nested struct is still an IonPyDict in the recorded output.
print(f"{type(py_dict['bio']) = }")
rprint(py_dict["bio"])
type(py_dict['bio']) = <class 'amazon.ion.simple_types.IonPyDict'>
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 
0x1045b0100>}
[34]:
# Likewise, the nested list is still an IonPyList.
print(f"{type(py_dict['relationships']) = }")
rprint(py_dict["relationships"])
type(py_dict['relationships']) = <class 'amazon.ion.simple_types.IonPyList'>
[{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]

Ion Type with Native Python Type#

[35]:
# Reload the sample record from Ion text; the cells below operate on the
# loaded values with ordinary Python operations.
text = ion.dumps(py_data, binary=False)
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.STRUCT_AS_STD_DICT)
[36]:
# The loaded integer can be used directly in arithmetic with a Python int.
ion_dict["age"] + 100
[36]:
125
[37]:
# Item assignment works on the nested struct. Note that this mutates
# ion_dict in place; the trailing expression displays the updated "bio".
ion_dict["bio"]["hobby"] = "Swim"
ion_dict["bio"]
[37]:
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045a1d00>, 'hobby': 'Swim'}

ION with Polars#

[52]:
# polars is imported here, at its first use in the notebook.
import polars as pl

# Reload the sample record (MAY_BE_BARE value model) as the input for the
# DataFrame examples below.
text = ion.dumps(py_data, binary=False)
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.MAY_BE_BARE)
[53]:
# Explicit polars schema for the sample record: scalar columns, a struct
# column for "bio" and a list-of-struct column for "relationships".
# Every field inside the nested structs is a string column.
schema = {
    "id": pl.Int64(),
    "name": pl.Utf8(),
    "age": pl.Int8(),
    "weight": pl.Float64(),
    "bio": pl.Struct(
        {field: pl.Utf8() for field in ("dob", "address", "hometown")}
    ),
    "relationships": pl.List(
        pl.Struct({field: pl.Utf8() for field in ("name", "relation")}),
    ),
}
[54]:
# This won't work
try:
    df = pl.DataFrame([ion_dict], schema=schema)
except Exception as e:
    print(f"Error: {e!r}")
Error: ShapeError('data does not match the number of columns')
[64]:
def converter(ion_dict):
    """
    Recursively convert a loaded Ion value into plain Python containers.

    Mappings (Ion structs) become ``dict``, lists (Ion lists) become ``list``
    and Ion nulls (``IonPyNull``) become ``None``. Every other value is
    returned unchanged.

    The earlier version copied only the top level plus the hard-coded
    ``"bio"`` and ``"relationships"`` keys and left ``IonPyNull`` objects in
    place. With a string column in the schema, polars then stored the null
    ``hometown`` as the text
    ``"<amazon.ion.simple_types.IonPyNull object at 0x...>"`` instead of a
    real null. Converting recursively also removes the dependency on those
    two specific keys being present.

    :param ion_dict: value returned by ``ion.load`` / ``ion.loads``
        (typically an ``IonPyDict``); plain Python values are accepted too.
    :return: a new structure built from ``dict`` / ``list`` containers with
        ``None`` in place of Ion nulls; the input is not modified.
    """
    # Local imports keep this cell self-contained.
    from collections.abc import Mapping

    from amazon.ion.simple_types import IonPyNull

    def to_native(value):
        # Ion null is a dedicated object, not None; map it to None so
        # downstream consumers see a real missing value.
        if isinstance(value, IonPyNull):
            return None
        if isinstance(value, Mapping):
            # dict(value) mirrors the original top-level conversion.
            return {key: to_native(item) for key, item in dict(value).items()}
        if isinstance(value, list):
            return [to_native(item) for item in value]
        return value

    return to_native(ion_dict)

# Build a one-row DataFrame from the converted record using the explicit
# schema; the trailing expression displays it.
df = pl.DataFrame([converter(ion_dict)], schema=schema)
df
[64]:
shape: (1, 6)
idnameageweightbiorelationships
i64stri8f64struct[3]list[struct[2]]
1"Alice"2594.7{"1990-01-01","123 Main St.","<amazon.ion.simple_types.IonPyNull object at 0x105dd83d0>"}[{"Bob","friend"}, {"Charlie","father"}]
[61]:
import io

# Round-trip the DataFrame through Parquet entirely in memory: write it into
# a BytesIO buffer, then read the buffer's bytes back into a new frame.
buffer = io.BytesIO()
df.write_parquet(buffer)
df1 = pl.read_parquet(buffer.getvalue())
# Trailing expression displays the re-read frame.
df1
[61]:
shape: (1, 6)
idnameageweightbiorelationships
i64stri8f64struct[3]list[struct[2]]
1"Alice"2594.7{"1990-01-01","123 Main St.","<amazon.ion.simple_types.IonPyNull object at 0x105dd83d0>"}[{"Bob","friend"}, {"Charlie","father"}]
[ ]: