Amazon ION - Richly Typed Self Describing Data Serialization Format#
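This notebook demonstrates the amazon.ion Python library: encoding Python data as binary or text Ion, loading it back, converting the loaded Ion values to plain Python objects, and building a Polars DataFrame from them.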
[1]:
from collections.abc import Mapping
from pathlib import Path

import amazon.ion.simpleion as ion
from amazon.ion.simple_types import IonPyDict
from rich import print as rprint
# The scratch Ion file lives in the current working directory.
dir_here = Path.cwd()
path = dir_here.joinpath("data.ion")
[2]:
# Sample record used by every example below: scalars (int, str, float),
# a nested mapping containing a None value, and a list of mappings.
py_data = {
    "id": 1,
    "name": "Alice",
    "age": 25,
    "weight": 94.7,
    "bio": {
        "dob": "1990-01-01",
        "address": "123 Main St.",
        "hometown": None,
    },
    "relationships": [
        {"name": "Bob", "relation": "friend"},
        {"name": "Charlie", "relation": "father"},
    ],
}
Encode as Bytes#
[3]:
print("--- Dump one record to bytes ---")
# With default arguments dumps() returns a bytes object (see the b'...' output).
print(ion.dumps(py_data))
--- Dump one record to bytes ---
b'\xe0\x01\x00\xea\xee\xc5\x81\x83\xde\xc1\x87\xbe\xbe\x82id\x83age\x86weight\x83bio\x83dob\x87address\x88hometown\x8drelationships\x88relation\xde\xdb\x8a!\x01\x84\x85Alice\x8b!\x19\x8cH@W\xac\xcc\xcc\xcc\xcc\xcd\x8d\xde\x9c\x8e\x8a1990-01-01\x8f\x8c123 Main St.\x90\x0f\x91\xbe\xa2\xde\x8d\x84\x83Bob\x92\x86friend\xde\x91\x84\x87Charlie\x92\x86father'
[4]:
print("--- Dump many records to bytes ---")
# "Many records" here means a Python list of records serialized as a single value.
print(ion.dumps([py_data]))
--- Dump many records to bytes ---
b'\xe0\x01\x00\xea\xee\xc5\x81\x83\xde\xc1\x87\xbe\xbe\x82id\x83age\x86weight\x83bio\x83dob\x87address\x88hometown\x8drelationships\x88relation\xbe\xdd\xde\xdb\x8a!\x01\x84\x85Alice\x8b!\x19\x8cH@W\xac\xcc\xcc\xcc\xcc\xcd\x8d\xde\x9c\x8e\x8a1990-01-01\x8f\x8c123 Main St.\x90\x0f\x91\xbe\xa2\xde\x8d\x84\x83Bob\x92\x86friend\xde\x91\x84\x87Charlie\x92\x86father'
[5]:
print("--- Dump one record to file ---")
# The target is opened in binary write mode and dump() is called with its
# default arguments, writing the record to the file object.
with open(path, "wb") as f:
    ion.dump(py_data, f)
--- Dump one record to file ---
[6]:
print("--- Load one record from file ---")
# Read the file back in binary mode; load() parses the file object.
with open(path, "rb") as f:
    data = ion.load(f)
# The top-level struct comes back as an IonPyDict (see the recorded output).
print(f"{type(data) = }")
--- Load one record from file ---
type(data) = <class 'amazon.ion.simple_types.IonPyDict'>
[7]:
# The null `hometown` field loads as an IonPyNull object, not None, as the repr shows.
print(f"{data = }")
data = {'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1043130a0>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[8]:
print("--- Dump many records to file ---")
# Overwrites the same file, this time with a one-element list of records.
with open(path, "wb") as f:
    ion.dump([py_data], f)
--- Dump many records to file ---
[9]:
print("--- Load many records from file ---")
# Read the list back; load() parses the file object.
with open(path, "rb") as f:
    data = ion.load(f)
# A top-level list comes back as an IonPyList (see the recorded output).
print(f"{type(data) = }")
--- Load many records from file ---
type(data) = <class 'amazon.ion.simple_types.IonPyList'>
[10]:
# Same content as the single-record load, wrapped in a one-element list.
print(f"{data = }")
data = [{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x104308cd0>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}]
Encode as String#
[11]:
print("--- Dump one record to string ---")
# binary=False makes dumps() emit Ion text; the output starts with the $ion_1_0 marker.
print(ion.dumps(py_data, binary=False))
--- Dump one record to string ---
$ion_1_0 {id:1,name:"Alice",age:25,weight:94.7e0,bio:{dob:"1990-01-01",address:"123 Main St.",hometown:null},relationships:[{name:"Bob",relation:"friend"},{name:"Charlie",relation:"father"}]}
[12]:
print("--- Dump many records to string ---")
# The list of records is written as one Ion list value in text form.
print(ion.dumps([py_data], binary=False))
--- Dump many records to string ---
$ion_1_0 [{id:1,name:"Alice",age:25,weight:94.7e0,bio:{dob:"1990-01-01",address:"123 Main St.",hometown:null},relationships:[{name:"Bob",relation:"friend"},{name:"Charlie",relation:"father"}]}]
[13]:
print("--- Read one record from string ---")
# Round trip: serialize to Ion text, then parse that text back with loads().
text = ion.dumps(py_data, binary=False)
data = ion.loads(text)
print(f"{type(data) = }")
--- Read one record from string ---
type(data) = <class 'amazon.ion.simple_types.IonPyDict'>
[14]:
# The record parsed from text prints the same content as the one loaded from the file.
print(f"{data = }")
data = {'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1043259a0>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[15]:
# The loaded struct can be iterated with items(), like a regular dict.
for key, value in data.items():
    print(f"{key} = {value}")
id = 1
name = Alice
age = 25
weight = 94.7
bio = {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1043259a0>}
relationships = [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
Ion Dict to Regular Python Dict#
[16]:
# Ion text that the value-model examples in this section load from.
text = ion.dumps(py_data, binary=False)
MAY_BE_BARE#
[17]:
# Load the text with the MAY_BE_BARE value model and inspect the top-level type.
# NOTE(review): the recorded outputs in this section still show IonPy* wrapper
# types (e.g. IonPyDict here, IonPyInt for `age`) — confirm the value model is
# actually honoured in this environment.
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.MAY_BE_BARE)
print(f"{type(ion_dict) = }")
rprint(ion_dict)
type(ion_dict) = <class 'amazon.ion.simple_types.IonPyDict'>
{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x10432b430>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[18]:
# dict() is a shallow conversion: only the top-level container becomes a plain
# dict; the cells below show that nested values keep their Ion types.
py_dict = dict(ion_dict)
rprint(py_dict)
{ 'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x10432b430>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}] }
[19]:
# Integer field: the value is still an IonPyInt after the shallow dict() copy.
print(f"{type(py_dict['age']) = }")
rprint(py_dict["age"])
type(py_dict['age']) = <class 'amazon.ion.simple_types.IonPyInt'>
25
[20]:
# Float field: the value is still an IonPyFloat after the shallow dict() copy.
print(f"{type(py_dict['weight']) = }")
rprint(py_dict["weight"])
type(py_dict['weight']) = <class 'amazon.ion.simple_types.IonPyFloat'>
94.7
[21]:
# The nested struct is still an IonPyDict; dict() converted only the top level.
print(f"{type(py_dict['bio']) = }")
rprint(py_dict["bio"])
type(py_dict['bio']) = <class 'amazon.ion.simple_types.IonPyDict'>
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x10432b430>}
[22]:
# Likewise, the nested list is still an IonPyList.
print(f"{type(py_dict['relationships']) = }")
rprint(py_dict["relationships"])
type(py_dict['relationships']) = <class 'amazon.ion.simple_types.IonPyList'>
[{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
SYMBOL_AS_TEXT#
[23]:
# Load the same text with the SYMBOL_AS_TEXT value model and inspect the result.
# NOTE(review): the recorded output is the same as in the MAY_BE_BARE section
# (apart from object addresses) — confirm the value model is actually honoured
# in this environment.
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.SYMBOL_AS_TEXT)
print(f"{type(ion_dict) = }")
rprint(ion_dict)
type(ion_dict) = <class 'amazon.ion.simple_types.IonPyDict'>
{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045a1d00>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[24]:
# Shallow conversion again: only the top-level container becomes a plain dict.
py_dict = dict(ion_dict)
rprint(py_dict)
{ 'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045a1d00>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}] }
[25]:
# Integer field: still an IonPyInt under this value model.
print(f"{type(py_dict['age']) = }")
rprint(py_dict["age"])
type(py_dict['age']) = <class 'amazon.ion.simple_types.IonPyInt'>
25
[26]:
# Float field: still an IonPyFloat under this value model.
print(f"{type(py_dict['weight']) = }")
rprint(py_dict["weight"])
type(py_dict['weight']) = <class 'amazon.ion.simple_types.IonPyFloat'>
94.7
[27]:
# The nested struct is still an IonPyDict; dict() converted only the top level.
print(f"{type(py_dict['bio']) = }")
rprint(py_dict["bio"])
type(py_dict['bio']) = <class 'amazon.ion.simple_types.IonPyDict'>
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045a1d00>}
[28]:
# Likewise, the nested list is still an IonPyList.
print(f"{type(py_dict['relationships']) = }")
rprint(py_dict["relationships"])
type(py_dict['relationships']) = <class 'amazon.ion.simple_types.IonPyList'>
[{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
STRUCT_AS_STD_DICT#
[29]:
# Load the same text with the STRUCT_AS_STD_DICT value model and inspect the result.
# NOTE(review): despite the flag's name, the recorded output still reports
# IonPyDict for the top-level struct — confirm the value model is actually
# honoured in this environment.
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.STRUCT_AS_STD_DICT)
print(f"{type(ion_dict) = }")
rprint(ion_dict)
type(ion_dict) = <class 'amazon.ion.simple_types.IonPyDict'>
{'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045b0100>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]}
[30]:
# Shallow conversion again: only the top-level container becomes a plain dict.
py_dict = dict(ion_dict)
rprint(py_dict)
{ 'id': 1, 'name': 'Alice', 'age': 25, 'weight': 94.7, 'bio': {'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045b0100>}, 'relationships': [{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}] }
[31]:
# Integer field: still an IonPyInt under this value model.
print(f"{type(py_dict['age']) = }")
rprint(py_dict["age"])
type(py_dict['age']) = <class 'amazon.ion.simple_types.IonPyInt'>
25
[32]:
# Float field: still an IonPyFloat under this value model.
print(f"{type(py_dict['weight']) = }")
rprint(py_dict["weight"])
type(py_dict['weight']) = <class 'amazon.ion.simple_types.IonPyFloat'>
94.7
[33]:
# The nested struct is reported as IonPyDict here as well.
print(f"{type(py_dict['bio']) = }")
rprint(py_dict["bio"])
type(py_dict['bio']) = <class 'amazon.ion.simple_types.IonPyDict'>
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045b0100>}
[34]:
# Likewise, the nested list is still an IonPyList.
print(f"{type(py_dict['relationships']) = }")
rprint(py_dict["relationships"])
type(py_dict['relationships']) = <class 'amazon.ion.simple_types.IonPyList'>
[{'name': 'Bob', 'relation': 'friend'}, {'name': 'Charlie', 'relation': 'father'}]
Ion Type with Native Python Type#
[35]:
# Fresh load so the examples below do not depend on state from earlier sections.
text = ion.dumps(py_data, binary=False)
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.STRUCT_AS_STD_DICT)
[36]:
# The loaded integer supports ordinary int arithmetic (25 + 100 -> 125).
ion_dict["age"] + 100
[36]:
125
[37]:
# Nested structs accept item assignment like a regular dict.
# Note this mutates ion_dict in place, so re-running the cell is not a no-op
# only in the sense that the key is set again to the same value.
ion_dict["bio"]["hobby"] = "Swim"
ion_dict["bio"]
[37]:
{'dob': '1990-01-01', 'address': '123 Main St.', 'hometown': <amazon.ion.simple_types.IonPyNull object at 0x1045a1d00>, 'hobby': 'Swim'}
ION with Polars#
[52]:
import polars as pl
# Re-create the loaded struct that the Polars cells below work on.
text = ion.dumps(py_data, binary=False)
ion_dict = ion.loads(text, value_model=ion.IonPyValueModel.MAY_BE_BARE)
[53]:
# Explicit Polars schema mirroring the shape of the sample record:
# scalar columns, one struct column and one list-of-struct column.
schema = {
    "id": pl.Int64(),
    "name": pl.Utf8(),
    "age": pl.Int8(),
    "weight": pl.Float64(),
    # Nested struct: all three fields are typed as strings.
    "bio": pl.Struct({
        "dob": pl.Utf8(),
        "address": pl.Utf8(),
        "hometown": pl.Utf8(),
    }),
    # List whose elements are two-field structs.
    "relationships": pl.List(
        pl.Struct({"name": pl.Utf8(), "relation": pl.Utf8()}),
    ),
}
[54]:
# Passing the loaded Ion struct straight to Polars does not work (see the
# recorded ShapeError); the exception is caught and printed so the notebook
# keeps running.
try:
    df = pl.DataFrame([ion_dict], schema=schema)
except Exception as err:
    print(f"Error: {err!r}")
Error: ShapeError('data does not match the number of columns')
[64]:
def converter(ion_dict):
    """Recursively convert a loaded Ion value into plain Python containers.

    Mappings (such as Ion structs) become ``dict``, lists become ``list``, and
    Ion nulls become ``None`` at every nesting level. Other scalar values are
    returned unchanged.

    Compared with converting only the ``bio`` and ``relationships`` keys, this
    works for records of any shape and does not raise ``KeyError`` when one of
    those keys is missing. Mapping Ion nulls to ``None`` matters because a raw
    ``IonPyNull`` object is not a Python ``None`` and can otherwise end up
    stored as its repr string in a string-typed column.

    Args:
        ion_dict: The value returned by ``ion.load`` / ``ion.loads``,
            typically an Ion struct.

    Returns:
        A structure of plain ``dict`` / ``list`` objects with Ion nulls
        replaced by ``None``.
    """

    def is_ion_null(value):
        # Match on the class name anywhere in the MRO so the check does not
        # depend on which amazon.ion module provides the IonPyNull class.
        return any(cls.__name__ == "IonPyNull" for cls in type(value).__mro__)

    def to_native(value):
        if is_ion_null(value):
            return None
        if isinstance(value, Mapping):
            # dict(value) performs the same top-level conversion as before;
            # the comprehension then descends into every value.
            return {key: to_native(item) for key, item in dict(value).items()}
        if isinstance(value, list):
            return [to_native(item) for item in value]
        return value

    return to_native(ion_dict)
# Build a one-row DataFrame from the converted record using the explicit schema.
# NOTE(review): the recorded output shows `hometown` as the text repr of an
# IonPyNull object, i.e. the Ion null was stored as a string — re-run and
# confirm the column holds a real null.
df = pl.DataFrame([converter(ion_dict)], schema=schema)
df
[64]:
shape: (1, 6)
| id | name | age | weight | bio | relationships |
|---|---|---|---|---|---|
| i64 | str | i8 | f64 | struct[3] | list[struct[2]] |
| 1 | "Alice" | 25 | 94.7 | {"1990-01-01","123 Main St.","<amazon.ion.simple_types.IonPyNull object at 0x105dd83d0>"} | [{"Bob","friend"}, {"Charlie","father"}] |
[61]:
import io
# Round-trip the DataFrame through Parquet entirely in memory.
buffer = io.BytesIO()
df.write_parquet(buffer)
# getvalue() returns the whole buffer as bytes regardless of the stream
# position, so no seek(0) is needed before reading.
df1 = pl.read_parquet(buffer.getvalue())
df1
[61]:
shape: (1, 6)
| id | name | age | weight | bio | relationships |
|---|---|---|---|---|---|
| i64 | str | i8 | f64 | struct[3] | list[struct[2]] |
| 1 | "Alice" | 25 | 94.7 | {"1990-01-01","123 Main St.","<amazon.ion.simple_types.IonPyNull object at 0x105dd83d0>"} | [{"Bob","friend"}, {"Charlie","father"}] |
[ ]: