As we all know pandas “json_normalize” which works great in taking a JSON Data, however, nested it is and convert’s it to the usable pandas dataframe.
pandas json_normalize documentation
Now If you want the reverse operation which takes that same Dataframe and convert back to originals JSON format, for example: for pushing data to elastic search DB or to store in Mongo DB or JSON File for Processing it later.
Here is a nice handy function which does exactly that, Thanks to Parsa T on StackOverflow for sharing this awesome utility function.
def make_formatted_dict(my_dict, key_arr, val):
"""
Set val at path in my_dict defined by the string (or serializable object) array key_arr
"""
current = my_dict
for i in range(len(key_arr)):
key = key_arr[i]
if key not in current:
if i == len(key_arr)-1:
current[key] = val
else:
current[key] = {}
else:
if type(current[key]) is not dict:
print("Given dictionary is not compatible with key structure requested")
raise ValueError("Dictionary key already occupied")
current = current[key]
return my_dict
def df_to_formatted_json(df, sep="."):
result = []
for _, row in df.iterrows():
parsed_row = {}
for idx, val in row.iteritems():
keys = idx.split(sep)
parsed_row = make_formatted_dict(parsed_row, keys, val)
result.append(parsed_row)
return result
after backward converting you might need to drop empty columns from your generated JSON data, therefore, use below function that will remove empty columns:
import numpy as np
def remove_empties_from_dict(a_dict):
new_dict = {}
for k, v in a_dict.items():
if isinstance(v, dict):
v = remove_empties_from_dict(v)
if v is not None and v is not np.nan and v is not "":
new_dict[k] = v
return new_dict
def make_formatted_dict(my_dict, key_arr, val):
"""
Set val at path in my_dict defined by the string (or serializable object) array key_arr
"""
current = my_dict
for i in range(len(key_arr)):
key = key_arr[i]
if key not in current:
if i == len(key_arr)-1:
current[key] = val
else:
current[key] = {}
else:
if type(current[key]) is not dict:
print("Given dictionary is not compatible with key structure requested")
raise ValueError("Dictionary key already occupied")
current = current[key]
return remove_empties_from_dict(my_dict)
def df_to_formatted_json(df, sep="."):
result = []
for _, row in df.iterrows():
parsed_row = {}
for idx, val in row.iteritems():
keys = idx.split(sep)
parsed_row = make_formatted_dict(parsed_row, keys, val)
result.append(parsed_row)
return result
This Is Very Handy Utility Function.