'How to json_normalize nested json arrays
I have the complex json structure as below. I am able to json_normalize only first level of array (MatchingReleases.MatchingRelease). whereas I have one more json array which is not getting normalized (MatchingTheatres.MatchingTheatre.HhMatchingTheatre.Theatre.HhSalaries.Salary) as I am getting Nan even though the respective levels have values. Not sure if there are any limitations in json normalize with respect to the number of levels. Should I give any more meta information?
My approach:
matchingReleaseDf = pd.DataFrame(pd.json_normalize(dataSourceOutputDict["MovieEnquiryResponseType"],
record_path = ["MatchingReleases", "MatchingRelease"], meta = ["MatchingReleases", "MatchingRelease", "MatchingTheatres", "MatchingTheatre", "Theatre", "HHSalaries", "Salary"],
errors = "ignore"))
{
"MovieEnquiryResponseType": {
"SearchedMovie": {
"SearchElements": {
"Movie": {
"MoviePartName1": "Home",
"MoviePartName2": "Alone",
"DateReleased": "1973-03-07T00:00:00"
},
"Address": {
"AddressValidation": "AsInput",
"Unformatted": {
"Line1": "456",
"Line2": "Foxhall Road",
"Line3": "Newyork",
"Line4": null,
"Line5": null,
"Line6": null,
"PostCode": "44306",
"Country": "United States"
}
}
}
},
"MatchingReleases": {
"MatchingRelease": [
{
"ReleaseInfo": {
"ReleaseNumber": "1",
"ReleaseDate": "2021-10-01T00:00:00"
},
"MatchingTheatres": {
"MatchingTheatre": {
"MatchInfo": {
"MatchingTheatreId": "408794d4-a5e8-4ca1-b53a-320103e23b1f",
"MatchType": "F",
"LinkedAddressMatch": "false"
},
"HhMatchingTheatre": {
"MatchingMovie": {
"MovieRole": "PH",
"MovieNumber": "1",
"MovieCode": "D"
},
"Theatre": {
"Provider": {
"ContactDetails": "01473 212422||-",
"ProducerId": "I12434",
"ProducerDescription": "RedJeyant",
"DelAutId": "I12434",
"DelAutDescription": "RedJeyant",
"DelAutType": "2"
},
"Production": {
"ProductionType": "A",
"ProductionNumber": "123123",
"CollectiveProductionIndicator": "N",
"ProductionNumberID": "Standard",
"InceptionDate": "2021-06-01T00:00:00Z",
"PeriodEndDate": "2021-11-05T00:00:00Z",
"RiskAddress": {
"HouseNr": "456",
"Street1": "Foxhall Road",
"City": "Newyork",
"County": "Suffolk",
"PostCode": "44306",
"Country": "United States",
"AddressValidation": "PafValid"
}
},
"TheatreDetails": {
"TheatreNr": "50000",
"LossCause": "10",
"TheatreStatus": "O1",
"LossDate": "2021-10-01T00:00:00Z",
"LossSetupDate": "2021-10-02T00:00:00Z",
"ReleaseDescription": "This is a test Theatre by Lorenz",
"CatastropheRelated": "U"
},
"HhSalaries": {
"Salary": [
{
"SalaryCode": "A",
"SalaryAmount": "100.00"
},
{
"SalaryCode": "M",
"SalaryAmount": "100.00"
}
]
},
"Movies": {
"ProductionHolders": {
"ProductionHolder": {
"MovieCode": "D",
"MovieNumber": "1",
"Person": {
"Name": {
"Title": "1",
"MoviePartName1": "Home",
"MoviePartName2": "Theatre",
"DateReleased": "1973-03-07T00:00:00Z",
"Gender": "M"
},
"Address": {
"HouseNr": "456",
"Street1": "Foxhall Road",
"City": "Newyork",
"County": "Suffolk",
"PostCode": "44306",
"Country": "United States",
"AddressValidation": "PafValid"
},
"Email": "[email protected]",
"BankDetails": {
"SortCode": "567832",
"AccountNumber": "8976543"
},
"SalaryAmount": "123.00"
}
}
}
}
}
}
}
}
},
{
"ReleaseInfo": {
"ReleaseNumber": "2",
"ReleaseDate": "2021-10-02T00:00:00"
},
"MatchingTheatres": {
"MatchingTheatre": {
"MatchInfo": {
"MatchingTheatreId": "3d491762-c25c-406b-8870-6785d7e8a408",
"MatchType": "F",
"LinkedAddressMatch": "false"
},
"HhMatchingTheatre": {
"MatchingMovie": {
"MovieRole": "PH",
"MovieNumber": "1",
"MovieCode": "D"
},
"Theatre": {
"Provider": {
"ContactDetails": "01473 212422||-",
"ProducerId": "I12434",
"ProducerDescription": "RedJeyant",
"DelAutId": "I12434",
"DelAutDescription": "RedJeyant",
"DelAutType": "2"
},
"Production": {
"ProductionType": "B",
"ProductionNumber": "7567",
"CollectiveProductionIndicator": "N",
"ProductionNumberID": "Standard",
"InceptionDate": "2021-09-26T00:00:00Z",
"PeriodEndDate": "2021-12-11T00:00:00Z",
"RiskAddress": {
"HouseNr": "456",
"Street1": "Foxhall Road",
"City": "Newyork",
"County": "Suffolk",
"PostCode": "44306",
"Country": "United States",
"AddressValidation": "PafValid"
}
},
"TheatreDetails": {
"TheatreNr": "50001",
"LossCause": "11",
"TheatreStatus": "O1",
"LossDate": "2021-10-02T00:00:00Z",
"LossSetupDate": "2021-10-10T00:00:00Z",
"ReleaseDescription": "This is a second Theatre by Lorenz",
"CatastropheRelated": "U"
},
"HhSalaries": {
"Salary": [
{
"SalaryCode": "B",
"SalaryAmount": "200.00"
},
{
"SalaryCode": "G",
"SalaryAmount": "200.00"
}
]
},
"Movies": {
"ProductionHolders": {
"ProductionHolder": {
"MovieCode": "D",
"MovieNumber": "1",
"Person": {
"Name": {
"Title": "1",
"MoviePartName1": "Home",
"MoviePartName2": "Theatre",
"DateReleased": "1973-03-07T00:00:00Z",
"Gender": "M"
},
"Address": {
"HouseNr": "456",
"Street1": "Foxhall Road",
"City": "Newyork",
"County": "Suffolk",
"PostCode": "44306",
"Country": "United States",
"AddressValidation": "PafValid"
},
"Email": "[email protected]",
"BankDetails": {
"SortCode": "110000",
"AccountNumber": "20658200"
},
"SalaryAmount": "456.00"
}
}
}
}
}
}
}
}
},
{
"ReleaseInfo": {
"ReleaseNumber": "3",
"ReleaseDate": "2021-10-03T00:00:00"
},
"MatchingTheatres": {
"MatchingTheatre": {
"MatchInfo": {
"MatchingTheatreId": "822838f9-927a-4e40-91ff-815e3f75b75b",
"MatchType": "F",
"LinkedAddressMatch": "false"
},
"HhMatchingTheatre": {
"MatchingMovie": {
"MovieRole": "PH",
"MovieNumber": "1",
"MovieCode": "D"
},
"Theatre": {
"Provider": {
"ContactDetails": "235678123||-",
"ProducerId": "I12434",
"ProducerDescription": "RedJeyant",
"DelAutId": "I12434",
"DelAutDescription": "RedJeyant",
"DelAutType": "2"
},
"Production": {
"ProductionType": "H",
"ProductionNumber": "657567",
"CollectiveProductionIndicator": "N",
"ProductionNumberID": "Standard",
"InceptionDate": "2021-10-01T00:00:00Z",
"PeriodEndDate": "2021-11-04T00:00:00Z",
"RiskAddress": {
"HouseNr": "456",
"Street1": "Foxhall Road",
"City": "Newyork",
"County": "Suffolk",
"PostCode": "44306",
"Country": "United States",
"AddressValidation": "PafValid"
}
},
"TheatreDetails": {
"TheatreNr": "50002",
"LossCause": "12",
"TheatreStatus": "O1",
"LossDate": "2021-10-03T00:00:00Z",
"LossSetupDate": "2021-10-12T00:00:00Z",
"ReleaseDescription": "This is a third Theatre by Lorenz",
"CatastropheRelated": "U"
},
"HhSalaries": {
"Salary": [
{
"SalaryCode": "D",
"SalaryAmount": "300.00"
},
{
"SalaryCode": "E",
"SalaryAmount": "300.00"
}
]
},
"Movies": {
"ProductionHolders": {
"ProductionHolder": {
"MovieCode": "D",
"MovieNumber": "1",
"Person": {
"Name": {
"Title": "1",
"MoviePartName1": "Home",
"MoviePartName2": "Theatre",
"DateReleased": "1973-03-07T00:00:00Z",
"Gender": "M"
},
"Address": {
"HouseNr": "456",
"Street1": "Foxhall Road",
"City": "Newyork",
"County": "Suffolk",
"PostCode": "44306",
"Country": "United States",
"AddressValidation": "PafValid"
},
"Email": "[email protected]",
"BankDetails": {
"SortCode": "110203",
"AccountNumber": "20658211"
},
"SalaryAmount": "789.00"
}
}
}
}
}
}
}
}
}
]
}
}
}
Solution 1:[1]
You only need record_path
matchingReleaseDf = pd.json_normalize(dataSourceOutputDict["MovieEnquiryResponseType"],
record_path = ["MatchingReleases", "MatchingRelease"],
errors = "ignore")
To flatten salary
column, you can
matchingReleaseDf = matchingReleaseDf.explode('MatchingTheatres.MatchingTheatre.HhMatchingTheatre.Theatre.HhSalaries.Salary').reset_index()
salary = pd.json_normalize(matchingReleaseDf['MatchingTheatres.MatchingTheatre.HhMatchingTheatre.Theatre.HhSalaries.Salary']).add_prefix('MatchingTheatres.MatchingTheatre.HhMatchingTheatre.Theatre.HhSalaries.Salary.')
out = pd.concat([matchingReleaseDf, salary], axis=1).drop('MatchingTheatres.MatchingTheatre.HhMatchingTheatre.Theatre.HhSalaries.Salary', axis=1)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 |