'Multi-level list to data frame in R
I'd like to convert the below list to a data frame but I'm failing at doing it.
The list is taken from Microsoft Azure's API listing all resource types with technical information about them (link: https://docs.microsoft.com/en-us/rest/api/compute/resource-skus/list). The list is similar to this one:
library(tidyverse)
input <- list(value = list(
list(resourceType = "rt1", name = "name1", tier = "tier1", size = "size1", family = "family1", capabilities = list(list(name = "cap_name1", value = "value1_1"), list(name = "cap_name2", value = "value1_2"))),
list(resourceType = "rt1", name = "name2", tier = "tier2", size = "size2", family = "family2", capabilities = list(list(name = "cap_name2", value = "value2_2"), list(name = "cap_name3", value = "value2_3"))),
list(resourceType = "rt1", name = "name3", tier = "tier3", size = "size3", family = "family3", capabilities = list(list(name = "cap_name1", value = "value3_1"), list(name = "cap_name3", value = "value3_3"))),
list(resourceType = "rt1", name = "name4", tier = "tier4", size = "size4", family = "family4", capabilities = list(list(name = "cap_name1", value = "value4_1"), list(name = "cap_name2", value = "value4_2"), list(name = "cap_name3", value = "value4_3"))),
list(resourceType = "rt2", name = "name5", capabilities = list(list(name = "cap_name4", value = "value5_5")))
))
expected_output <-
tibble(
resourceType = c("rt1", "rt1", "rt1", "rt1"),
name = c("name1", "name2", "name3", "name4"),
tier = c("tier1", "tier2", "tier3", "tier4"),
size = c("size1", "size2", "size3", "size4"),
family = c("family1", "family2", "family3", "family4"),
cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
cap_name3 = c(NA, "value2_3", "value3_3", "value4_3"),
)
expected_output
#> # A tibble: 4 × 8
#> resourceType name tier size family cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 tier1 size1 family1 value1_1 value1_2 <NA>
#> 2 rt1 name2 tier2 size2 family2 <NA> value2_2 value2_3
#> 3 rt1 name3 tier3 size3 family3 value3_1 <NA> value3_3
#> 4 rt1 name4 tier4 size4 family4 value4_1 value4_2 value4_3
Created on 2022-05-12 by the reprex package (v2.0.1)
I have two problems here:
- I don't know how to filter only
resourceType == "rt1"
. I know how to filter it this way:
input %>% pluck("value") %>% keep(~.x$resourceType == "rt1")
but I'd like to somehow do it without pluck
step.
- The main problem is to transform it from
input
toexpected_output
. I found this complicated way to list all capabilities:
capabilities <- input %>% pluck("value") %>% keep(~.x$resourceType == "rt1") %>% transpose() %>% as_tibble() %>% pull(capabilities)
all_capabilities_names <- capabilities %>% map_depth(1, ~ map_chr(.x, "name")) %>% purrr::flatten_chr() %>% unique()
all_capabilities_names
#> [1] "cap_name1" "cap_name2" "cap_name3"
Created on 2022-05-12 by the reprex package (v2.0.1)
I'm stuck there because I have no idea how to map value
s to correct columns.
Working with lists is always a nightmare to me. Any help appreciated : )
Solution 1:[1]
With a little bit of purrr
y magic, you can sort of dig around to make tibbles at various levels and unnest/reduce
them all the way up. With a little bit of playing around:
library(tidyverse)
input <- list(value = list(
list(resourceType = "rt1", name = "name1", tier = "tier1", size = "size1", family = "family1", capabilities = list(list(name = "cap_name1", value = "value1_1"), list(name = "cap_name2", value = "value1_2"))),
list(resourceType = "rt1", name = "name2", tier = "tier2", size = "size2", family = "family2", capabilities = list(list(name = "cap_name2", value = "value2_2"), list(name = "cap_name3", value = "value2_3"))),
list(resourceType = "rt1", name = "name3", tier = "tier3", size = "size3", family = "family3", capabilities = list(list(name = "cap_name1", value = "value3_1"), list(name = "cap_name3", value = "value3_3"))),
list(resourceType = "rt1", name = "name4", tier = "tier4", size = "size4", family = "family4", capabilities = list(list(name = "cap_name1", value = "value4_1"), list(name = "cap_name2", value = "value4_2"), list(name = "cap_name3", value = "value4_3"))),
list(resourceType = "rt2", name = "name5", capabilities = list(list(name = "cap_name4", value = "value5_4")))
))
output_test <- input[[1]] %>%
map(as_tibble) %>%
reduce(bind_rows) %>%
mutate(capabilities = map(capabilities, as_tibble)) %>%
unnest(capabilities, names_repair = "unique") %>%
filter(resourceType == "rt1") %>%
pivot_wider(names_from = `name...6`, values_from = value) %>%
rename(name = `name...2`)
#> New names:
#> • `name` -> `name...2`
#> • `name` -> `name...6`
output_test
#> # A tibble: 4 × 8
#> resourceType name tier size family cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 tier1 size1 family1 value1_1 value1_2 <NA>
#> 2 rt1 name2 tier2 size2 family2 <NA> value2_2 value2_3
#> 3 rt1 name3 tier3 size3 family3 value3_1 <NA> value3_3
#> 4 rt1 name4 tier4 size4 family4 value4_1 value4_2 value4_3
And just to see if it's worked:
expected_output <-
tibble(
resourceType = c("rt1", "rt1", "rt1", "rt1"),
name = c("name1", "name2", "name3", "name4"),
tier = c("tier1", "tier2", "tier3", "tier4"),
size = c("size1", "size2", "size3", "size4"),
family = c("family1", "family2", "family3", "family4"),
cap_name1 = c("value1_1", NA, "value3_1", "value4_1"),
cap_name2 = c("value1_2", "value2_2", NA, "value4_2"),
cap_name3 = c(NA, "value2_3", "value3_3", "value4_3"),
)
assertthat::are_equal(expected_output, output_test)
#> [1] TRUE
Edit - encountering another error
If there is an empty vector in one of the lists then turning to a tibble wont work. You can discard that vector and it'll code as NA
when binding all rows together:
library(tidyverse)
input <- list(value = list(
list(resourceType = "rt1", name = "name1", tier = vector("character"), size = "size1", family = "family1", capabilities = list(list(name = "cap_name1", value = "value1_1"), list(name = "cap_name2", value = "value1_2"))),
list(resourceType = "rt1", name = "name2", tier = "tier2", size = "size2", family = "family2", capabilities = list(list(name = "cap_name2", value = "value2_2"), list(name = "cap_name3", value = "value2_3"))),
list(resourceType = "rt1", name = "name3", tier = "tier3", size = "size3", family = "family3", capabilities = list(list(name = "cap_name1", value = "value3_1"), list(name = "cap_name3", value = "value3_3"))),
list(resourceType = "rt1", name = "name4", tier = "tier4", size = "size4", family = "family4", capabilities = list(list(name = "cap_name1", value = "value4_1"), list(name = "cap_name2", value = "value4_2"), list(name = "cap_name3", value = "value4_3"))),
list(resourceType = "rt2", name = "name5", capabilities = list(list(name = "cap_name4", value = "value5_4")))
))
input$value %>%
map(~ discard(.x, is_empty) %>% as_tibble) %>%
reduce(bind_rows) %>%
mutate(capabilities = map(capabilities, as_tibble)) %>%
rename(value_name = name) %>%
unnest(capabilities, names_repair = "unique") %>%
filter(resourceType == "rt1") %>%
pivot_wider(names_from = `name`, values_from = value)
#> # A tibble: 4 × 8
#> resourceType value_name size family tier cap_name1 cap_name2 cap_name3
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 rt1 name1 size1 family1 <NA> value1_1 value1_2 <NA>
#> 2 rt1 name2 size2 family2 tier2 <NA> value2_2 value2_3
#> 3 rt1 name3 size3 family3 tier3 value3_1 <NA> value3_3
#> 4 rt1 name4 size4 family4 tier4 value4_1 value4_2 value4_3
Created on 2022-05-14 by the reprex package (v2.0.1)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 |