This project has two tasks in their workflow, a survey task and a follow up question task asking about the weather. The survey task also has subquestions that ask the volunteer to select all that apply, meaning we have an extra step to flatten out the annotations

library(tidyjson)
library(magrittr)
library(jsonlite)
library(dplyr)
library(stringr)
library(tidyr)
library(lubridate)

Take a peek at the data structure. There are two tasks, and within the survey task, only some species have subquestions.

############### SURVEY TASK
head(jdata)
for (i in 15:17) {
     jdata$annotations[i] %>% prettify %>% print
}
[
    {
        "task": "T3",
        "value": [
            {
                "choice": "DEER",
                "answers": {
                    "HOWMANYANIMALSDOYOUSEE": "1"
                },
                "filters": {

                }
            }
        ]
    },
    {
        "task": "T2",
        "task_label": "What are the weather conditions in this image?",
        "value": "Snow or Snow-Covered"
    }
]
 
[
    {
        "task": "T3",
        "value": [
            {
                "choice": "DEER",
                "answers": {
                    "HOWMANYANIMALSDOYOUSEE": "1"
                },
                "filters": {

                }
            }
        ]
    },
    {
        "task": "T2",
        "task_label": "What are the weather conditions in this image?",
        "value": "Clear"
    }
]
 
[
    {
        "task": "T3",
        "value": [
            {
                "choice": "RACCOON",
                "answers": {
                    "WHATISTHEANIMALSDOING": [
                        "WALKING"
                    ],
                    "HOWMANYANIMALSDOYOUSEE": "1"
                },
                "filters": {

                }
            }
        ]
    },
    {
        "task": "T2",
        "task_label": "What are the weather conditions in this image?",
        "value": "Raining"
    }
]
 
# preliminary flat
basic_flat_with_values <- jdata %>% 
     select(., subject_ids, classification_id, workflow_version, annotations) %>%
     as.tbl_json(json.column = "annotations") %>%
     gather_array(column.name = "task_index") %>% # really important for joining later
     spread_values(task = jstring("task"), task_label = jstring("task_label"), value = jstring("value")) 
basic_flat_with_values %>% data.frame %>% head
basic_summary %>% data.frame %>% group_by(., workflow_version, key, task) %>% summarise(., n())
#--------------------------------------------------------------------------------#
# split into survey vs. non-survey data frames. Question is flattened and can be exported as a separate file now.
survey <- basic_flat_with_values %>% filter(., task == "T3")
question <- basic_flat_with_values %>% filter(., task == "T2") 
###----------------------------### SURVEY FLATTENING ###----------------------------### 
# grab choices; append embedded array values just for tracking
with_choices <- survey %>%
     enter_object("value") %>% json_lengths(column.name = "total_species") %>% 
     gather_array(column.name = "species_index") %>% #each classification is an array. so you need to gather up multiple arrays.
     spread_values(choice = jstring("choice")) 
# if there are multiple species ID'd, there will be multiple rows and array.index will be >1
with_choices %>% data.frame %>% head
with_choices %>% summarise(., n_distinct(subject_ids), n_distinct(classification_id))
with_choices %>% group_by(., classification_id) %>% summarise(., count = n(), max(species_index)) %>% arrange(., -count)

Let’s start the process of grabbing and flattening the nested data. Note that this section requires you to reference the specific suquestion labels, so if they change throughout the life of your project, you MUST create a script to handle the revisions.

# grab answers. Note that the spread_values() function needs to be customized per team and subquestion label.
with_answers <- with_choices %>% 
     enter_object("answers") %>% 
     spread_values(how_many = jstring("HOWMANYANIMALSDOYOUSEE")) %>%
     enter_object("WHATISTHEANIMALSDOING") %>% #enter into the list of behaviors
     gather_array("behavior_index") %>% #gather into one behavior per row
     append_values_string("behavior") 
# note that behaviors are listed in a "long" format, but this is probably unwieldy.
with_answers %>% data.frame %>% head

Let’s spread out the answers into individual columns with 1/0 indicators for whether or not that behavior was identified.

# spread answers (into separate columns): have to drop behavior index or else the rows won't combine!
with_answers_spread <- with_answers %>% data.frame %>% 
     select(., -behavior_index) %>%
     mutate(., behavior_present = 1) %>%
     spread(., key = behavior, value = behavior_present, fill = 0)
with_answers_spread %>% data.frame %>% head
with_answers_spread %>% summarise(., n_distinct(subject_ids), n_distinct(classification_id))

You could, also, in theory, create a column that contains an actual list of the behaviors. Note that the values look similar to how tidyjson would display them, but they are actual lists instead of character strings that say “list(…)”

# spread answers (into a list)
test <- with_answers %>% data.frame %>% 
     select(., -behavior_index) %>% nest(behavior)
with_answers %>% data.frame %>% head
# in theory, you want to tie all of these back together just in case there are missing values
add_choices <- left_join(survey, with_choices)
Joining, by = c("subject_ids", "classification_id", "workflow_version", "task_index", "task", "task_label", "value")
tot <- left_join(add_choices, with_answers_spread)
Joining, by = c("subject_ids", "classification_id", "workflow_version", "task_index", "task", "task_label", "value", "total_species", "species_index", "choice")
flat_data <- tot %>% select(., -task_index, -task_label, -value)
flat_data %>% data.frame %>% head
#check that the number of distinct subject IDs and classification IDs is still the same
flat_data %>% summarise(., n_distinct(subject_ids), n_distinct(classification_id), n()) #flattened,
jdata %>% summarise(., n_distinct(subject_ids), n_distinct(classification_id), n()) #original
#save your files for aggregation!
write.csv(flat_data, file = "../data/T3-flattened.csv")
write.csv(question, file = "../data/T2-flattened.csv")
LS0tCnRpdGxlOiAiSlNPTiBwYXJzaW5nOiBzdXJ2ZXkgdGFza3M6IHdpdGggbXVsdGlwbGUtY2hvaWNlIHN1YnF1ZXN0aW9ucyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBwcm9qZWN0IGhhcyB0d28gdGFza3MgaW4gdGhlaXIgd29ya2Zsb3csIGEgc3VydmV5IHRhc2sgYW5kIGEgZm9sbG93IHVwIHF1ZXN0aW9uIHRhc2sgYXNraW5nIGFib3V0IHRoZSB3ZWF0aGVyLiBUaGUgc3VydmV5IHRhc2sgYWxzbyBoYXMgc3VicXVlc3Rpb25zIHRoYXQgYXNrIHRoZSB2b2x1bnRlZXIgdG8gc2VsZWN0IGFsbCB0aGF0IGFwcGx5LCBtZWFuaW5nIHdlIGhhdmUgYW4gZXh0cmEgc3RlcCB0byBmbGF0dGVuIG91dCB0aGUgYW5ub3RhdGlvbnMKCmBgYHtyfQpsaWJyYXJ5KHRpZHlqc29uKQpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KGpzb25saXRlKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHN0cmluZ3IpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkobHVicmlkYXRlKQpgYGAKYGBge3J9CgpqZGF0YV91bmZpbHRlcmVkIDwtIHJlYWQuY3N2KGZpbGUgPSAiLi4vZGF0YS9taWNoaWdhbi16b29taW4tY2xhc3NpZmljYXRpb25zLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGKQoKIyB5b3UnZCBwcm9iYWJseSBuZWVkIHRvIGluY2x1ZGUgbXVsdGlwbGUgdmVyc2lvbnMgKGFzIHRoZXNlIGxpa2VseSBoYXZlIG1pbm9yIHRleHQgY2hhbmdlcywgYnV0IGZvciB0aGlzIGRlbW8gd2UnbGwgY2hvb3NlIDQ2My41NSkKamRhdGFfdW5maWx0ZXJlZCAlPiUgbXV0YXRlKC4sIGNyZWF0ZWRfYXQgPSB5bWRfaG1zKGNyZWF0ZWRfYXQpKSAlPiUgCiAgICAgZ3JvdXBfYnkoLiwgd29ya2Zsb3dfaWQsIHdvcmtmbG93X3ZlcnNpb24pICU+JSBzdW1tYXJpc2UoLiwgbWF4KGNyZWF0ZWRfYXQpLCBuKCkpICU+JSBoZWFkCgoKamRhdGEgPC0gamRhdGFfdW5maWx0ZXJlZCAlPiUgZmlsdGVyKC4sIHdvcmtmbG93X3ZlcnNpb24gPT0gNDYzLjU1KSAlPiUgaGVhZCguLCBuID0gNTAwMCkKamRhdGEgJT4lIHN1bW1hcmlzZSguLCBuX2Rpc3RpbmN0KHN1YmplY3RfaWRzKSwgbl9kaXN0aW5jdChjbGFzc2lmaWNhdGlvbl9pZCksIG5fZGlzdGluY3Qod29ya2Zsb3dfdmVyc2lvbikpCgpgYGAKClRha2UgYSBwZWVrIGF0IHRoZSBkYXRhIHN0cnVjdHVyZS4gVGhlcmUgYXJlIHR3byB0YXNrcywgYW5kIHdpdGhpbiB0aGUgc3VydmV5IHRhc2ssIG9ubHkgc29tZSBzcGVjaWVzIGhhdmUgc3VicXVlc3Rpb25zLgpgYGB7cn0KIyMjIyMjIyMjIyMjIyMjIFNVUlZFWSBUQVNLCmhlYWQoamRhdGEpCmZvciAoaSBpbiAxNToxNykgewogICAgIGpkYXRhJGFubm90YXRpb25zW2ldICU+JSBwcmV0dGlmeSAlPiUgcHJpbnQKfQpgYGAKCmBgYHtyfQojIHByZWxpbWluYXJ5IGZsYXQKCmJhc2ljX2ZsYXRfd2l0aF92YWx1ZXMgPC0gamRhdGEgJT4lIAogICAgIHNlbGVjdCguLCBzdWJqZWN0X2lkcywgY2xhc3NpZmljYXRpb25faWQsIHdvcmtmbG93X3ZlcnNpb24sIGFubm90YXRpb25zKSAlPiUKICAgICBhcy50YmxfanNvbihqc29uLmNvbHVtbiA9ICJhbm5vdGF0aW9ucyIpICU+JQogICAgIGdhdGhlcl9hcnJheShjb2x1bW4ubmFtZSA9ICJ0YXNrX2luZGV4IikgJT4lICMgcmVhbGx5IGltcG9ydGFudCBmb3Igam9pbmluZyBsYXRlcgogICAgIHNwcmVhZF92YWx1ZXModGFzayA9IGpzdHJpbmcoInRhc2siKSwgdGFza19sYWJlbCA9IGpzdHJpbmcoInRhc2tfbGFiZWwiKSwgdmFsdWUgPSBqc3RyaW5nKCJ2YWx1ZSIpKSAKCmJhc2ljX2ZsYXRfd2l0aF92YWx1ZXMgJT4lIGRhdGEuZnJhbWUgJT4lIGhlYWQKCmJhc2ljX3N1bW1hcnkgJT4lIGRhdGEuZnJhbWUgJT4lIGdyb3VwX2J5KC4sIHdvcmtmbG93X3ZlcnNpb24sIGtleSwgdGFzaykgJT4lIHN1bW1hcmlzZSguLCBuKCkpCmBgYAoKYGBge3J9CgojLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0jCiMgc3BsaXQgaW50byBzdXJ2ZXkgdnMuIG5vbi1zdXJ2ZXkgZGF0YSBmcmFtZXMuIFF1ZXN0aW9uIGlzIGZsYXR0ZW5lZCBhbmQgY2FuIGJlIGV4cG9ydGVkIGFzIGEgc2VwYXJhdGUgZmlsZSBub3cuCnN1cnZleSA8LSBiYXNpY19mbGF0X3dpdGhfdmFsdWVzICU+JSBmaWx0ZXIoLiwgdGFzayA9PSAiVDMiKQpxdWVzdGlvbiA8LSBiYXNpY19mbGF0X3dpdGhfdmFsdWVzICU+JSBmaWx0ZXIoLiwgdGFzayA9PSAiVDIiKSAKCiMjIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0jIyMgU1VSVkVZIEZMQVRURU5JTkcgIyMjLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSMjIyAKCiMgZ3JhYiBjaG9pY2VzOyBTcGVjaWVzX2luZGV4IGxpc3RzIGhvdyBtYW55IHNwZWNpZXMgd2VyZSByZWNvcmRlZCBpbiBhIGdpdmVuIGNsYXNzaWZpY2F0aW9uLiAoVXN1YWxseSBtYXhlcyBhdCAyLi4uCndpdGhfY2hvaWNlcyA8LSBzdXJ2ZXkgJT4lCiAgICAgZW50ZXJfb2JqZWN0KCJ2YWx1ZSIpICU+JSBqc29uX2xlbmd0aHMoY29sdW1uLm5hbWUgPSAidG90YWxfc3BlY2llcyIpICU+JSAKICAgICBnYXRoZXJfYXJyYXkoY29sdW1uLm5hbWUgPSAic3BlY2llc19pbmRleCIpICU+JSAjZWFjaCBjbGFzc2lmaWNhdGlvbiBpcyBhbiBhcnJheS4gc28geW91IG5lZWQgdG8gZ2F0aGVyIHVwIG11bHRpcGxlIGFycmF5cy4KICAgICBzcHJlYWRfdmFsdWVzKGNob2ljZSA9IGpzdHJpbmcoImNob2ljZSIpKSAKCiMgaWYgdGhlcmUgYXJlIG11bHRpcGxlIHNwZWNpZXMgSUQnZCwgdGhlcmUgd2lsbCBiZSBtdWx0aXBsZSByb3dzIGFuZCBhcnJheS5pbmRleCB3aWxsIGJlID4xCndpdGhfY2hvaWNlcyAlPiUgZGF0YS5mcmFtZSAlPiUgaGVhZAp3aXRoX2Nob2ljZXMgJT4lIHN1bW1hcmlzZSguLCBuX2Rpc3RpbmN0KHN1YmplY3RfaWRzKSwgbl9kaXN0aW5jdChjbGFzc2lmaWNhdGlvbl9pZCkpCmBgYAoKTGV0J3Mgc3RhcnQgdGhlIHByb2Nlc3Mgb2YgZ3JhYmJpbmcgYW5kIGZsYXR0ZW5pbmcgdGhlIG5lc3RlZCBkYXRhLiBOb3RlIHRoYXQgdGhpcyBzZWN0aW9uIHJlcXVpcmVzIHlvdSB0byByZWZlcmVuY2UgdGhlIHNwZWNpZmljIHN1cXVlc3Rpb24gbGFiZWxzLCBzbyBpZiB0aGV5IGNoYW5nZSB0aHJvdWdob3V0IHRoZSBsaWZlIG9mIHlvdXIgcHJvamVjdCwgeW91IE1VU1QgY3JlYXRlIGEgc2NyaXB0IHRvIGhhbmRsZSB0aGUgcmV2aXNpb25zLgpgYGB7cn0KIyBncmFiIGFuc3dlcnMuIE5vdGUgdGhhdCB0aGUgc3ByZWFkX3ZhbHVlcygpIGZ1bmN0aW9uIG5lZWRzIHRvIGJlIGN1c3RvbWl6ZWQgcGVyIHRlYW0gYW5kIHN1YnF1ZXN0aW9uIGxhYmVsLgoKd2l0aF9hbnN3ZXJzIDwtIHdpdGhfY2hvaWNlcyAlPiUgCiAgICAgZW50ZXJfb2JqZWN0KCJhbnN3ZXJzIikgJT4lIAogICAgIHNwcmVhZF92YWx1ZXMoaG93X21hbnkgPSBqc3RyaW5nKCJIT1dNQU5ZQU5JTUFMU0RPWU9VU0VFIikpICU+JQogICAgIGVudGVyX29iamVjdCgiV0hBVElTVEhFQU5JTUFMU0RPSU5HIikgJT4lICNlbnRlciBpbnRvIHRoZSBsaXN0IG9mIGJlaGF2aW9ycwogICAgIGdhdGhlcl9hcnJheSgiYmVoYXZpb3JfaW5kZXgiKSAlPiUgI2dhdGhlciBpbnRvIG9uZSBiZWhhdmlvciBwZXIgcm93CiAgICAgYXBwZW5kX3ZhbHVlc19zdHJpbmcoImJlaGF2aW9yIikgCgojIG5vdGUgdGhhdCBiZWhhdmlvcnMgYXJlIGxpc3RlZCBpbiBhICJsb25nIiBmb3JtYXQsIGJ1dCB0aGlzIGlzIHByb2JhYmx5IHVud2llbGR5Lgp3aXRoX2Fuc3dlcnMgJT4lIGRhdGEuZnJhbWUgJT4lIGhlYWQKYGBgCgpMZXQncyBzcHJlYWQgb3V0IHRoZSBhbnN3ZXJzIGludG8gaW5kaXZpZHVhbCBjb2x1bW5zIHdpdGggMS8wIGluZGljYXRvcnMgZm9yIHdoZXRoZXIgb3Igbm90IHRoYXQgYmVoYXZpb3Igd2FzIGlkZW50aWZpZWQuCmBgYHtyfQojIHNwcmVhZCBhbnN3ZXJzIChpbnRvIHNlcGFyYXRlIGNvbHVtbnMpOiBoYXZlIHRvIGRyb3AgYmVoYXZpb3IgaW5kZXggb3IgZWxzZSB0aGUgcm93cyB3b24ndCBjb21iaW5lIQp3aXRoX2Fuc3dlcnNfc3ByZWFkIDwtIHdpdGhfYW5zd2VycyAlPiUgZGF0YS5mcmFtZSAlPiUgCiAgICAgc2VsZWN0KC4sIC1iZWhhdmlvcl9pbmRleCkgJT4lCiAgICAgbXV0YXRlKC4sIGJlaGF2aW9yX3ByZXNlbnQgPSAxKSAlPiUKICAgICBzcHJlYWQoLiwga2V5ID0gYmVoYXZpb3IsIHZhbHVlID0gYmVoYXZpb3JfcHJlc2VudCwgZmlsbCA9IDApCgp3aXRoX2Fuc3dlcnNfc3ByZWFkICU+JSBkYXRhLmZyYW1lICU+JSBoZWFkCndpdGhfYW5zd2Vyc19zcHJlYWQgJT4lIHN1bW1hcmlzZSguLCBuX2Rpc3RpbmN0KHN1YmplY3RfaWRzKSwgbl9kaXN0aW5jdChjbGFzc2lmaWNhdGlvbl9pZCkpCgpgYGAKCllvdSBjb3VsZCwgYWxzbywgaW4gdGhlb3J5LCBjcmVhdGUgYSBjb2x1bW4gdGhhdCBjb250YWlucyBhbiBhY3R1YWwgbGlzdCBvZiB0aGUgYmVoYXZpb3JzLiBOb3RlIHRoYXQgdGhlIHZhbHVlcyBsb29rIHNpbWlsYXIgdG8gaG93IHRpZHlqc29uIHdvdWxkIGRpc3BsYXkgdGhlbSwgYnV0IHRoZXkgYXJlIGFjdHVhbCBsaXN0cyBpbnN0ZWFkIG9mIGNoYXJhY3RlciBzdHJpbmdzIHRoYXQgc2F5ICJsaXN0KC4uLikiCmBgYHtyfQojIHNwcmVhZCBhbnN3ZXJzIChpbnRvIGEgbGlzdCkKdGVzdCA8LSB3aXRoX2Fuc3dlcnMgJT4lIGRhdGEuZnJhbWUgJT4lIAogICAgIHNlbGVjdCguLCAtYmVoYXZpb3JfaW5kZXgpICU+JSBuZXN0KGJlaGF2aW9yKQoKCndpdGhfYW5zd2VycyAlPiUgZGF0YS5mcmFtZSAlPiUgaGVhZApgYGAKCmBgYHtyfQojIGluIHRoZW9yeSwgeW91IHdhbnQgdG8gdGllIGFsbCBvZiB0aGVzZSBiYWNrIHRvZ2V0aGVyIGp1c3QgaW4gY2FzZSB0aGVyZSBhcmUgbWlzc2luZyB2YWx1ZXMKYWRkX2Nob2ljZXMgPC0gbGVmdF9qb2luKHN1cnZleSwgd2l0aF9jaG9pY2VzKQp0b3QgPC0gbGVmdF9qb2luKGFkZF9jaG9pY2VzLCB3aXRoX2Fuc3dlcnNfc3ByZWFkKQpmbGF0X2RhdGEgPC0gdG90ICU+JSBzZWxlY3QoLiwgLXRhc2tfaW5kZXgsIC10YXNrX2xhYmVsLCAtdmFsdWUpCgpmbGF0X2RhdGEgJT4lIGRhdGEuZnJhbWUgJT4lIGhlYWQKYGBgCgpgYGB7cn0KI2NoZWNrIHRoYXQgdGhlIG51bWJlciBvZiBkaXN0aW5jdCBzdWJqZWN0IElEcyBhbmQgY2xhc3NpZmljYXRpb24gSURzIGlzIHN0aWxsIHRoZSBzYW1lCmZsYXRfZGF0YSAlPiUgc3VtbWFyaXNlKC4sIG5fZGlzdGluY3Qoc3ViamVjdF9pZHMpLCBuX2Rpc3RpbmN0KGNsYXNzaWZpY2F0aW9uX2lkKSwgbigpKSAjZmxhdHRlbmVkLApqZGF0YSAlPiUgc3VtbWFyaXNlKC4sIG5fZGlzdGluY3Qoc3ViamVjdF9pZHMpLCBuX2Rpc3RpbmN0KGNsYXNzaWZpY2F0aW9uX2lkKSwgbigpKSAjb3JpZ2luYWwKCiNzYXZlIHlvdXIgZmlsZXMgZm9yIGFnZ3JlZ2F0aW9uIQp3cml0ZS5jc3YoZmxhdF9kYXRhLCBmaWxlID0gIi4uL2RhdGEvVDMtZmxhdHRlbmVkLmNzdiIpCndyaXRlLmNzdihxdWVzdGlvbiwgZmlsZSA9ICIuLi9kYXRhL1QyLWZsYXR0ZW5lZC5jc3YiKQoKYGBgCg==