This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(jsonlite)
library(tidyverse)
library(tidytext)
library(stringr)
library(scales)
library(tidyjson)
library(purrr)
library(lubridate)
library(broom)
library(plotly) 
library(feather)
#This takes a while - no need to do unless  wnating to update
#nyp <- fromJSON('https://raw.githubusercontent.com/nyphilarchive/PerformanceHistory/master/Programs/json/complete.json')
work_to_data_frame <- function(work) {
  workID <- work['ID']
  composer <- work['composerName']
  title <- work['workTitle']
  movement <- work['movement']
  conductor <- work['conductorName']
  soloist <- work['soloists']
  return(c(workID = workID,
           composer = composer,
           title = title,
           movement = movement,
           conductor = conductor,
           soloist = soloist))
}
expand_works <- function(record) {
  if (is_empty(record)) {
    works_db <- as.data.frame(cbind(workID = NA, 
                                composer = NA, 
                                title = NA, 
                                movement = NA, 
                                conductor = NA, 
                                soloist = NA))
    } else {
      total <- length(record)
      works_db <- t(sapply(record[1:total], work_to_data_frame))
      colnames(works_db) <- c('workID',
                              'composer',
                              'title',
                              'movement',
                              'conductor',
                              'soloist')
    }
  return(works_db)
}
expand_program <- function(record_number) {
  record <- nyp$programs[[record_number]]
  total <- length(record)
  program <- as.data.frame(cbind(id = record$id,
                                 programID = record$programID,
                                 orchestra = record$orchestra,
                                 season = record$season,
                                 eventType = record$concerts[[1]]$eventType,
                                 location = record$concerts[[1]]$Location,
                                 venue = record$concerts[[1]]$Venue,
                                 date = record$concerts[[1]]$Date,
                                 time = record$concerts[[1]]$Time))
  works <- expand_works(record$works)
  return(cbind(program, works))
}
# this takes a LOOOOOONG time so just nabbed finished file
# db <- data.frame()
# for (i in 1:13771) {
#   db <- rbind(db, cbind(i, expand_program(i)))
# }
# 
# write.csv(db, 'ny_phil_programs.csv')
# tidy_nyp <- db %>%
#   as_tibble() %>%
#   mutate(workID = as.character(workID), 
#          composer = as.character(composer), 
#          title = as.character(title), 
#          movement = as.character(movement), 
#          conductor = as.character(conductor),
#          soloist = as.character(soloist)) 
# 
# tidy_nyp %>%
#   write.csv('ny_phil_programs.csv')
#db <- read_csv("data/ny_phil_programs.csv")
# db2 <- read.csv("data/ny_phil_programs.csv")
# db2$composer[929]
# 
# # try feather
# library(feather)
# 
# write_feather(db2,"data/ny_phil_programs.feather")
#df <- read_feather("data/ny_phil_programs.feather") fast but gave same errors
df <- read.csv("data/ny_phil_programs.csv")

Now have tidied code can slightly vary from original (could be issue as there are attributes) Issue with encoding most unknown some UTF-8

df %>%  
  ungroup() %>% 
  filter(!composer %in% c('NULL', 'Traditional,', 'Anthem,')) %>%  
  count(composer, sort=TRUE) %>%  
  filter(n > 400) %>%  # was 400 but want to get rid of Chopin as invalid UTF-8
  mutate(composer = reorder(composer, n)) %>% 
  plot_ly(x=~n, y=~composer, height = 1000) %>% 
  add_bars(color= ~as.character(composer), showlegend=FALSE) %>% 
  layout(margin=list(l=200),
         xaxis=list(title="Number of Works performed"),
         yaxis=list(title="")
  )
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors

NA
df %>%  
  filter(!title %in% c('NULL')) %>%  
  mutate(composer_work = paste(composer, '-', title)) %>%  
  group_by(composer_work, programID) %>%  
  summarize(times_on_program = n()) %>%  
  count(composer_work, sort=TRUE) %>%  
  filter(n > 220) %>%  
  mutate(composer_work = reorder(composer_work, n)) %>% 
  plot_ly(x=~n, y=~composer_work, height= 800) %>% 
  add_bars(color= ~as.character(composer_work), showlegend=FALSE) %>% 
  layout(margin=list(l=500),
         xaxis=list(title="Number of times performed"),
         yaxis=list(title="")
  )
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
LS0tDQp0aXRsZTogIk5ZIG9yY2hlc3RyYSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIA0KDQpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIA0KDQpgYGB7ciBzZXR1cCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoanNvbmxpdGUpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGlkeXRleHQpDQpsaWJyYXJ5KHN0cmluZ3IpDQpsaWJyYXJ5KHNjYWxlcykNCmxpYnJhcnkodGlkeWpzb24pDQpsaWJyYXJ5KHB1cnJyKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGJyb29tKQ0KbGlicmFyeShwbG90bHkpIA0KbGlicmFyeShmZWF0aGVyKQ0KDQojVGhpcyB0YWtlcyBhIHdoaWxlIC0gbm8gbmVlZCB0byBkbyB1bmxlc3MgIHduYXRpbmcgdG8gdXBkYXRlDQojbnlwIDwtIGZyb21KU09OKCdodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vbnlwaGlsYXJjaGl2ZS9QZXJmb3JtYW5jZUhpc3RvcnkvbWFzdGVyL1Byb2dyYW1zL2pzb24vY29tcGxldGUuanNvbicpDQoNCndvcmtfdG9fZGF0YV9mcmFtZSA8LSBmdW5jdGlvbih3b3JrKSB7DQogIHdvcmtJRCA8LSB3b3JrWydJRCddDQogIGNvbXBvc2VyIDwtIHdvcmtbJ2NvbXBvc2VyTmFtZSddDQogIHRpdGxlIDwtIHdvcmtbJ3dvcmtUaXRsZSddDQogIG1vdmVtZW50IDwtIHdvcmtbJ21vdmVtZW50J10NCiAgY29uZHVjdG9yIDwtIHdvcmtbJ2NvbmR1Y3Rvck5hbWUnXQ0KICBzb2xvaXN0IDwtIHdvcmtbJ3NvbG9pc3RzJ10NCiAgcmV0dXJuKGMod29ya0lEID0gd29ya0lELA0KICAgICAgICAgICBjb21wb3NlciA9IGNvbXBvc2VyLA0KICAgICAgICAgICB0aXRsZSA9IHRpdGxlLA0KICAgICAgICAgICBtb3ZlbWVudCA9IG1vdmVtZW50LA0KICAgICAgICAgICBjb25kdWN0b3IgPSBjb25kdWN0b3IsDQogICAgICAgICAgIHNvbG9pc3QgPSBzb2xvaXN0KSkNCn0NCg0KZXhwYW5kX3dvcmtzIDwtIGZ1bmN0aW9uKHJlY29yZCkgew0KICBpZiAoaXNfZW1wdHkocmVjb3JkKSkgew0KICAgIHdvcmtzX2RiIDwtIGFzLmRhdGEuZnJhbWUoY2JpbmQod29ya0lEID0gTkEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb21wb3NlciA9IE5BLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGl0bGUgPSBOQSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1vdmVtZW50ID0gTkEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb25kdWN0b3IgPSBOQSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNvbG9pc3QgPSBOQSkpDQogICAgfSBlbHNlIHsNCiAgICAgIHRvdGFsIDwtIGxlbmd0aChyZWNvcmQpDQogICAgICB3b3Jrc19kYiA8LSB0KHNhcHBseShyZWNvcmRbMTp0b3RhbF0sIHdvcmtfdG9fZGF0YV9mcmFtZSkpDQogICAgICBjb2xuYW1lcyh3b3Jrc19kYikgPC0gYygnd29ya0lEJywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdjb21wb3NlcicsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAndGl0bGUnLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ21vdmVtZW50JywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdjb25kdWN0b3InLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ3NvbG9pc3QnKQ0KICAgIH0NCiAgcmV0dXJuKHdvcmtzX2RiKQ0KfQ0KDQpleHBhbmRfcHJvZ3JhbSA8LSBmdW5jdGlvbihyZWNvcmRfbnVtYmVyKSB7DQogIHJlY29yZCA8LSBueXAkcHJvZ3JhbXNbW3JlY29yZF9udW1iZXJdXQ0KICB0b3RhbCA8LSBsZW5ndGgocmVjb3JkKQ0KICBwcm9ncmFtIDwtIGFzLmRhdGEuZnJhbWUoY2JpbmQoaWQgPSByZWNvcmQkaWQsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwcm9ncmFtSUQgPSByZWNvcmQkcHJvZ3JhbUlELA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgb3JjaGVzdHJhID0gcmVjb3JkJG9yY2hlc3RyYSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNlYXNvbiA9IHJlY29yZCRzZWFzb24sDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBldmVudFR5cGUgPSByZWNvcmQkY29uY2VydHNbWzFdXSRldmVudFR5cGUsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsb2NhdGlvbiA9IHJlY29yZCRjb25jZXJ0c1tbMV1dJExvY2F0aW9uLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdmVudWUgPSByZWNvcmQkY29uY2VydHNbWzFdXSRWZW51ZSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGUgPSByZWNvcmQkY29uY2VydHNbWzFdXSREYXRlLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGltZSA9IHJlY29yZCRjb25jZXJ0c1tbMV1dJFRpbWUpKQ0KICB3b3JrcyA8LSBleHBhbmRfd29ya3MocmVjb3JkJHdvcmtzKQ0KICByZXR1cm4oY2JpbmQocHJvZ3JhbSwgd29ya3MpKQ0KfQ0KYGBgDQoNCmBgYHtyfQ0KDQoNCiMgdGhpcyB0YWtlcyBhIExPT09PT09ORyB0aW1lIHNvIGp1c3QgbmFiYmVkIGZpbmlzaGVkIGZpbGUNCg0KIyBkYiA8LSBkYXRhLmZyYW1lKCkNCiMgZm9yIChpIGluIDE6MTM3NzEpIHsNCiMgICBkYiA8LSByYmluZChkYiwgY2JpbmQoaSwgZXhwYW5kX3Byb2dyYW0oaSkpKQ0KIyB9DQojIA0KIyB3cml0ZS5jc3YoZGIsICdueV9waGlsX3Byb2dyYW1zLmNzdicpDQojIHRpZHlfbnlwIDwtIGRiICU+JQ0KIyAgIGFzX3RpYmJsZSgpICU+JQ0KIyAgIG11dGF0ZSh3b3JrSUQgPSBhcy5jaGFyYWN0ZXIod29ya0lEKSwgDQojICAgICAgICAgIGNvbXBvc2VyID0gYXMuY2hhcmFjdGVyKGNvbXBvc2VyKSwgDQojICAgICAgICAgIHRpdGxlID0gYXMuY2hhcmFjdGVyKHRpdGxlKSwgDQojICAgICAgICAgIG1vdmVtZW50ID0gYXMuY2hhcmFjdGVyKG1vdmVtZW50KSwgDQojICAgICAgICAgIGNvbmR1Y3RvciA9IGFzLmNoYXJhY3Rlcihjb25kdWN0b3IpLA0KIyAgICAgICAgICBzb2xvaXN0ID0gYXMuY2hhcmFjdGVyKHNvbG9pc3QpKSANCiMgDQojIHRpZHlfbnlwICU+JQ0KIyAgIHdyaXRlLmNzdignbnlfcGhpbF9wcm9ncmFtcy5jc3YnKQ0KI2RiIDwtIHJlYWRfY3N2KCJkYXRhL255X3BoaWxfcHJvZ3JhbXMuY3N2IikNCiMgZGIyIDwtIHJlYWQuY3N2KCJkYXRhL255X3BoaWxfcHJvZ3JhbXMuY3N2IikNCiMgZGIyJGNvbXBvc2VyWzkyOV0NCiMgDQojICMgdHJ5IGZlYXRoZXINCiMgbGlicmFyeShmZWF0aGVyKQ0KIyANCiMgd3JpdGVfZmVhdGhlcihkYjIsImRhdGEvbnlfcGhpbF9wcm9ncmFtcy5mZWF0aGVyIikNCiNkZiA8LSByZWFkX2ZlYXRoZXIoImRhdGEvbnlfcGhpbF9wcm9ncmFtcy5mZWF0aGVyIikgZmFzdCBidXQgZ2F2ZSBzYW1lIGVycm9ycw0KZGYgPC0gcmVhZC5jc3YoImRhdGEvbnlfcGhpbF9wcm9ncmFtcy5jc3YiKQ0KDQpgYGANCg0KTm93IGhhdmUgdGlkaWVkIGNvZGUgY2FuIHNsaWdodGx5IHZhcnkgZnJvbSBvcmlnaW5hbCAoY291bGQgYmUgaXNzdWUgYXMgdGhlcmUgYXJlIGF0dHJpYnV0ZXMpDQpJc3N1ZSB3aXRoIGVuY29kaW5nICBtb3N0IHVua25vd24gc29tZSBVVEYtOA0KDQpgYGB7cn0NCg0KDQpkZiAlPiUgIA0KICB1bmdyb3VwKCkgJT4lIA0KICBmaWx0ZXIoIWNvbXBvc2VyICVpbiUgYygnTlVMTCcsICdUcmFkaXRpb25hbCwnLCAnQW50aGVtLCcpKSAlPiUgIA0KICBjb3VudChjb21wb3Nlciwgc29ydD1UUlVFKSAlPiUgIA0KICBmaWx0ZXIobiA+IDQwMCkgJT4lICAjIHdhcyA0MDAgYnV0IHdhbnQgdG8gZ2V0IHJpZCBvZiBDaG9waW4gYXMgaW52YWxpZCBVVEYtOA0KICBtdXRhdGUoY29tcG9zZXIgPSByZW9yZGVyKGNvbXBvc2VyLCBuKSkgJT4lIA0KICBwbG90X2x5KHg9fm4sIHk9fmNvbXBvc2VyLCBoZWlnaHQgPSAxMDAwKSAlPiUgDQogIGFkZF9iYXJzKGNvbG9yPSB+YXMuY2hhcmFjdGVyKGNvbXBvc2VyKSwgc2hvd2xlZ2VuZD1GQUxTRSkgJT4lIA0KICBsYXlvdXQobWFyZ2luPWxpc3QobD0yMDApLA0KICAgICAgICAgeGF4aXM9bGlzdCh0aXRsZT0iTnVtYmVyIG9mIFdvcmtzIHBlcmZvcm1lZCIpLA0KICAgICAgICAgeWF4aXM9bGlzdCh0aXRsZT0iIikNCiAgKQ0KIA0KDQpgYGANCg0KYGBge3J9DQpkZiAlPiUgIA0KICBmaWx0ZXIoIXRpdGxlICVpbiUgYygnTlVMTCcpKSAlPiUgIA0KICBtdXRhdGUoY29tcG9zZXJfd29yayA9IHBhc3RlKGNvbXBvc2VyLCAnLScsIHRpdGxlKSkgJT4lICANCiAgZ3JvdXBfYnkoY29tcG9zZXJfd29yaywgcHJvZ3JhbUlEKSAlPiUgIA0KICBzdW1tYXJpemUodGltZXNfb25fcHJvZ3JhbSA9IG4oKSkgJT4lICANCiAgY291bnQoY29tcG9zZXJfd29yaywgc29ydD1UUlVFKSAlPiUgIA0KICBmaWx0ZXIobiA+IDIyMCkgJT4lICANCiAgbXV0YXRlKGNvbXBvc2VyX3dvcmsgPSByZW9yZGVyKGNvbXBvc2VyX3dvcmssIG4pKSAlPiUgDQogIHBsb3RfbHkoeD1+biwgeT1+Y29tcG9zZXJfd29yaywgaGVpZ2h0PSA4MDApICU+JSANCiAgYWRkX2JhcnMoY29sb3I9IH5hcy5jaGFyYWN0ZXIoY29tcG9zZXJfd29yayksIHNob3dsZWdlbmQ9RkFMU0UpICU+JSANCiAgbGF5b3V0KG1hcmdpbj1saXN0KGw9NTAwKSwNCiAgICAgICAgIHhheGlzPWxpc3QodGl0bGU9Ik51bWJlciBvZiB0aW1lcyBwZXJmb3JtZWQiKSwNCiAgICAgICAgIHlheGlzPWxpc3QodGl0bGU9IiIpDQogICkNCmBgYA==