This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(jsonlite)
library(tidyverse)
library(tidytext)
library(stringr)
library(scales)
library(tidyjson)
library(purrr)
library(lubridate)
library(broom)
library(plotly)
library(feather)
#This takes a while - no need to do unless wnating to update
#nyp <- fromJSON('https://raw.githubusercontent.com/nyphilarchive/PerformanceHistory/master/Programs/json/complete.json')
work_to_data_frame <- function(work) {
workID <- work['ID']
composer <- work['composerName']
title <- work['workTitle']
movement <- work['movement']
conductor <- work['conductorName']
soloist <- work['soloists']
return(c(workID = workID,
composer = composer,
title = title,
movement = movement,
conductor = conductor,
soloist = soloist))
}
expand_works <- function(record) {
if (is_empty(record)) {
works_db <- as.data.frame(cbind(workID = NA,
composer = NA,
title = NA,
movement = NA,
conductor = NA,
soloist = NA))
} else {
total <- length(record)
works_db <- t(sapply(record[1:total], work_to_data_frame))
colnames(works_db) <- c('workID',
'composer',
'title',
'movement',
'conductor',
'soloist')
}
return(works_db)
}
expand_program <- function(record_number) {
record <- nyp$programs[[record_number]]
total <- length(record)
program <- as.data.frame(cbind(id = record$id,
programID = record$programID,
orchestra = record$orchestra,
season = record$season,
eventType = record$concerts[[1]]$eventType,
location = record$concerts[[1]]$Location,
venue = record$concerts[[1]]$Venue,
date = record$concerts[[1]]$Date,
time = record$concerts[[1]]$Time))
works <- expand_works(record$works)
return(cbind(program, works))
}
# this takes a LOOOOOONG time so just nabbed finished file
# db <- data.frame()
# for (i in 1:13771) {
# db <- rbind(db, cbind(i, expand_program(i)))
# }
#
# write.csv(db, 'ny_phil_programs.csv')
# tidy_nyp <- db %>%
# as_tibble() %>%
# mutate(workID = as.character(workID),
# composer = as.character(composer),
# title = as.character(title),
# movement = as.character(movement),
# conductor = as.character(conductor),
# soloist = as.character(soloist))
#
# tidy_nyp %>%
# write.csv('ny_phil_programs.csv')
#db <- read_csv("data/ny_phil_programs.csv")
# db2 <- read.csv("data/ny_phil_programs.csv")
# db2$composer[929]
#
# # try feather
# library(feather)
#
# write_feather(db2,"data/ny_phil_programs.feather")
#df <- read_feather("data/ny_phil_programs.feather") fast but gave same errors
df <- read.csv("data/ny_phil_programs.csv")
Now have tidied code can slightly vary from original (could be issue as there are attributes) Issue with encoding most unknown some UTF-8
df %>%
ungroup() %>%
filter(!composer %in% c('NULL', 'Traditional,', 'Anthem,')) %>%
count(composer, sort=TRUE) %>%
filter(n > 400) %>% # was 400 but want to get rid of Chopin as invalid UTF-8
mutate(composer = reorder(composer, n)) %>%
plot_ly(x=~n, y=~composer, height = 1000) %>%
add_bars(color= ~as.character(composer), showlegend=FALSE) %>%
layout(margin=list(l=200),
xaxis=list(title="Number of Works performed"),
yaxis=list(title="")
)
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
NA
df %>%
filter(!title %in% c('NULL')) %>%
mutate(composer_work = paste(composer, '-', title)) %>%
group_by(composer_work, programID) %>%
summarize(times_on_program = n()) %>%
count(composer_work, sort=TRUE) %>%
filter(n > 220) %>%
mutate(composer_work = reorder(composer_work, n)) %>%
plot_ly(x=~n, y=~composer_work, height= 800) %>%
add_bars(color= ~as.character(composer_work), showlegend=FALSE) %>%
layout(margin=list(l=500),
xaxis=list(title="Number of times performed"),
yaxis=list(title="")
)
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
LS0tDQp0aXRsZTogIk5ZIG9yY2hlc3RyYSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIA0KDQpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ3RybCtTaGlmdCtFbnRlciouIA0KDQpgYGB7ciBzZXR1cCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkoanNvbmxpdGUpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGlkeXRleHQpDQpsaWJyYXJ5KHN0cmluZ3IpDQpsaWJyYXJ5KHNjYWxlcykNCmxpYnJhcnkodGlkeWpzb24pDQpsaWJyYXJ5KHB1cnJyKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGJyb29tKQ0KbGlicmFyeShwbG90bHkpIA0KbGlicmFyeShmZWF0aGVyKQ0KDQojVGhpcyB0YWtlcyBhIHdoaWxlIC0gbm8gbmVlZCB0byBkbyB1bmxlc3MgIHduYXRpbmcgdG8gdXBkYXRlDQojbnlwIDwtIGZyb21KU09OKCdodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vbnlwaGlsYXJjaGl2ZS9QZXJmb3JtYW5jZUhpc3RvcnkvbWFzdGVyL1Byb2dyYW1zL2pzb24vY29tcGxldGUuanNvbicpDQoNCndvcmtfdG9fZGF0YV9mcmFtZSA8LSBmdW5jdGlvbih3b3JrKSB7DQogIHdvcmtJRCA8LSB3b3JrWydJRCddDQogIGNvbXBvc2VyIDwtIHdvcmtbJ2NvbXBvc2VyTmFtZSddDQogIHRpdGxlIDwtIHdvcmtbJ3dvcmtUaXRsZSddDQogIG1vdmVtZW50IDwtIHdvcmtbJ21vdmVtZW50J10NCiAgY29uZHVjdG9yIDwtIHdvcmtbJ2NvbmR1Y3Rvck5hbWUnXQ0KICBzb2xvaXN0IDwtIHdvcmtbJ3NvbG9pc3RzJ10NCiAgcmV0dXJuKGMod29ya0lEID0gd29ya0lELA0KICAgICAgICAgICBjb21wb3NlciA9IGNvbXBvc2VyLA0KICAgICAgICAgICB0aXRsZSA9IHRpdGxlLA0KICAgICAgICAgICBtb3ZlbWVudCA9IG1vdmVtZW50LA0KICAgICAgICAgICBjb25kdWN0b3IgPSBjb25kdWN0b3IsDQogICAgICAgICAgIHNvbG9pc3QgPSBzb2xvaXN0KSkNCn0NCg0KZXhwYW5kX3dvcmtzIDwtIGZ1bmN0aW9uKHJlY29yZCkgew0KICBpZiAoaXNfZW1wdHkocmVjb3JkKSkgew0KICAgIHdvcmtzX2RiIDwtIGFzLmRhdGEuZnJhbWUoY2JpbmQod29ya0lEID0gTkEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb21wb3NlciA9IE5BLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGl0bGUgPSBOQSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG1vdmVtZW50ID0gTkEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb25kdWN0b3IgPSBOQSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNvbG9pc3QgPSBOQSkpDQogICAgfSBlbHNlIHsNCiAgICAgIHRvdGFsIDwtIGxlbmd0aChyZWNvcmQpDQogICAgICB3b3Jrc19kYiA8LSB0KHNhcHBseShyZWNvcmRbMTp0b3RhbF0sIHdvcmtfdG9fZGF0YV9mcmFtZSkpDQogICAgICBjb2xuYW1lcyh3b3Jrc19kYikgPC0gYygnd29ya0lEJywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdjb21wb3NlcicsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAndGl0bGUnLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ21vdmVtZW50JywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdjb25kdWN0b3InLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ3NvbG9pc3QnKQ0KICAgIH0NCiAgcmV0dXJuKHdvcmtzX2RiKQ0KfQ0KDQpleHBhbmRfcHJvZ3JhbSA8LSBmdW5jdGlvbihyZWNvcmRfbnVtYmVyKSB7DQogIHJlY29yZCA8LSBueXAkcHJvZ3JhbXNbW3JlY29yZF9udW1iZXJdXQ0KICB0b3RhbCA8LSBsZW5ndGgocmVjb3JkKQ0KICBwcm9ncmFtIDwtIGFzLmRhdGEuZnJhbWUoY2JpbmQoaWQgPSByZWNvcmQkaWQsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwcm9ncmFtSUQgPSByZWNvcmQkcHJvZ3JhbUlELA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgb3JjaGVzdHJhID0gcmVjb3JkJG9yY2hlc3RyYSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNlYXNvbiA9IHJlY29yZCRzZWFzb24sDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBldmVudFR5cGUgPSByZWNvcmQkY29uY2VydHNbWzFdXSRldmVudFR5cGUsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsb2NhdGlvbiA9IHJlY29yZCRjb25jZXJ0c1tbMV1dJExvY2F0aW9uLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdmVudWUgPSByZWNvcmQkY29uY2VydHNbWzFdXSRWZW51ZSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGUgPSByZWNvcmQkY29uY2VydHNbWzFdXSREYXRlLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdGltZSA9IHJlY29yZCRjb25jZXJ0c1tbMV1dJFRpbWUpKQ0KICB3b3JrcyA8LSBleHBhbmRfd29ya3MocmVjb3JkJHdvcmtzKQ0KICByZXR1cm4oY2JpbmQocHJvZ3JhbSwgd29ya3MpKQ0KfQ0KYGBgDQoNCmBgYHtyfQ0KDQoNCiMgdGhpcyB0YWtlcyBhIExPT09PT09ORyB0aW1lIHNvIGp1c3QgbmFiYmVkIGZpbmlzaGVkIGZpbGUNCg0KIyBkYiA8LSBkYXRhLmZyYW1lKCkNCiMgZm9yIChpIGluIDE6MTM3NzEpIHsNCiMgICBkYiA8LSByYmluZChkYiwgY2JpbmQoaSwgZXhwYW5kX3Byb2dyYW0oaSkpKQ0KIyB9DQojIA0KIyB3cml0ZS5jc3YoZGIsICdueV9waGlsX3Byb2dyYW1zLmNzdicpDQojIHRpZHlfbnlwIDwtIGRiICU+JQ0KIyAgIGFzX3RpYmJsZSgpICU+JQ0KIyAgIG11dGF0ZSh3b3JrSUQgPSBhcy5jaGFyYWN0ZXIod29ya0lEKSwgDQojICAgICAgICAgIGNvbXBvc2VyID0gYXMuY2hhcmFjdGVyKGNvbXBvc2VyKSwgDQojICAgICAgICAgIHRpdGxlID0gYXMuY2hhcmFjdGVyKHRpdGxlKSwgDQojICAgICAgICAgIG1vdmVtZW50ID0gYXMuY2hhcmFjdGVyKG1vdmVtZW50KSwgDQojICAgICAgICAgIGNvbmR1Y3RvciA9IGFzLmNoYXJhY3Rlcihjb25kdWN0b3IpLA0KIyAgICAgICAgICBzb2xvaXN0ID0gYXMuY2hhcmFjdGVyKHNvbG9pc3QpKSANCiMgDQojIHRpZHlfbnlwICU+JQ0KIyAgIHdyaXRlLmNzdignbnlfcGhpbF9wcm9ncmFtcy5jc3YnKQ0KI2RiIDwtIHJlYWRfY3N2KCJkYXRhL255X3BoaWxfcHJvZ3JhbXMuY3N2IikNCiMgZGIyIDwtIHJlYWQuY3N2KCJkYXRhL255X3BoaWxfcHJvZ3JhbXMuY3N2IikNCiMgZGIyJGNvbXBvc2VyWzkyOV0NCiMgDQojICMgdHJ5IGZlYXRoZXINCiMgbGlicmFyeShmZWF0aGVyKQ0KIyANCiMgd3JpdGVfZmVhdGhlcihkYjIsImRhdGEvbnlfcGhpbF9wcm9ncmFtcy5mZWF0aGVyIikNCiNkZiA8LSByZWFkX2ZlYXRoZXIoImRhdGEvbnlfcGhpbF9wcm9ncmFtcy5mZWF0aGVyIikgZmFzdCBidXQgZ2F2ZSBzYW1lIGVycm9ycw0KZGYgPC0gcmVhZC5jc3YoImRhdGEvbnlfcGhpbF9wcm9ncmFtcy5jc3YiKQ0KDQpgYGANCg0KTm93IGhhdmUgdGlkaWVkIGNvZGUgY2FuIHNsaWdodGx5IHZhcnkgZnJvbSBvcmlnaW5hbCAoY291bGQgYmUgaXNzdWUgYXMgdGhlcmUgYXJlIGF0dHJpYnV0ZXMpDQpJc3N1ZSB3aXRoIGVuY29kaW5nICBtb3N0IHVua25vd24gc29tZSBVVEYtOA0KDQpgYGB7cn0NCg0KDQpkZiAlPiUgIA0KICB1bmdyb3VwKCkgJT4lIA0KICBmaWx0ZXIoIWNvbXBvc2VyICVpbiUgYygnTlVMTCcsICdUcmFkaXRpb25hbCwnLCAnQW50aGVtLCcpKSAlPiUgIA0KICBjb3VudChjb21wb3Nlciwgc29ydD1UUlVFKSAlPiUgIA0KICBmaWx0ZXIobiA+IDQwMCkgJT4lICAjIHdhcyA0MDAgYnV0IHdhbnQgdG8gZ2V0IHJpZCBvZiBDaG9waW4gYXMgaW52YWxpZCBVVEYtOA0KICBtdXRhdGUoY29tcG9zZXIgPSByZW9yZGVyKGNvbXBvc2VyLCBuKSkgJT4lIA0KICBwbG90X2x5KHg9fm4sIHk9fmNvbXBvc2VyLCBoZWlnaHQgPSAxMDAwKSAlPiUgDQogIGFkZF9iYXJzKGNvbG9yPSB+YXMuY2hhcmFjdGVyKGNvbXBvc2VyKSwgc2hvd2xlZ2VuZD1GQUxTRSkgJT4lIA0KICBsYXlvdXQobWFyZ2luPWxpc3QobD0yMDApLA0KICAgICAgICAgeGF4aXM9bGlzdCh0aXRsZT0iTnVtYmVyIG9mIFdvcmtzIHBlcmZvcm1lZCIpLA0KICAgICAgICAgeWF4aXM9bGlzdCh0aXRsZT0iIikNCiAgKQ0KIA0KDQpgYGANCg0KYGBge3J9DQpkZiAlPiUgIA0KICBmaWx0ZXIoIXRpdGxlICVpbiUgYygnTlVMTCcpKSAlPiUgIA0KICBtdXRhdGUoY29tcG9zZXJfd29yayA9IHBhc3RlKGNvbXBvc2VyLCAnLScsIHRpdGxlKSkgJT4lICANCiAgZ3JvdXBfYnkoY29tcG9zZXJfd29yaywgcHJvZ3JhbUlEKSAlPiUgIA0KICBzdW1tYXJpemUodGltZXNfb25fcHJvZ3JhbSA9IG4oKSkgJT4lICANCiAgY291bnQoY29tcG9zZXJfd29yaywgc29ydD1UUlVFKSAlPiUgIA0KICBmaWx0ZXIobiA+IDIyMCkgJT4lICANCiAgbXV0YXRlKGNvbXBvc2VyX3dvcmsgPSByZW9yZGVyKGNvbXBvc2VyX3dvcmssIG4pKSAlPiUgDQogIHBsb3RfbHkoeD1+biwgeT1+Y29tcG9zZXJfd29yaywgaGVpZ2h0PSA4MDApICU+JSANCiAgYWRkX2JhcnMoY29sb3I9IH5hcy5jaGFyYWN0ZXIoY29tcG9zZXJfd29yayksIHNob3dsZWdlbmQ9RkFMU0UpICU+JSANCiAgbGF5b3V0KG1hcmdpbj1saXN0KGw9NTAwKSwNCiAgICAgICAgIHhheGlzPWxpc3QodGl0bGU9Ik51bWJlciBvZiB0aW1lcyBwZXJmb3JtZWQiKSwNCiAgICAgICAgIHlheGlzPWxpc3QodGl0bGU9IiIpDQogICkNCmBgYA==