The following dataset contains pitch class information from MIDI data of Bach chorales. The dataset and relevant information can be found here: https://archive.ics.uci.edu/ml/datasets/Bach+Choral+Harmony
The dataset contains 17 variables:
library(curl)
# Open a connection to the raw CSV hosted on GitHub and read it into a data
# frame. header = FALSE tells read.csv() that the first line is data rather
# than column names, so the columns receive the default names V1, V2, ...
# NOTE(review): the str() output of this data shows factor levels such as
# " NO" with a leading space; consider strip.white = TRUE here, but confirm
# first that no later code matches on the padded values.
conn <- curl(
  url = "https://raw.githubusercontent.com/bsnacks000/IS607-DataAq/master/Data/bach_data.csv"
)
bach_data <- read.csv(file = conn, header = FALSE)
# Compact summary of the raw data: dimensions, column types, leading values
str(object = bach_data)
## 'data.frame': 5665 obs. of 17 variables:
## $ V1 : Factor w/ 62 levels "000106b_","000206b_",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ V2 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ V3 : Factor w/ 2 levels " NO","YES": 2 2 2 2 2 1 1 2 2 1 ...
## $ V4 : Factor w/ 2 levels " NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
## $ V5 : Factor w/ 2 levels " NO","YES": 1 1 1 1 1 2 2 1 1 2 ...
## $ V6 : Factor w/ 2 levels " NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
## $ V7 : Factor w/ 2 levels " NO","YES": 1 2 2 1 1 1 1 1 1 1 ...
## $ V8 : Factor w/ 2 levels " NO","YES": 2 1 1 2 2 2 2 2 2 2 ...
## $ V9 : Factor w/ 2 levels " NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
## $ V10: Factor w/ 2 levels " NO","YES": 1 2 2 1 1 1 1 1 1 1 ...
## $ V11: Factor w/ 2 levels " NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
## $ V12: Factor w/ 2 levels " NO","YES": 2 1 1 2 2 2 2 2 2 1 ...
## $ V13: Factor w/ 2 levels " NO","YES": 1 1 1 1 1 1 1 1 1 2 ...
## $ V14: Factor w/ 2 levels " NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
## $ V15: Factor w/ 16 levels "A","A#","Ab",..: 13 11 11 13 13 8 8 1 1 5 ...
## $ V16: int 3 5 2 3 2 4 2 3 2 5 ...
## $ V17: Factor w/ 102 levels " Abd"," Abm",..: 80 35 35 80 80 55 55 80 80 17 ...
# Preview the first six rows of the raw data (six is head()'s default)
head(bach_data, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17
## 1 000106b_ 1 YES NO NO NO NO YES NO NO NO YES NO NO F 3 F_M
## 2 000106b_ 2 YES NO NO NO YES NO NO YES NO NO NO NO E 5 C_M
## 3 000106b_ 3 YES NO NO NO YES NO NO YES NO NO NO NO E 2 C_M
## 4 000106b_ 4 YES NO NO NO NO YES NO NO NO YES NO NO F 3 F_M
## 5 000106b_ 5 YES NO NO NO NO YES NO NO NO YES NO NO F 2 F_M
## 6 000106b_ 6 NO NO YES NO NO YES NO NO NO YES NO NO D 4 D_m
# Replace the default V1..V17 column names with descriptive ones:
# two identifier columns, twelve pitch-class columns, then three further
# columns for bass note, meter accent and chord name.
new_col_names <- c(
  "chorale_id", "event_no.",
  "C", "C#/Db", "D", "D#/Eb", "E", "F",
  "F#/Gb", "G", "G#/Ab", "A", "A#/Bb", "B",
  "Bass note", "Meter_accent", "Chord_name"
)
colnames(bach_data) <- new_col_names
# Preview the first ten rows of the renamed data
head(bach_data, n = 10)
## chorale_id event_no. C C#/Db D D#/Eb E F F#/Gb G G#/Ab A
## 1 000106b_ 1 YES NO NO NO NO YES NO NO NO YES
## 2 000106b_ 2 YES NO NO NO YES NO NO YES NO NO
## 3 000106b_ 3 YES NO NO NO YES NO NO YES NO NO
## 4 000106b_ 4 YES NO NO NO NO YES NO NO NO YES
## 5 000106b_ 5 YES NO NO NO NO YES NO NO NO YES
## 6 000106b_ 6 NO NO YES NO NO YES NO NO NO YES
## 7 000106b_ 7 NO NO YES NO NO YES NO NO NO YES
## 8 000106b_ 8 YES NO NO NO NO YES NO NO NO YES
## 9 000106b_ 9 YES NO NO NO NO YES NO NO NO YES
## 10 000106b_ 10 NO NO YES NO NO YES NO NO NO NO
## A#/Bb B Bass note Meter_accent Chord_name
## 1 NO NO F 3 F_M
## 2 NO NO E 5 C_M
## 3 NO NO E 2 C_M
## 4 NO NO F 3 F_M
## 5 NO NO F 2 F_M
## 6 NO NO D 4 D_m
## 7 NO NO D 2 D_m
## 8 NO NO A 3 F_M
## 9 NO NO A 2 F_M
## 10 YES NO Bb 5 BbM
For a study of harmony, individual pitch class data might be less useful. We subset the data to make a new dataset that excludes that information. We also subset rows by chorale id in order to look at two individual chorales, 106b and 206b.
# Columns kept for the harmony-focused subset; the twelve per-pitch-class
# YES/NO columns are dropped.
col_subset <- c(
  "chorale_id", "event_no.", "Bass note", "Meter_accent", "Chord_name"
)
# Keep only the rows of the two chorales of interest. %in% is used instead of
# chained `==` comparisons joined with `|` because it never returns NA: a
# missing chorale_id is simply excluded rather than producing an all-NA row
# in the result.
bach_106_206 <- bach_data[
  bach_data$chorale_id %in% c("000106b_", "000206b_"),
  col_subset
]
# Preview the first rows of the subset
head(bach_106_206)
## chorale_id event_no. Bass note Meter_accent Chord_name
## 1 000106b_ 1 F 3 F_M
## 2 000106b_ 2 E 5 C_M
## 3 000106b_ 3 E 2 C_M
## 4 000106b_ 4 F 3 F_M
## 5 000106b_ 5 F 2 F_M
## 6 000106b_ 6 D 4 D_m
# Preview the last six rows of the subset (six is tail()'s default)
tail(bach_106_206, n = 6L)
## chorale_id event_no. Bass note Meter_accent Chord_name
## 239 000206b_ 77 G 3 G_m
## 240 000206b_ 78 A 4 A_M
## 241 000206b_ 79 A 2 D_m
## 242 000206b_ 80 A 3 A_M4
## 243 000206b_ 81 A 2 A_M
## 244 000206b_ 82 D 5 D_M