# Load the three data sets used by the charts below.
# NOTE(review): the package that provides these data sets (and dplyr,
# highcharter, RColorBrewer) must already be attached before this point;
# no library() calls are visible in this section -- confirm.
data("nyc_regents_scores")
data("greenhouse_gases")
data("us_contagious_diseases")
# ---- NYC Regents scores: preprocessing ----

# Replace a missing `score` with 101 so that row is kept as the next value
# after 100 instead of being carried around as NA.
nyc_regents_scores <- nyc_regents_scores |>
  mutate(score = ifelse(is.na(score), 101, score))

# Keep only rows where all five subject columns are present, and store
# `score` as a character variable.
scores_clean <- nyc_regents_scores |>
  filter(
    !is.na(integrated_algebra) &
      !is.na(global_history) &
      !is.na(living_environment) &
      !is.na(english) &
      !is.na(us_history)
  ) |>
  mutate(score = as.character(score))

# Drop the first column so only the five subject columns remain, then add:
#   max    - the largest of the five subject values in each row
#   scores - a "Student <row number>" label, generated from the actual row
#            count rather than a hard-coded list of 97 labels
# NOTE(review): select(-1) presumably removes `score` -- confirm that it is
# the first column of the data set.
scores_selected <- scores_clean |>
  select(-1) |>
  mutate(
    max = pmax(
      integrated_algebra,
      global_history,
      living_environment,
      english,
      us_history,
      na.rm = TRUE
    ),
    scores = paste("Student", seq_len(n()))
  )

# Average of the five subjects, to compare against the highest subject.
score_final <- scores_selected |>
  mutate(
    Average = (integrated_algebra + global_history + living_environment +
      english + us_history) / 5
  )
# ---- NYC Regents scores: chart ----

# Four-colour "Set1" palette used for the series colours.
cols <- brewer.pal(4, "Set1")

# Line chart of the highest subject value (y) against the average of the
# five subjects (x).
# NOTE(review): group = max creates one series per distinct `max` value;
# confirm this is the intended grouping.
highchart() |>
  hc_add_series(
    data = score_final,
    type = "line",
    mapping = hcaes(x = Average, y = max, group = max)
  ) |>
  hc_colors(cols) |> # series colours
  hc_xAxis(title = list(text = "Average of all 5 subjects")) |> # x-axis label
  hc_yAxis(title = list(text = "Highest Subject")) |> # y-axis label
  hc_legend(enabled = FALSE) |> # no legend
  hc_plotOptions(series = list(marker = list(symbol = "circle"))) # dot markers
Concentration of Greenhouse Gases Over 200 Years
# Line chart of greenhouse gas concentration by year, one series per gas.
# Uses the `cols` palette defined earlier in the document.
# NOTE(review): the y-axis label states ppm for every gas -- confirm the
# units of `concentration` are the same for all gases in this data set.
highchart() |>
  hc_add_series(
    data = greenhouse_gases,
    type = "line",
    mapping = hcaes(x = year, y = concentration, group = gas)
  ) |>
  hc_colors(cols) |> # series colours
  hc_xAxis(title = list(text = "Year")) |> # x-axis label
  hc_yAxis(title = list(text = "Concentration (ppm)")) |> # y-axis label
  # Place the legend on the right-hand side. "right" is a value of `align`;
  # `verticalAlign` only accepts "top", "middle" or "bottom".
  hc_legend(align = "right", verticalAlign = "middle", layout = "vertical") |>
  hc_plotOptions(series = list(marker = list(symbol = "circle"))) # dot markers
# ---- US contagious diseases: dual-axis chart ----

# Switch to the four-colour "Set2" palette for this chart.
cols <- brewer.pal(4, "Set2")

# Two line series on separate y-axes: `year` on the first (left) axis and
# `count` on the second (opposite) axis, with the `disease` column supplied
# as the x-axis categories.
highchart() |>
  hc_yAxis_multiples(
    list(title = list(text = "Year")), # y-axis 1
    list(title = list(text = "count"), opposite = TRUE) # y-axis 2
  ) |>
  hc_add_series(
    data = us_contagious_diseases$year,
    name = "Year",
    type = "line",
    yAxis = 0
  ) |>
  hc_add_series(
    data = us_contagious_diseases$count,
    name = "Case Count",
    type = "line",
    yAxis = 1
  ) |>
  hc_xAxis(
    categories = us_contagious_diseases$disease, # x-axis categories
    tickInterval = 5
  ) |>
  hc_colors(cols) |>
  hc_legend() |> # legend with default options
  hc_chart(style = list(fontFamily = "Arial")) # font for chart text
The rendered markdown document showcases visualizations created using Highcharter for three distinct datasets: NYC Regents scores, greenhouse gases concentration over 200 years, and contagious disease data for US states.
For the NYC Regents scores dataset, I preprocessed the data to calculate the maximum score for each student across multiple subjects. Then, I utilized Highcharter to generate a line chart illustrating the relationship between the average of all subjects and the highest score achieved by each student. This visualization helps in understanding the performance distribution among students.
The greenhouse gases dataset provided a historical perspective on the concentration of greenhouse gases over 200 years. Using Highcharter, I crafted a line chart where the x-axis represents the year, the y-axis represents the concentration of greenhouse gases, and different colors distinguish between different types of gases. This visualization aids in observing long-term trends in greenhouse gas levels.
Lastly, the dataset on contagious diseases in US states enabled the creation of a dual-axis chart using Highcharter. This chart plots two line series, each in its own color: the year on the left y-axis and the number of cases on the right y-axis, with the disease names supplied as the x-axis categories. The chart is intended to illustrate the patterns in disease outbreaks across various states.