dslabs

Author

zhageman

NYC Regents Scores

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("dslabs")
data(package="dslabs")
list.files(system.file("script", package = "dslabs"))
 [1] "make-admissions.R"                   
 [2] "make-brca.R"                         
 [3] "make-brexit_polls.R"                 
 [4] "make-calificaciones.R"               
 [5] "make-death_prob.R"                   
 [6] "make-divorce_margarine.R"            
 [7] "make-gapminder-rdas.R"               
 [8] "make-greenhouse_gases.R"             
 [9] "make-historic_co2.R"                 
[10] "make-mice_weights.R"                 
[11] "make-mnist_127.R"                    
[12] "make-mnist_27.R"                     
[13] "make-movielens.R"                    
[14] "make-murders-rda.R"                  
[15] "make-na_example-rda.R"               
[16] "make-nyc_regents_scores.R"           
[17] "make-olive.R"                        
[18] "make-outlier_example.R"              
[19] "make-polls_2008.R"                   
[20] "make-polls_us_election_2016.R"       
[21] "make-pr_death_counts.R"              
[22] "make-reported_heights-rda.R"         
[23] "make-research_funding_rates.R"       
[24] "make-stars.R"                        
[25] "make-temp_carbon.R"                  
[26] "make-tissue-gene-expression.R"       
[27] "make-trump_tweets.R"                 
[28] "make-weekly_us_contagious_diseases.R"
[29] "save-gapminder-example-csv.R"        
data(nyc_regents_scores) #pulling out the dataset chosen
nyc_regents_scores <- nyc_regents_scores %>%
  filter(!is.na(english) & !is.na(us_history))


p1 <-  nyc_regents_scores |>
  ggplot(aes(x = english, y = us_history, color = living_environment))+
  geom_point()+ # creating the scatterplot
  labs(x = "English Scores", y = "US History Scores", title = "English Scores and US History Scores related by Living Condition", color = "Living Environment")+
  geom_smooth(method = lm, se=FALSE, color = "purple")+ # line of regression
  theme_minimal() + # theme change
  scale_color_gradient(low = "blue", high = "cyan") # changing color
  theme(
    plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text = element_text(size = 9),
    plot.caption = element_text(size = 9),
    legend.title = element_text(face = "bold")) #changes to elements of graph for cleaner look
List of 5
 $ axis.title  :List of 11
  ..$ family       : NULL
  ..$ face         : chr "bold"
  ..$ colour       : NULL
  ..$ size         : num 13
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text   :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : num 9
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.title:List of 11
  ..$ family       : NULL
  ..$ face         : chr "bold"
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.title  :List of 11
  ..$ family       : NULL
  ..$ face         : chr "bold"
  ..$ colour       : NULL
  ..$ size         : num 15
  ..$ hjust        : num 0.5
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.caption:List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : num 9
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi FALSE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi FALSE
 - attr(*, "validate")= logi TRUE
p1 
`geom_smooth()` using formula = 'y ~ x'

I’ve looked at the NYC Regents Scores dataset from DSLabs, which includes English and US History scores, along with information about the students living conditions. While I’m not sure how information was retrieved about living conditions, I looked to see the relationship between US History and English scores. Identifying a relationship could show that students who do well in one subject are likely to perform well in the other. Then I examined whether higher scores correlate with better living conditions. The analysis showed a pretty clear connection showing that students who performed well in English usually did similar scores in US History. Furthermore, those with better living conditions usually got higher exam scores. This shows overall that both academic performance and living conditions are correlated and can affect a student success.