This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

These lines install the required packages and then load them for use in the current session.

# Install any required packages that are not yet available. Installing
# unconditionally would re-download every package on each run and fails in
# sessions without network access or a configured CRAN mirror.
# local() keeps the helper variables out of the global environment.
local({
  required_pkgs <- c(
    "tidyverse", "readr", "data.table", "ggplot2", "janitor",
    "lubridate", "scales", "corrplot", "reshape2"
  )
  missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})

# Attach the packages. The order is kept as-is because it decides which
# package wins when two of them export the same function name.
library(corrplot)
library(reshape2)
library(scales)
library(tidyverse)
library(lubridate)
library(readr)
library(ggplot2)
library(janitor)
library(data.table)

These lines import the two survey text files into RStudio.

# Read the two 2011 master files (d75 and gened) into data frames using
# read.delim()'s defaults.
masterfile11_d75_final <- read.delim(
  file = "~/NYCS survey/2011 data files online/masterfile11_d75_final.txt"
)
masterfile11_gened_final <- read.delim(
  file = "~/NYCS survey/2011 data files online/masterfile11_gened_final.txt"
)

Combine both tables into one large data frame named nycs_2011.

Then remove any empty rows and columns.

# Stack the two master files into one data frame, then drop empty rows
# followed by empty columns.
nycs_2011 <- masterfile11_d75_final %>%
  bind_rows(masterfile11_gened_final) %>%
  janitor::remove_empty(which = "rows") %>%
  janitor::remove_empty(which = "cols")

Filter the data to keep only the schools whose students were surveyed.

# Keep only the rows whose studentssurveyed column equals "Yes".
nycs_2011 <- filter(nycs_2011, studentssurveyed == "Yes")

This line creates the data frame “nycs_v2” with only the specific columns needed for analysis.

# Build nycs_v2 from nycs_2011, keeping only the columns used in the rest
# of the analysis (in this order).
nycs_v2 <- nycs_2011 %>%
  select(
    dbn, bn,
    N_s, N_t, N_p,
    nr_s, nr_t, nr_p,
    saf_p_11, com_p_11, eng_p_11, aca_p_11,
    saf_t_11, com_t_11, eng_t_11, aca_t_11,
    saf_s_11, com_s_11, eng_s_11, aca_s_11,
    saf_tot_11, com_tot_11, eng_tot_11, aca_tot_11
  )

Give the columns in nycs_v2 clearer names.

# Give the survey columns descriptive names (new_name = "old_name").
#
# The lookup is wrapped in any_of() so the step can be re-run safely:
# nycs_v2 is overwritten in place, so on a second run the old names are
# already gone and a plain rename() stops with
# "Column `aca_tot_11` doesn't exist". any_of() renames the columns that
# are still present and skips those already renamed.
nycs_v2 <- rename(
  nycs_v2,
  any_of(c(
    academic_total = "aca_tot_11",
    engagement_total = "eng_tot_11",
    communication_total = "com_tot_11",
    safety_respect_total = "saf_tot_11",
    stud_academic = "aca_s_11",
    stud_engagement = "eng_s_11",
    stud_communication = "com_s_11",
    stud_safety_resp = "saf_s_11",
    teacher_academic = "aca_t_11",
    teacher_engagement = "eng_t_11",
    teacher_communication = "com_t_11",
    teacher_safety_resp = "saf_t_11",
    parent_academic = "aca_p_11",
    parent_engagement = "eng_p_11",
    parent_communication = "com_p_11",
    parent_safety_resp = "saf_p_11",
    eligible_parent = "nr_p",
    eligible_student = "nr_s",
    eligible_teacher = "nr_t",
    student_respondents = "N_s",
    teacher_respondents = "N_t",
    parent_respondents = "N_p"
  ))
)
Error in `rename()`:
! Can't rename columns that don't exist.
✖ Column `aca_tot_11` doesn't exist.
Run `rlang::last_trace()` to see where the error occurred.

Save the data frame to a CSV file.

# Write nycs_v2 to a CSV file. The path is relative, so it is resolved
# against the current working directory. row.names is left at its default,
# so the row names are written as the first column of the file.
write.csv(
  x = nycs_v2,
  file = "NYCS survey/nycs_working/nycs_v2.csv"
)

#Create a data frame with the perception score columns grouped by respondent group (parent, teacher, student)


# Collect the parent, teacher and student perception scores plus the
# overall academic total, in the order they should appear in the
# correlation matrix.
perception_academic <- nycs_v2 %>%
  select(all_of(c(
    "parent_safety_resp", "parent_communication",
    "parent_engagement", "parent_academic",
    "teacher_safety_resp", "teacher_communication",
    "teacher_engagement", "teacher_academic",
    "stud_safety_resp", "stud_communication",
    "stud_engagement", "stud_academic",
    "academic_total"
  )))

Now we calculate the correlations between all of these columns

#create the plot

# Correlation matrix of all selected columns; use = "complete.obs"
# restricts the calculation to rows with no missing values.
cor_matrix <- perception_academic %>%
  cor(use = "complete.obs")

# Draw the upper triangle as circles, with black labels rotated 45 degrees.
corrplot(
  corr = cor_matrix,
  method = "circle",
  type = "upper",
  tl.col = "black",
  tl.srt = 45
)

#organize the data to make it easier to work with #Rearrange data using “melt”. This puts all perception scores in one column, and the group in another.

# Keep the student, teacher and parent academic scores and reshape them to
# long format: one `variable` column (the group) and one `value` column
# (the score).
# melt() is namespace-qualified because data.table is attached after
# reshape2, so a bare melt() resolves to data.table's generic, which only
# handles plain data frames through a deprecated, warning-emitting
# redirect to reshape2.
academic.df <- nycs_v2 %>%
  select(stud_academic, teacher_academic, parent_academic) %>%
  reshape2::melt()

#create the boxplot

# One box per group (the melted `variable` column), filled by group, with
# the x-axis labels tilted 45 degrees.
ggplot(
  data = academic.df,
  mapping = aes(x = variable, y = value, fill = variable)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Comparison of Academic Perceptions by Group",
    x = "Group",
    y = "Academic Perception Score"
  ) +
  theme(axis.text.x = element_text(angle = 45))

Repeat the same process for engagement.

#organize the data to make it easier to work with #Use melt to rearrange data #create the boxplot

# Keep the student, teacher and parent engagement scores and reshape them
# to long format: one `variable` column (the group) and one `value` column
# (the score).
# melt() is namespace-qualified because data.table is attached after
# reshape2, so a bare melt() resolves to data.table's generic, which only
# handles plain data frames through a deprecated, warning-emitting
# redirect to reshape2.
engagement.df <- nycs_v2 %>%
  select(stud_engagement, teacher_engagement, parent_engagement) %>%
  reshape2::melt()

# One box per group (the melted `variable` column), filled by group, with
# the x-axis labels tilted 15 degrees.
ggplot(
  data = engagement.df,
  mapping = aes(x = variable, y = value, fill = variable)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Comparison of Engagement Perceptions by Group",
    x = "Group",
    y = "Engagement Perception Score"
  ) +
  theme(axis.text.x = element_text(angle = 15))

#Repeat the same process for communication #organize the data to make it easier to work with #Use melt to rearrange data #create the boxplot

# Keep the student, teacher and parent communication scores and reshape
# them to long format: one `variable` column (the group) and one `value`
# column (the score).
# melt() is namespace-qualified because data.table is attached after
# reshape2, so a bare melt() resolves to data.table's generic, which only
# handles plain data frames through a deprecated, warning-emitting
# redirect to reshape2.
communication.df <- nycs_v2 %>%
  select(stud_communication, teacher_communication, parent_communication) %>%
  reshape2::melt()

# One box per group (the melted `variable` column), filled by group, with
# the x-axis labels tilted 15 degrees.
ggplot(
  data = communication.df,
  mapping = aes(x = variable, y = value, fill = variable)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Comparison of Communication Perceptions by Group",
    x = "Group",
    y = "Communication Perception Score"
  ) +
  theme(axis.text.x = element_text(angle = 15))

#Repeat the same process for Safety & Respect #organize the data to make it easier to work with #Use melt to rearrange data #create the boxplot

# Keep the student, teacher and parent safety & respect scores and reshape
# them to long format: one `variable` column (the group) and one `value`
# column (the score).
# melt() is namespace-qualified because data.table is attached after
# reshape2, so a bare melt() resolves to data.table's generic, which only
# handles plain data frames through a deprecated, warning-emitting
# redirect to reshape2.
safety_respect.df <- nycs_v2 %>%
  select(stud_safety_resp, teacher_safety_resp, parent_safety_resp) %>%
  reshape2::melt()

# One box per group (the melted `variable` column), filled by group, with
# the x-axis labels tilted 15 degrees.
ggplot(
  data = safety_respect.df,
  mapping = aes(x = variable, y = value, fill = variable)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Comparison of Safety & Respect Perceptions by Group",
    x = "Group",
    y = "Safety & Respect Perception Score"
  ) +
  theme(axis.text.x = element_text(angle = 15))
