1

2

# Count schools by level code (ES / HS / MS) in the raw data.
table(schooldata[["Elementary..Middle..or.High.School"]])
## 
##  ES  HS  MS 
## 462  93  11
# ---- Data preparation ----
# Keep only the columns used in this analysis, selected by position in the
# raw `schooldata` frame (18 columns in total).
schools_filtered <- schooldata %>%
  select(3, 12, 16, 18, 20, 22, 24, 30:36, 39:42)

dataschools <- schools_filtered

# Column 1 is the school-level code that is filtered on below.
colnames(dataschools)[1] <- "Elementary School"
colnames(dataschools)[15] <- "GR3_5_mathlevel"

# Keep elementary schools only: drop rows coded as high or middle school.
dataschools <- dataschools[!dataschools$`Elementary School` %in% c("HS", "MS"), ]

# Recode the "NDA" placeholder to NA, then drop every row with a missing value.
dataschools2 <- dataschools
dataschools2[dataschools2 == "NDA"] <- NA
dataschools2 <- na.omit(dataschools2)

# Give the remaining positionally-selected columns short analysis names.
colnames(dataschools2)[c(2, 3, 11:14, 16:18)] <- c(
  "yearly_progress",    # column 2
  "health_cert",        # column 3
  "student_attendance", # column 11
  "misconduct_rate",    # column 12
  "teacher_attendance", # column 13
  "iep_comp",           # column 14
  "GR3_5_readlevel",    # column 16
  "gr3_5_readpace",     # column 17
  "gr3_5_mathpace"      # column 18
)

# Convert the survey scores used in the models to numeric.
dataschools2$Family.Involvement.Score <-
  as.numeric(dataschools2$Family.Involvement.Score)
dataschools2$Parent.Engagement.Score <-
  as.numeric(dataschools2$Parent.Engagement.Score)
# NOTE(review): a former line wrote the numeric Parent.Engagement.Score into
# `Parent.Environment.Icon`, which looked like a copy/paste slip and has been
# removed. If Parent.Environment.Score was the intended target, convert that
# column explicitly here -- TODO confirm.


# Environment score - supportive environment score from the survey.
# Trailing comments record the values shown in the rendered output.
sd(dataschools2$Environment.Score) # 16.77734
## [1] 16.77734
mean(dataschools2$Environment.Score) # 50.30928
## [1] 50.30928
min(dataschools2$Environment.Score) # 1
## [1] 1
max(dataschools2$Environment.Score) # 99
## [1] 99
median(dataschools2$Environment.Score) # 50
## [1] 50
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = Environment.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

# Safety score - safety of the environment as perceived by students.
# Trailing comments record the values shown in the rendered output.
sd(dataschools2$Safety.Score) # 20.55548
## [1] 20.55548
mean(dataschools2$Safety.Score) # 51.68557
## [1] 51.68557
min(dataschools2$Safety.Score) # 6
## [1] 6
max(dataschools2$Safety.Score) # 99
## [1] 99
median(dataschools2$Safety.Score) # 50.5
## [1] 50.5
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = Safety.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

# Average student attendance.
# Trailing comments record the values shown in the rendered output.
sd(dataschools2$student_attendance) # 1.867102
## [1] 1.867102
mean(dataschools2$student_attendance) # 94.60979
## [1] 94.60979
min(dataschools2$student_attendance) # 88
## [1] 88
max(dataschools2$student_attendance) # 98.4
## [1] 98.4
median(dataschools2$student_attendance) # 95.1
## [1] 95.1
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = student_attendance)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black")

# Grade 3-5 Reading at Grade Level
# Convert the column to numeric before summarising it.
dataschools2$GR3_5_readlevel <- as.numeric(dataschools2$GR3_5_readlevel)
sd(dataschools2$GR3_5_readlevel) # 19.15546
## [1] 19.15546
mean(dataschools2$GR3_5_readlevel) # 35.05206
## [1] 35.05206
min(dataschools2$GR3_5_readlevel) # 6.3
## [1] 6.3
max(dataschools2$GR3_5_readlevel) # 100
## [1] 100
median(dataschools2$GR3_5_readlevel) # 29.75
## [1] 29.75
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = GR3_5_readlevel)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

# Parent Engagement
sd(dataschools2$Parent.Engagement.Score) # 4.404416
## [1] 4.404416
mean(dataschools2$Parent.Engagement.Score) # 50.44845
## [1] 50.44845
min(dataschools2$Parent.Engagement.Score) # 42
## [1] 42
max(dataschools2$Parent.Engagement.Score) # 68
## [1] 68
median(dataschools2$Parent.Engagement.Score) # 50
## [1] 50
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = Parent.Engagement.Score)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black")

# Instruction Score - Ambitious Instruction score from the 5 Essentials survey
sd(dataschools2$Instruction.Score) # 17.08615
## [1] 17.08615
mean(dataschools2$Instruction.Score) # 51.38144
## [1] 51.38144
min(dataschools2$Instruction.Score) # 1
## [1] 1
max(dataschools2$Instruction.Score) # 99
## [1] 99
median(dataschools2$Instruction.Score) # 51
## [1] 51
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = Instruction.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

# Family involvement - Involved family score from the survey
sd(dataschools2$Family.Involvement.Score) # 18.71961
## [1] 18.71961
mean(dataschools2$Family.Involvement.Score) # 51.31443
## [1] 51.31443
min(dataschools2$Family.Involvement.Score) # 6
## [1] 6
max(dataschools2$Family.Involvement.Score) # 99
## [1] 99
median(dataschools2$Family.Involvement.Score) # 50
## [1] 50
# Pass the data frame as a plain argument; writing it as `(data = ...)` would
# assign a global variable named `data` as a side effect.
ggplot(dataschools2, aes(x = Family.Involvement.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

3

I want to conduct a multivariate regression to see whether parent engagement and instruction scores are associated with higher grade 3-5 reading scores. As a former teacher, I believe parent engagement should affect reading scores, since we know that children whose parents read with them at home do better in reading. I’m also wondering whether the instruction score reflects teacher quality: if better teachers earn higher instruction scores, then higher instruction scores should lead to higher reading scores. After this, I would also like to add family involvement and student attendance, as I believe they could also influence reading scores. Family involvement is similar to (if not the same as, for some families) parent engagement. Student attendance should have an effect because students who show up to school should be learning the skills necessary to read at grade level, while students who attend less often will not keep up with grade level if they are missing lessons.

# Correlation between the two candidate predictors.
with(dataschools2, cor(Parent.Engagement.Score, Instruction.Score))
## [1] 0.3407995

Parent engagement and instruction score are positively correlated (0.3407995).

# Bivariate model: grade 3-5 reading level regressed on parent engagement.
parent_read <- lm(
  GR3_5_readlevel ~ Parent.Engagement.Score,
  data = dataschools2
)
summary(parent_read)
## 
## Call:
## lm(formula = GR3_5_readlevel ~ Parent.Engagement.Score, data = dataschools2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.514  -9.628  -2.286   8.348  59.842 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -85.4584    13.2822  -6.434 9.66e-10 ***
## Parent.Engagement.Score   2.3888     0.2623   9.107  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.05 on 192 degrees of freedom
## Multiple R-squared:  0.3017, Adjusted R-squared:  0.298 
## F-statistic: 82.95 on 1 and 192 DF,  p-value: < 2.2e-16

For each one-point increase in a school's parent engagement score, we expect its grade 3-5 reading level to be 2.3888 points higher.

# Bivariate model: grade 3-5 reading level regressed on instruction score.
teacher_read <- lm(
  GR3_5_readlevel ~ Instruction.Score,
  data = dataschools2
)
summary(teacher_read)
## 
## Call:
## lm(formula = GR3_5_readlevel ~ Instruction.Score, data = dataschools2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.593 -12.720  -4.836   8.825  56.753 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       18.28960    4.19039   4.365 2.08e-05 ***
## Instruction.Score  0.32624    0.07741   4.215 3.85e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.37 on 192 degrees of freedom
## Multiple R-squared:  0.08468,    Adjusted R-squared:  0.07991 
## F-statistic: 17.76 on 1 and 192 DF,  p-value: 3.85e-05

From this regression, I would expect a school's grade 3-5 reading level to be 0.32624 points higher for each one-point increase in its instruction score.

# Auxiliary regression: how instruction score varies with parent engagement.
summary(
  lm(Instruction.Score ~ Parent.Engagement.Score, data = dataschools2)
)
## 
## Call:
## lm(formula = Instruction.Score ~ Parent.Engagement.Score, data = dataschools2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.856 -10.492  -0.628   8.868  49.534 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -15.3150    13.3286  -1.149    0.252    
## Parent.Engagement.Score   1.3221     0.2632   5.023 1.16e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.11 on 192 degrees of freedom
## Multiple R-squared:  0.1161, Adjusted R-squared:  0.1115 
## F-statistic: 25.23 on 1 and 192 DF,  p-value: 1.159e-06

From this regression, I would expect each one-point increase in parent engagement to be associated with an instruction score that is 1.3221 points higher. This could be because students who have engaged parents do better in class, which in turn helps their teacher move through content and teach more effectively (in my experience as a former teacher, more engaged parents usually led to more focused students).

# Two-predictor model: parent engagement and instruction score together.
parent_teacher_read <- lm(
  GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score,
  data = dataschools2
)
summary(parent_teacher_read)
## 
## Call:
## lm(formula = GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score, 
##     data = dataschools2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.232 -10.624  -2.071   8.068  56.355 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -83.44182   13.24545  -6.300 2.01e-09 ***
## Parent.Engagement.Score   2.21470    0.27727   7.988 1.26e-13 ***
## Instruction.Score         0.13167    0.07147   1.842    0.067 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.95 on 191 degrees of freedom
## Multiple R-squared:  0.3139, Adjusted R-squared:  0.3067 
## F-statistic: 43.69 on 2 and 191 DF,  p-value: 2.381e-16

In this multivariate regression, the coefficient on instruction score decreases (from 0.32624 to 0.13), as does the coefficient on parent engagement (from 2.3888 to 2.21). So this regression tells me that, holding the other variable constant, a one-point higher instruction score (better teachers/instructors) should raise reading level by 0.13 points (although this coefficient is only marginally significant, p = 0.067), and a one-point higher parent engagement score should raise reading scores by 2.21 points. We expected the original bivariate effects to decrease because the correlation between the old and new independent variables was positive and each variable's relationship with the dependent variable was also positive.

# Full model: adds family involvement and student attendance as predictors.
multi_factor_read <- lm(
  GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score +
    Family.Involvement.Score + student_attendance,
  data = dataschools2
)

summary(multi_factor_read)
## 
## Call:
## lm(formula = GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score + 
##     Family.Involvement.Score + student_attendance, data = dataschools2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.371  -8.420  -1.696   8.405  50.973 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -315.56153   55.10165  -5.727 3.97e-08 ***
## Parent.Engagement.Score     1.15134    0.27668   4.161 4.80e-05 ***
## Instruction.Score          -0.05018    0.06670  -0.752    0.453    
## Family.Involvement.Score    0.35890    0.07257   4.945 1.67e-06 ***
## student_attendance          2.92456    0.59941   4.879 2.26e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.76 on 189 degrees of freedom
## Multiple R-squared:  0.4948, Adjusted R-squared:  0.4841 
## F-statistic: 46.28 on 4 and 189 DF,  p-value: < 2.2e-16

Parent engagement = 1.15

Instruction score = -0.05

Family involvement = 0.35

Student attendance = 2.92

Holding the other variables constant, we can expect grade 3-5 reading level to be 1.15 points higher for each one-point increase in parent engagement, 0.35 points higher for each one-point increase in family involvement, and 2.92 points higher for each one-point increase in the attendance rate. The instruction score doesn’t seem to help once all other things are held constant (its coefficient, -0.05, is not statistically significant) - I would think that is because a score for an instructor is really a biased number that depends on who scores the teacher (students, parents, administrators?). This multivariate regression also seems to show me that I overlooked the fact that the most important thing to keep students at grade level for reading is having them show up for school. Parent engagement and stellar teachers are great ways to help students, but as long as students show up to school every day, they should at least be keeping up with their grade level since they are learning every day.