2
# Tabulate the school-type column before any filtering.
table(schooldata$Elementary..Middle..or.High.School)
##
## ES HS MS
## 462 93 11

# Keep only the columns used in this analysis (selected by position in
# `schooldata`).
schools_filtered <- schooldata %>%
  select(3, 12, 16, 18, 20, 22, 24, 30:36, 39:42)

# Work on a copy: name the school-type and grade 3-5 math columns, then drop
# the rows flagged as high school ("HS") or middle school ("MS").
dataschools <- schools_filtered
colnames(dataschools)[c(1, 15)] <- c("Elementary School", "GR3_5_mathlevel")
dataschools <- dataschools[!dataschools$`Elementary School` %in% c("HS", "MS"), ]

# Recode the "NDA" placeholder as NA and keep only complete rows.
dataschools2 <- dataschools
dataschools2[dataschools2 == "NDA"] <- NA
dataschools2 <- na.omit(dataschools2)

# Give the remaining positionally-selected columns short, readable names.
colnames(dataschools2)[c(2, 3, 11:14, 16:18)] <- c(
  "yearly_progress",
  "health_cert",
  "student_attendance",
  "misconduct_rate",
  "teacher_attendance",
  "iep_comp",
  "GR3_5_readlevel",
  "gr3_5_readpace",
  "gr3_5_mathpace"
)

# Convert the survey score columns to numeric, each one from its own values.
# Writing one column's scores into a differently named column (such as
# Parent.Environment.Icon) would silently overwrite that column's data.
# NOTE(review): Parent.Environment.Score is presumably the third column that
# was meant to be converted; it is only touched if it exists - confirm.
score_cols <- intersect(
  c(
    "Family.Involvement.Score",
    "Parent.Environment.Score",
    "Parent.Engagement.Score"
  ),
  names(dataschools2)
)
dataschools2[score_cols] <- lapply(dataschools2[score_cols], as.numeric)
# Environment score - supportive environment score from survey.
# Trailing comments repeat the value printed on the following output line.
sd(dataschools2$Environment.Score) # 16.77734
## [1] 16.77734
mean(dataschools2$Environment.Score) # 50.30928
## [1] 50.30928
min(dataschools2$Environment.Score) # 1
## [1] 1
max(dataschools2$Environment.Score) # 99
## [1] 99
median(dataschools2$Environment.Score) # 50
## [1] 50
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = Environment.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

# Safety score - safety of environment perceived by students.
# Trailing comments repeat the value printed on the following output line.
sd(dataschools2$Safety.Score) # 20.55548
## [1] 20.55548
mean(dataschools2$Safety.Score) # 51.68557
## [1] 51.68557
min(dataschools2$Safety.Score) # 6
## [1] 6
max(dataschools2$Safety.Score) # 99
## [1] 99
median(dataschools2$Safety.Score) # 50.5
## [1] 50.5
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = Safety.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

# Average student attendance.
# Trailing comments repeat the value printed on the following output line.
sd(dataschools2$student_attendance) # 1.867102
## [1] 1.867102
mean(dataschools2$student_attendance) # 94.60979
## [1] 94.60979
min(dataschools2$student_attendance) # 88
## [1] 88
max(dataschools2$student_attendance) # 98.4
## [1] 98.4
median(dataschools2$student_attendance) # 95.1
## [1] 95.1
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = student_attendance)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black")

## Grade 3-5 Reading at Grade Level
# Convert the column to numeric before summarising it.
dataschools2$GR3_5_readlevel <- as.numeric(dataschools2$GR3_5_readlevel)
sd(dataschools2$GR3_5_readlevel) # 19.15546
## [1] 19.15546
mean(dataschools2$GR3_5_readlevel) # 35.05206
## [1] 35.05206
min(dataschools2$GR3_5_readlevel) # 6.3
## [1] 6.3
max(dataschools2$GR3_5_readlevel) # 100
## [1] 100
median(dataschools2$GR3_5_readlevel) # 29.75
## [1] 29.75
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = GR3_5_readlevel)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

## Parent Engagement
# Trailing comments repeat the value printed on the following output line.
sd(dataschools2$Parent.Engagement.Score) # 4.404416
## [1] 4.404416
mean(dataschools2$Parent.Engagement.Score) # 50.44845
## [1] 50.44845
min(dataschools2$Parent.Engagement.Score) # 42
## [1] 42
max(dataschools2$Parent.Engagement.Score) # 68
## [1] 68
median(dataschools2$Parent.Engagement.Score) # 50
## [1] 50
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = Parent.Engagement.Score)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black")

## Instruction Score - Ambitious Instruction score from 5 Essentials survey
# Trailing comments repeat the value printed on the following output line.
sd(dataschools2$Instruction.Score) # 17.08615
## [1] 17.08615
mean(dataschools2$Instruction.Score) # 51.38144
## [1] 51.38144
min(dataschools2$Instruction.Score) # 1
## [1] 1
max(dataschools2$Instruction.Score) # 99
## [1] 99
median(dataschools2$Instruction.Score) # 51
## [1] 51
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = Instruction.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

## Family involvement - Involved family score from survey
# Trailing comments repeat the value printed on the following output line.
sd(dataschools2$Family.Involvement.Score) # 18.71961
## [1] 18.71961
mean(dataschools2$Family.Involvement.Score) # 51.31443
## [1] 51.31443
min(dataschools2$Family.Involvement.Score) # 6
## [1] 6
max(dataschools2$Family.Involvement.Score) # 99
## [1] 99
median(dataschools2$Family.Involvement.Score) # 50
## [1] 50
# Pass the data frame as a plain argument: wrapping it as `(data = ...)`
# would also assign a variable named `data` in the calling environment.
ggplot(data = dataschools2, aes(x = Family.Involvement.Score)) +
  geom_histogram(binwidth = 10, fill = "lightblue", color = "black")

3
We expect grade 3-5 reading level to be 2.3888 points higher for each
one-point increase in the parent engagement score.
# Bivariate model: grade 3-5 reading level regressed on instruction score.
teacher_read <- lm(
  formula = GR3_5_readlevel ~ Instruction.Score,
  data = dataschools2
)
summary(teacher_read)
##
## Call:
## lm(formula = GR3_5_readlevel ~ Instruction.Score, data = dataschools2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.593 -12.720 -4.836 8.825 56.753
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.28960 4.19039 4.365 2.08e-05 ***
## Instruction.Score 0.32624 0.07741 4.215 3.85e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.37 on 192 degrees of freedom
## Multiple R-squared: 0.08468, Adjusted R-squared: 0.07991
## F-statistic: 17.76 on 1 and 192 DF, p-value: 3.85e-05
From this regression I would expect grade 3-5 reading level to be
0.32624 points higher for each one-point increase in the instruction
score.
# Bivariate model: instruction score regressed on parent engagement score.
summary(
  lm(
    formula = Instruction.Score ~ Parent.Engagement.Score,
    data = dataschools2
  )
)
##
## Call:
## lm(formula = Instruction.Score ~ Parent.Engagement.Score, data = dataschools2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.856 -10.492 -0.628 8.868 49.534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.3150 13.3286 -1.149 0.252
## Parent.Engagement.Score 1.3221 0.2632 5.023 1.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.11 on 192 degrees of freedom
## Multiple R-squared: 0.1161, Adjusted R-squared: 0.1115
## F-statistic: 25.23 on 1 and 192 DF, p-value: 1.159e-06
From this regression I would expect the instruction score to be 1.3221
points higher for each one-point increase in parent engagement. This
could be because students who have engaged parents do better in class,
which in turn helps their teacher move through content and teach more
effectively (in my experience as a former teacher, more engaged parents
usually led to more focused students).
# Two-predictor model: reading level on parent engagement and instruction
# score together.
parent_teacher_read <- lm(
  formula = GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score,
  data = dataschools2
)
summary(parent_teacher_read)
##
## Call:
## lm(formula = GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score,
## data = dataschools2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.232 -10.624 -2.071 8.068 56.355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83.44182 13.24545 -6.300 2.01e-09 ***
## Parent.Engagement.Score 2.21470 0.27727 7.988 1.26e-13 ***
## Instruction.Score 0.13167 0.07147 1.842 0.067 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.95 on 191 degrees of freedom
## Multiple R-squared: 0.3139, Adjusted R-squared: 0.3067
## F-statistic: 43.69 on 2 and 191 DF, p-value: 2.381e-16
In this multivariate regression, the coefficient on instruction score
decreases (from 0.32624 to 0.13), as does the coefficient on parent
engagement (from 2.3888 to 2.21). So this regression tells me that a
one-point higher instruction score (better teachers/instructors) should
raise reading level by 0.13 points, and a one-point higher parent
engagement score should raise reading level by 2.21 points, holding the
other variable constant. We expected the original bivariate effects to
decrease because the correlation between the two independent variables
is positive and each of them is positively related to the dependent
variable.
# Four-predictor model: reading level on parent engagement, instruction
# score, family involvement, and student attendance.
multi_factor_read <- lm(
  formula = GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score +
    Family.Involvement.Score + student_attendance,
  data = dataschools2
)
summary(multi_factor_read)
##
## Call:
## lm(formula = GR3_5_readlevel ~ Parent.Engagement.Score + Instruction.Score +
## Family.Involvement.Score + student_attendance, data = dataschools2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.371 -8.420 -1.696 8.405 50.973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -315.56153 55.10165 -5.727 3.97e-08 ***
## Parent.Engagement.Score 1.15134 0.27668 4.161 4.80e-05 ***
## Instruction.Score -0.05018 0.06670 -0.752 0.453
## Family.Involvement.Score 0.35890 0.07257 4.945 1.67e-06 ***
## student_attendance 2.92456 0.59941 4.879 2.26e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.76 on 189 degrees of freedom
## Multiple R-squared: 0.4948, Adjusted R-squared: 0.4841
## F-statistic: 46.28 on 4 and 189 DF, p-value: < 2.2e-16
Parent engagement = 1.15
Instruction score = -0.05
Family involvement = 0.36
Student attendance = 2.92
Holding the other variables constant, we can expect grade 3-5 reading
level to be 1.15 points higher for each additional point of parent
engagement, 0.36 points higher for each additional point of family
involvement, and 2.92 points higher for each additional point of student
attendance rate. The instruction score doesn’t seem to help once all
other things are held constant (its coefficient is -0.05 and is not
statistically significant) - I would think that is because a score for
an instructor is really a biased number depending on who scores the
teacher (students, parents, administrators?). This multivariate
regression also shows me that I had overlooked the fact that the most
important thing for keeping students at grade level for reading is
having them show up for school. Parent engagement and stellar teachers
are great ways to help students, but as long as students show up to
school every day, they should at least be keeping up with their grade
level, since they are learning every day.