# Remove every object that ls() reports in the current environment so the
# script starts from an empty workspace.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the workspace of anyone who source()s this file - consider dropping it.
rm(list = ls())
#Loading Required Packages
library(ggplot2) # Grammar of data plotting system
library(cowplot) # Compilation of the plots and figures
library(tidyverse) # Data manipulation
library(hrbrthemes) # A package that provides typography-centric themes and theme components for ggplot2
library(gtable) # Help construct and manipulate layouts containing graphical elements
library(grid)# To implement the primitive graphical functions that underlie the ggplot2
library(foreign) # Read data from S, SAS, SPSS, STATA, Systat, Weka etc.
### Data
# Read the SPSS file and convert the result to a data frame in one step,
# then print its structure.
upload_data <- as.data.frame(read.spss("Overton Summer 2021 School Data.sav"))
str(upload_data)
'data.frame': 20 obs. of 112 variables:
$ LastName : Factor w/ 20 levels "Bobbitt","Boney",..: 1 2 3 4 5 6 7 8 9 10 ...
$ FirstName : Factor w/ 20 levels "Makynna","Hattie",..: 1 2 3 4 5 6 7 8 9 10 ...
$ Gender : Factor w/ 2 levels "Male","Female": 2 2 2 1 2 2 2 1 1 1 ...
$ Ethnicity : num NA NA NA NA NA NA NA NA NA NA ...
$ Grade : Factor w/ 3 levels "Third Grade",..: 1 1 1 1 1 1 2 2 2 2 ...
$ Teacher : Factor w/ 4 levels "Maryanna Luna",..: 1 1 1 1 1 1 2 2 2 2 ...
$ Attendance : num 16 18 11 17 14 15 17 10 15 18 ...
$ Wordsread : num 51858 28575 49406 38867 33160 ...
$ MARSIGRSPre : num 2.4 1.8 3.2 3.4 3 3.4 1.6 2.2 3.4 2.6 ...
$ MARSIGRSPost : num 3.2 3.4 NA 4 3.2 3.2 2.6 NA 5 3.8 ...
$ MARSIPSSPre : num 3 4 3.4 2.6 4.4 3.8 3.8 2 4 2.2 ...
$ MARSIPSSPost : num 3 5 NA 2.6 3.8 3.8 3 NA 5 4.2 ...
$ MARSISRSPre : num 2.8 2 3 3 4 3.6 3.6 3.6 4.2 3.4 ...
$ MARSISRSPost : num 2.8 4.2 NA 3 3.4 4.2 4 NA 5 2.8 ...
$ MARSITotalPre : num 2.73 2.6 3.2 3 3.8 3.8 3 2.6 3.86 2.73 ...
$ MARSITotalPost : num 3 4.2 NA 3.2 3.46 3.73 3.2 NA 5 3.6 ...
$ GRADEWRRawPre : num 27 28 23 28 24 28 NA NA NA NA ...
$ GRADEWRRawPost : num 28 29 NA 30 24 26 NA NA NA NA ...
$ GRADEWRStaninePre : num 4 5 3 5 3 5 NA NA NA NA ...
$ GRADEWRStaninePost : num 5 6 NA 8 3 4 NA NA NA NA ...
$ GRADEWRGEPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRGEPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRSSPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRSSPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRNCEPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRNCEPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRGSVPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEWRGSVPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEVocRSPre : num 20 16 15 7 14 21 19 15 19 15 ...
$ GRADEVocRSPost : num 12 18 NA 6 15 NA 14 NA 17 13 ...
$ GRADEVocStaninePre : num 5 4 4 2 3 5 4 4 4 3 ...
$ GRADEVocStaninePost : num 3 4 NA 1 4 NA 4 NA 5 4 ...
$ GRADEVocGEPre : num NA NA NA NA NA NA 4.2 3.4 4.2 3.4 ...
$ GRADEVocGEPost : num NA NA NA NA NA NA 3.2 NA 3.8 3 ...
$ GRADEVocSSPre : num NA NA NA NA NA NA 95 93 95 87 ...
$ GRADEVocSSPost : num NA NA NA NA NA NA 91 NA 96 89 ...
$ GRADEVocNCEPre : num NA NA NA NA NA NA 43 40 43 32 ...
$ GRADEVocNCEPost : num NA NA NA NA NA NA 37 NA 44 35 ...
$ GRADEVocGSVPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEVocGSVPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEVocCompRSPre : num 47 44 38 35 38 49 NA NA NA NA ...
$ GRADEVocCompRSPost : num 40 47 NA 36 39 NA NA NA NA NA ...
$ GRADEVocCompStaninePre : num 5 4 3 2 3 5 NA NA NA NA ...
$ GRADEVocCompStaninePost : num 3 5 NA 3 3 NA NA NA NA NA ...
$ GRADEVocCompGEPre : num 2.5 2.3 2 1.9 2 2.7 NA NA NA NA ...
$ GRADEVocCompGEPost : num 2.1 2.5 NA 1.9 2 NA NA NA NA NA ...
$ GRADEVocCompSSPre : num 97 93 86 82 86 100 NA NA NA NA ...
$ GRADEVocCompSSPost : num 88 96 NA 83 86 NA NA NA NA NA ...
$ GRADEVocCompNCEPre : num 46 40 30 25 30 50 NA NA NA NA ...
$ GRADEVocCompNCEPost : num 33 44 NA 26 30 NA NA NA NA NA ...
$ GRADEVocCompGSVPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEVocCompGSVPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompRSPre : num 14 14 10 8 13 15 13 8 15 13 ...
$ GRADESenCompRSPost : num 9 17 NA 8 12 NA 13 NA 12 10 ...
$ GRADESenCompStaninePre : num 4 4 3 2 4 4 5 3 5 5 ...
$ GRADESenCompStaninePost : num 3 4 NA 2 3 NA 5 NA 4 4 ...
$ GRADESenCompGEPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompGEPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompSSPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompSSPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompNCEPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompNCEPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompGSVPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADESenCompGSVPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompRSPre : num 9 9 8 6 11 12 12 11 10 14 ...
$ GRADEPassCompRSPost : num 9 17 NA 4 14 NA 18 NA 11 11 ...
$ GRADEPassCompStaninePre : num 3 3 3 2 4 4 4 4 3 5 ...
$ GRADEPassCompStaninePost: num 3 5 NA 1 4 NA 5 NA 4 4 ...
$ GRADEPassCompGEPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompGEPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompSSPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompSSPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompNCEPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompNCEPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompGSVPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADEPassCompGSVPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADECompCompRSPre : num 23 23 18 14 24 27 25 19 25 27 ...
$ GRADECompCompRSPost : num 18 34 NA 12 26 NA 31 NA 23 21 ...
$ GRADECompCompStaninePre : num 3 3 3 2 4 4 4 3 4 4 ...
$ GRADECompCompStaninePost: num 3 5 NA 2 4 NA 5 NA 4 4 ...
$ GRADECompCompGEPre : num 2.1 2.1 1.8 1.5 2.2 2.5 3.4 2.6 3.4 3.7 ...
$ GRADECompCompGEPost : num 1.8 3.4 NA 1.3 2.4 NA 4.4 NA 3.1 2.9 ...
$ GRADECompCompSSPre : num 88 88 82 77 90 93 90 88 90 92 ...
$ GRADECompCompSSPost : num 82 102 NA 74 92 NA 102 NA 93 91 ...
$ GRADECompCompNCEPre : num 33 33 25 18 36 40 36 33 36 39 ...
$ GRADECompCompNCEPost : num 25 53 NA 13 39 NA 53 NA 40 37 ...
$ GRADECompCompGSVPre : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADECompCompGSVPost : num NA NA NA NA NA NA NA NA NA NA ...
$ GRADETotalTRSPre : num 70 67 56 49 62 76 44 34 44 42 ...
$ GRADETotalTRSPost : num 58 81 NA 48 65 NA 45 NA 40 34 ...
$ GRADETotalTStaninePre : num 4 4 3 2 3 5 4 4 4 4 ...
$ GRADETotalTStaninePost : num 3 5 NA 2 4 NA 5 NA 4 4 ...
$ GRADETotalTGEPre : num 2.4 2.3 2 1.8 2.1 2.7 3.7 2.8 3.7 3.5 ...
$ GRADETotalTGEPost : num 2 3.1 NA 1.8 2.2 NA 3.8 NA 3.3 2.8 ...
$ GRADETotalTSSPre : num 92 90 84 79 87 96 91 90 91 90 ...
$ GRADETotalTSSPost : num 84 100 NA 78 89 NA 98 NA 95 90 ...
$ GRADETotalTNCEPre : num 39 36 28 21 32 44 37 36 37 36 ...
$ GRADETotalTNCEPost : num 28 50 NA 19 35 NA 47 NA 43 36 ...
$ GRADETotalTGSVPre : num 406 401 385 375 394 415 434 417 434 430 ...
[list output truncated]
# Keep only the 24 columns used in the rest of the analysis, then print a
# per-column summary (which also reports the NA count of each column).
updated_data <- upload_data %>%
  select(
    all_of(c(
      "Gender", "Grade", "Attendance", "Wordsread",
      "MARSIGRSPre", "MARSIGRSPost",
      "MARSIPSSPre", "MARSIPSSPost",
      "MARSISRSPre", "MARSISRSPost",
      "MARSITotalPre", "MARSITotalPost",
      "GRADEVocStaninePre", "GRADEVocStaninePost",
      "GRADESenCompStaninePre", "GRADESenCompStaninePost",
      "GRADEPassCompStaninePre", "GRADEPassCompStaninePost",
      "GRADECompCompStaninePre", "GRADECompCompStaninePost",
      "GRADETotalTStaninePre", "GRADETotalTStaninePost",
      "GRADEListCompStaninePre", "GRADEListCompStaninePost"
    ))
  )
summary(updated_data)
Gender Grade Attendance Wordsread MARSIGRSPre
Male : 6 Third Grade :6 Min. : 1.00 Min. : 16631 Min. :1.2
Female:14 Fourth Grade:6 1st Qu.:10.75 1st Qu.: 24946 1st Qu.:2.3
Fifth Grade :8 Median :15.50 Median : 33160 Median :3.0
Mean :13.10 Mean : 40343 Mean :2.8
3rd Qu.:17.00 3rd Qu.: 48778 3rd Qu.:3.4
Max. :18.00 Max. :108885 Max. :3.6
NA's :5 NA's :1
MARSIGRSPost MARSIPSSPre MARSIPSSPost MARSISRSPre
Min. :2.600 Min. :2.000 Min. :2.600 Min. :2.000
1st Qu.:3.200 1st Qu.:3.400 1st Qu.:3.600 1st Qu.:3.300
Median :3.400 Median :4.000 Median :4.200 Median :3.600
Mean :3.569 Mean :3.811 Mean :4.046 Mean :3.653
3rd Qu.:4.000 3rd Qu.:4.400 3rd Qu.:4.600 3rd Qu.:4.100
Max. :5.000 Max. :4.800 Max. :5.000 Max. :5.000
NA's :7 NA's :1 NA's :7 NA's :1
MARSISRSPost MARSITotalPre MARSITotalPost GRADEVocStaninePre
Min. :2.800 Min. :2.600 Min. :3.000 Min. :2.000
1st Qu.:3.200 1st Qu.:2.865 1st Qu.:3.400 1st Qu.:3.000
Median :4.200 Median :3.730 Median :3.730 Median :4.000
Mean :3.831 Mean :3.429 Mean :3.813 Mean :3.778
3rd Qu.:4.200 3rd Qu.:3.860 3rd Qu.:4.200 3rd Qu.:4.000
Max. :5.000 Max. :4.260 Max. :5.000 Max. :5.000
NA's :7 NA's :1 NA's :7 NA's :2
GRADEVocStaninePost GRADESenCompStaninePre GRADESenCompStaninePost
Min. :1.000 Min. :2.000 Min. :2.000
1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000
Median :4.000 Median :4.000 Median :4.000
Mean :3.769 Mean :3.889 Mean :3.692
3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000
NA's :7 NA's :2 NA's :7
GRADEPassCompStaninePre GRADEPassCompStaninePost GRADECompCompStaninePre
Min. :2.000 Min. :1.000 Min. :2.000
1st Qu.:3.000 1st Qu.:4.000 1st Qu.:3.000
Median :4.000 Median :4.000 Median :3.500
Mean :3.737 Mean :4.077 Mean :3.611
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :6.000 Max. :7.000 Max. :6.000
NA's :1 NA's :7 NA's :2
GRADECompCompStaninePost GRADETotalTStaninePre GRADETotalTStaninePost
Min. :2 Min. :2.000 Min. :2
1st Qu.:4 1st Qu.:3.000 1st Qu.:4
Median :4 Median :4.000 Median :4
Mean :4 Mean :3.833 Mean :4
3rd Qu.:4 3rd Qu.:4.750 3rd Qu.:4
Max. :6 Max. :6.000 Max. :6
NA's :7 NA's :2 NA's :7
GRADEListCompStaninePre GRADEListCompStaninePost
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.000
Median :5.000 Median :3.000
Mean :4.684 Mean :3.143
3rd Qu.:6.000 3rd Qu.:3.000
Max. :9.000 Max. :7.000
NA's :1 NA's :6
# Two untouched copies of the selected data, kept so the loop-based and the
# zoo-based imputation further down each start from the data with its NAs.
func_data <- aggr_data <- updated_data
# Logical matrix with TRUE wherever a cell is missing; preview the first rows.
updated_na <- is.na(updated_data)
head(updated_na)
Gender Grade Attendance Wordsread MARSIGRSPre MARSIGRSPost MARSIPSSPre
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[3,] FALSE FALSE FALSE FALSE FALSE TRUE FALSE
[4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
MARSIPSSPost MARSISRSPre MARSISRSPost MARSITotalPre MARSITotalPost
[1,] FALSE FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE FALSE
[3,] TRUE FALSE TRUE FALSE TRUE
[4,] FALSE FALSE FALSE FALSE FALSE
[5,] FALSE FALSE FALSE FALSE FALSE
[6,] FALSE FALSE FALSE FALSE FALSE
GRADEVocStaninePre GRADEVocStaninePost GRADESenCompStaninePre
[1,] FALSE FALSE FALSE
[2,] FALSE FALSE FALSE
[3,] FALSE TRUE FALSE
[4,] FALSE FALSE FALSE
[5,] FALSE FALSE FALSE
[6,] FALSE TRUE FALSE
GRADESenCompStaninePost GRADEPassCompStaninePre GRADEPassCompStaninePost
[1,] FALSE FALSE FALSE
[2,] FALSE FALSE FALSE
[3,] TRUE FALSE TRUE
[4,] FALSE FALSE FALSE
[5,] FALSE FALSE FALSE
[6,] TRUE FALSE TRUE
GRADECompCompStaninePre GRADECompCompStaninePost GRADETotalTStaninePre
[1,] FALSE FALSE FALSE
[2,] FALSE FALSE FALSE
[3,] FALSE TRUE FALSE
[4,] FALSE FALSE FALSE
[5,] FALSE FALSE FALSE
[6,] FALSE TRUE FALSE
GRADETotalTStaninePost GRADEListCompStaninePre GRADEListCompStaninePost
[1,] FALSE FALSE FALSE
[2,] FALSE FALSE FALSE
[3,] TRUE FALSE TRUE
[4,] FALSE FALSE FALSE
[5,] FALSE FALSE FALSE
[6,] TRUE FALSE FALSE
# Total number of missing cells (TRUE entries) in the selected data
sum(updated_na)
[1] 88
# Proportion of all cells that are missing (mean of the logical matrix)
mean(updated_na)
[1] 0.1833333
There are multiple ways to impute missing data in R.
# Approach 1: replace each NA with the mean of its own column.
# Gender and Grade are factors and Attendance is not imputed, so they are
# left out; every other column is treated the same way.
mean_impute_cols <- setdiff(
  names(updated_data),
  c("Gender", "Grade", "Attendance")
)
updated_data[mean_impute_cols] <- lapply(
  updated_data[mean_impute_cols],
  function(col) replace(col, is.na(col), mean(col, na.rm = TRUE))
)
# Confirm that no missing cells are left
sum(is.na(updated_data))
[1] 0
There’s no missing data at all.
# Approach 2: mean imputation with a loop over the columns instead of one
# statement per column.
func_data_noNas <- func_data
for (i in seq_along(func_data_noNas)) {
  # Only numeric columns have a meaningful mean. Calling mean() on the factor
  # columns (Gender, Grade) would only raise a warning and return NA, so they
  # are skipped.
  if (is.numeric(func_data_noNas[[i]])) {
    missing_rows <- is.na(func_data_noNas[[i]])
    func_data_noNas[[i]][missing_rows] <- mean(func_data_noNas[[i]], na.rm = TRUE)
  }
}
# Checking: number of missing cells left after the loop
sum(is.na(func_data_noNas))
[1] 0
There is no missing data at all.
The `na.aggregate()` function from the zoo package, used next, does not work properly in our case. The explanation offered here is that it tries to aggregate all of the values together, and our columns are on very different scales: Wordsread is in the thousands, most of the outcome variables are on 5-point Likert scales, Attendance runs from 1 to 18, and so on. The function should work fine when all columns share the same range. (Note that the data frame also still contains the factor columns Gender and Grade, which may be the actual reason nothing is imputed; this has not been verified.) For demonstration purposes, I used the function anyway.
# Approach 3: zoo's na.aggregate() applied to the whole data frame at once.
# NOTE(review): aggr_data still contains the factor columns Gender and Grade;
# that may be why no NA gets filled here - confirm before relying on it.
library(zoo)
aggr_data_noNas <- na.aggregate(aggr_data)
# Count the missing cells that remain after the call
sum(is.na(aggr_data_noNas))
[1] 88
The code ran without an error, but the function did not fill in any values, apparently because of the wide range of dissimilarities in the data. We still have 88 missing data points.
# Per-column summary of the data after the column-by-column mean imputation
summary(updated_data)
Gender Grade Attendance Wordsread MARSIGRSPre
Male : 6 Third Grade :6 Min. : 1.00 Min. : 16631 Min. :1.20
Female:14 Fourth Grade:6 1st Qu.:10.75 1st Qu.: 29984 1st Qu.:2.35
Fifth Grade :8 Median :15.50 Median : 39605 Median :2.90
Mean :13.10 Mean : 40343 Mean :2.80
3rd Qu.:17.00 3rd Qu.: 42295 3rd Qu.:3.40
Max. :18.00 Max. :108885 Max. :3.60
MARSIGRSPost MARSIPSSPre MARSIPSSPost MARSISRSPre
Min. :2.600 Min. :2.000 Min. :2.600 Min. :2.000
1st Qu.:3.350 1st Qu.:3.400 1st Qu.:3.800 1st Qu.:3.350
Median :3.569 Median :4.000 Median :4.046 Median :3.600
Mean :3.569 Mean :3.811 Mean :4.046 Mean :3.653
3rd Qu.:3.627 3rd Qu.:4.400 3rd Qu.:4.600 3rd Qu.:4.050
Max. :5.000 Max. :4.800 Max. :5.000 Max. :5.000
MARSISRSPost MARSITotalPre MARSITotalPost GRADEVocStaninePre
Min. :2.800 Min. :2.600 Min. :3.000 Min. :2.000
1st Qu.:3.723 1st Qu.:2.933 1st Qu.:3.565 1st Qu.:3.000
Median :3.831 Median :3.595 Median :3.813 Median :4.000
Mean :3.831 Mean :3.429 Mean :3.813 Mean :3.778
3rd Qu.:4.200 3rd Qu.:3.860 3rd Qu.:3.980 3rd Qu.:4.000
Max. :5.000 Max. :4.260 Max. :5.000 Max. :5.000
GRADEVocStaninePost GRADESenCompStaninePre GRADESenCompStaninePost
Min. :1.000 Min. :2.000 Min. :2.000
1st Qu.:3.769 1st Qu.:3.000 1st Qu.:3.519
Median :3.885 Median :4.000 Median :3.692
Mean :3.769 Mean :3.889 Mean :3.692
3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
Max. :5.000 Max. :5.000 Max. :5.000
GRADEPassCompStaninePre GRADEPassCompStaninePost GRADECompCompStaninePre
Min. :2.000 Min. :1.000 Min. :2.000
1st Qu.:3.000 1st Qu.:4.000 1st Qu.:3.000
Median :3.868 Median :4.038 Median :3.611
Mean :3.737 Mean :4.077 Mean :3.611
3rd Qu.:4.000 3rd Qu.:4.077 3rd Qu.:4.000
Max. :6.000 Max. :7.000 Max. :6.000
GRADECompCompStaninePost GRADETotalTStaninePre GRADETotalTStaninePost
Min. :2 Min. :2.000 Min. :2
1st Qu.:4 1st Qu.:3.000 1st Qu.:4
Median :4 Median :4.000 Median :4
Mean :4 Mean :3.833 Mean :4
3rd Qu.:4 3rd Qu.:4.250 3rd Qu.:4
Max. :6 Max. :6.000 Max. :6
GRADEListCompStaninePre GRADEListCompStaninePost
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:2.750
Median :4.842 Median :3.000
Mean :4.684 Mean :3.143
3rd Qu.:5.500 3rd Qu.:3.143
Max. :9.000 Max. :7.000
# Cross-tabulate the number of rows for each Gender x Grade combination
xtabs(~Gender+Grade, data=updated_data)
Grade
Gender Third Grade Fourth Grade Fifth Grade
Male 1 3 2
Female 5 3 6
# Placeholder character matrix (2000 rows x 24 named columns); every cell
# holds the same filler string until the simulated values are written in.
sim_data <- matrix(
  "tobe added",
  nrow = 2000,
  ncol = 24,
  dimnames = list(
    NULL,
    c(
      "Gender", "Grade", "Attendance", "Wordsread",
      "MARSIGRSPre", "MARSIGRSPost",
      "MARSIPSSPre", "MARSIPSSPost",
      "MARSISRSPre", "MARSISRSPost",
      "MARSITotalPre", "MARSITotalPost",
      "GRADEVocStaninePre", "GRADEVocStaninePost",
      "GRADESenCompStaninePre", "GRADESenCompStaninePost",
      "GRADEPassCompStaninePre", "GRADEPassCompStaninePost",
      "GRADECompCompStaninePre", "GRADECompCompStaninePost",
      "GRADETotalTStaninePre", "GRADETotalTStaninePost",
      "GRADEListCompStaninePre", "GRADEListCompStaninePost"
    )
  )
)
head(sim_data)
Gender Grade Attendance Wordsread MARSIGRSPre
[1,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[2,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[3,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[4,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[5,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[6,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
MARSIGRSPost MARSIPSSPre MARSIPSSPost MARSISRSPre MARSISRSPost
[1,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[2,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[3,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[4,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[5,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
[6,] "tobe added" "tobe added" "tobe added" "tobe added" "tobe added"
MARSITotalPre MARSITotalPost GRADEVocStaninePre GRADEVocStaninePost
[1,] "tobe added" "tobe added" "tobe added" "tobe added"
[2,] "tobe added" "tobe added" "tobe added" "tobe added"
[3,] "tobe added" "tobe added" "tobe added" "tobe added"
[4,] "tobe added" "tobe added" "tobe added" "tobe added"
[5,] "tobe added" "tobe added" "tobe added" "tobe added"
[6,] "tobe added" "tobe added" "tobe added" "tobe added"
GRADESenCompStaninePre GRADESenCompStaninePost GRADEPassCompStaninePre
[1,] "tobe added" "tobe added" "tobe added"
[2,] "tobe added" "tobe added" "tobe added"
[3,] "tobe added" "tobe added" "tobe added"
[4,] "tobe added" "tobe added" "tobe added"
[5,] "tobe added" "tobe added" "tobe added"
[6,] "tobe added" "tobe added" "tobe added"
GRADEPassCompStaninePost GRADECompCompStaninePre GRADECompCompStaninePost
[1,] "tobe added" "tobe added" "tobe added"
[2,] "tobe added" "tobe added" "tobe added"
[3,] "tobe added" "tobe added" "tobe added"
[4,] "tobe added" "tobe added" "tobe added"
[5,] "tobe added" "tobe added" "tobe added"
[6,] "tobe added" "tobe added" "tobe added"
GRADETotalTStaninePre GRADETotalTStaninePost GRADEListCompStaninePre
[1,] "tobe added" "tobe added" "tobe added"
[2,] "tobe added" "tobe added" "tobe added"
[3,] "tobe added" "tobe added" "tobe added"
[4,] "tobe added" "tobe added" "tobe added"
[5,] "tobe added" "tobe added" "tobe added"
[6,] "tobe added" "tobe added" "tobe added"
GRADEListCompStaninePost
[1,] "tobe added"
[2,] "tobe added"
[3,] "tobe added"
[4,] "tobe added"
[5,] "tobe added"
[6,] "tobe added"
# Fix the random number stream so the simulated data are reproducible
set.seed(-123)

# Pool of admissible values for each column of sim_data. Each object is named
# exactly like the column it feeds, which the fill loop below relies on.
Gender <- c("Male", "Female")
Grade <- c("Third Grade", "Fourth Grade", "Fifth Grade")
Attendance <- 1:18
Wordsread <- 16631:108885
MARSIGRSPre <- 1:5
MARSIGRSPost <- 1:5
MARSIPSSPre <- 1:5
MARSIPSSPost <- 1:5
MARSISRSPre <- 1:5
MARSISRSPost <- 1:5
MARSITotalPre <- 1:5
MARSITotalPost <- 1:5
GRADEVocStaninePre <- 2:5
GRADEVocStaninePost <- 1:5
GRADESenCompStaninePre <- 2:5
GRADESenCompStaninePost <- 2:5
GRADEPassCompStaninePre <- 2:6
GRADEPassCompStaninePost <- 1:7
GRADECompCompStaninePre <- 2:6
GRADECompCompStaninePost <- 2:6
GRADETotalTStaninePre <- 2:6
GRADETotalTStaninePost <- 2:6
GRADEListCompStaninePre <- 1:9
GRADEListCompStaninePost <- 1:9

# Collect the pools in column order by looking each one up under its column name
value_pools <- mget(colnames(sim_data))

# Fill the matrix one cell at a time, row by row and left to right within a
# row. The draw order is kept on purpose: vectorising by column would consume
# the random number stream in a different order and change the simulated values.
# Only one value is drawn per call, so sampling with or without replacement
# makes no difference; replace = TRUE is used for every column for consistency.
for (i in seq_len(nrow(sim_data))) {
  for (j in seq_along(value_pools)) {
    sim_data[i, j] <- sample(value_pools[[j]], size = 1, replace = TRUE)
  }
}
# Convert the filled character matrix to a data frame and inspect the
# resulting column types
sim_data <- as.data.frame(sim_data)
str(sim_data)
'data.frame': 2000 obs. of 24 variables:
$ Gender : chr "Female" "Male" "Female" "Female" ...
$ Grade : chr "Fourth Grade" "Fifth Grade" "Fifth Grade" "Fifth Grade" ...
$ Attendance : chr "6" "8" "6" "2" ...
$ Wordsread : chr "30036" "92332" "20499" "104772" ...
$ MARSIGRSPre : chr "5" "2" "3" "1" ...
$ MARSIGRSPost : chr "3" "3" "2" "1" ...
$ MARSIPSSPre : chr "3" "5" "5" "4" ...
$ MARSIPSSPost : chr "5" "3" "1" "3" ...
$ MARSISRSPre : chr "4" "4" "1" "3" ...
$ MARSISRSPost : chr "1" "1" "4" "5" ...
$ MARSITotalPre : chr "1" "3" "5" "1" ...
$ MARSITotalPost : chr "4" "1" "4" "5" ...
$ GRADEVocStaninePre : chr "3" "5" "4" "5" ...
$ GRADEVocStaninePost : chr "4" "2" "3" "5" ...
$ GRADESenCompStaninePre : chr "5" "5" "3" "5" ...
$ GRADESenCompStaninePost : chr "2" "4" "5" "3" ...
$ GRADEPassCompStaninePre : chr "5" "6" "4" "4" ...
$ GRADEPassCompStaninePost: chr "7" "4" "5" "3" ...
$ GRADECompCompStaninePre : chr "5" "6" "3" "5" ...
$ GRADECompCompStaninePost: chr "3" "6" "3" "3" ...
$ GRADETotalTStaninePre : chr "5" "3" "2" "4" ...
$ GRADETotalTStaninePost : chr "4" "6" "6" "6" ...
$ GRADEListCompStaninePre : chr "6" "9" "8" "2" ...
$ GRADEListCompStaninePost: chr "4" "2" "3" "9" ...
# Give the simulated columns their final types: Gender and Grade become
# factors, every other column is converted to numeric.
sim_factor_cols <- c("Gender", "Grade")
sim_numeric_cols <- setdiff(names(sim_data), sim_factor_cols)
sim_data[sim_factor_cols] <- lapply(sim_data[sim_factor_cols], as.factor)
sim_data[sim_numeric_cols] <- lapply(sim_data[sim_numeric_cols], as.numeric)
# Per-column summary of the simulated data
summary(sim_data)
Gender Grade Attendance Wordsread
Female:1008 Fifth Grade :661 Min. : 1.000 Min. : 16801
Male : 992 Fourth Grade:683 1st Qu.: 5.000 1st Qu.: 39588
Third Grade :656 Median : 9.000 Median : 62770
Mean : 9.406 Mean : 62656
3rd Qu.:14.000 3rd Qu.: 85280
Max. :18.000 Max. :108818
MARSIGRSPre MARSIGRSPost MARSIPSSPre MARSIPSSPost MARSISRSPre
Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.00
Median :3.000 Median :3.000 Median :3.000 Median :3.000 Median :3.00
Mean :3.028 Mean :2.982 Mean :2.995 Mean :3.075 Mean :2.96
3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.00
Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00
MARSISRSPost MARSITotalPre MARSITotalPost GRADEVocStaninePre
Min. :1.000 Min. :1.00 Min. :1.000 Min. :2.000
1st Qu.:2.000 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:2.000
Median :3.000 Median :3.00 Median :3.000 Median :4.000
Mean :2.965 Mean :2.99 Mean :3.018 Mean :3.494
3rd Qu.:4.000 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:5.000
Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000
GRADEVocStaninePost GRADESenCompStaninePre GRADESenCompStaninePost
Min. :1.00 Min. :2.000 Min. :2.00
1st Qu.:2.00 1st Qu.:3.000 1st Qu.:2.00
Median :3.00 Median :4.000 Median :4.00
Mean :3.02 Mean :3.527 Mean :3.49
3rd Qu.:4.00 3rd Qu.:5.000 3rd Qu.:4.00
Max. :5.00 Max. :5.000 Max. :5.00
GRADEPassCompStaninePre GRADEPassCompStaninePost GRADECompCompStaninePre
Min. :2.000 Min. :1.000 Min. :2.000
1st Qu.:3.000 1st Qu.:2.000 1st Qu.:3.000
Median :4.000 Median :4.000 Median :4.000
Mean :3.971 Mean :4.027 Mean :3.943
3rd Qu.:5.000 3rd Qu.:6.000 3rd Qu.:5.000
Max. :6.000 Max. :7.000 Max. :6.000
GRADECompCompStaninePost GRADETotalTStaninePre GRADETotalTStaninePost
Min. :2.000 Min. :2.000 Min. :2.000
1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000
Median :4.000 Median :4.000 Median :4.000
Mean :3.997 Mean :4.046 Mean :3.975
3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
Max. :6.000 Max. :6.000 Max. :6.000
GRADEListCompStaninePre GRADEListCompStaninePost
Min. :1.000 Min. :1.000
1st Qu.:3.000 1st Qu.:3.000
Median :5.000 Median :5.000
Mean :5.005 Mean :4.971
3rd Qu.:7.000 3rd Qu.:7.000
Max. :9.000 Max. :9.000
# Stack the imputed data on top of the simulated data
mixed_data <- rbind(updated_data, sim_data)
# Mean imputation introduced fractional values: round Wordsread to whole
# numbers and the score columns to one decimal place.
# Gender and Grade are factors and Attendance is left unrounded.
mixed_data$Wordsread <- round(mixed_data$Wordsread, digits = 0)
score_cols <- setdiff(
  names(mixed_data),
  c("Gender", "Grade", "Attendance", "Wordsread")
)
mixed_data[score_cols] <- lapply(mixed_data[score_cols], round, digits = 1)
# Show the rows from updated_data plus the first simulated row
head(mixed_data, 21)
Gender Grade Attendance Wordsread MARSIGRSPre MARSIGRSPost
1 Female Third Grade 16 51858 2.4 3.2
2 Female Third Grade 18 28575 1.8 3.4
3 Female Third Grade 11 49406 3.2 3.6
4 Male Third Grade 17 38867 3.4 4.0
5 Female Third Grade 14 33160 3.0 3.2
6 Female Third Grade 15 108885 3.4 3.2
7 Female Fourth Grade 17 16631 1.6 2.6
8 Male Fourth Grade 10 40343 2.2 3.6
9 Male Fourth Grade 15 19578 3.4 5.0
10 Male Fourth Grade 18 21316 2.6 3.8
11 Female Fourth Grade 18 19662 1.2 3.4
12 Female Fifth Grade 3 40343 2.8 3.6
13 Male Fifth Grade 14 30453 2.2 4.0
14 Female Fifth Grade 17 31066 3.2 4.2
15 Female Fifth Grade 17 71353 3.6 3.6
16 Female Fifth Grade 16 48149 3.4 3.4
17 Female Fifth Grade 18 36191 3.4 3.0
18 Female Fifth Grade 5 40343 2.8 3.6
19 Male Fifth Grade 2 40343 3.6 3.6
20 Female Fourth Grade 1 40343 2.8 3.6
21 Female Fourth Grade 6 30036 5.0 3.0
MARSIPSSPre MARSIPSSPost MARSISRSPre MARSISRSPost MARSITotalPre
1 3.0 3.0 2.8 2.8 2.7
2 4.0 5.0 2.0 4.2 2.6
3 3.4 4.0 3.0 3.8 3.2
4 2.6 2.6 3.0 3.0 3.0
5 4.4 3.8 4.0 3.4 3.8
6 3.8 3.8 3.6 4.2 3.8
7 3.8 3.0 3.6 4.0 3.0
8 2.0 4.0 3.6 3.8 2.6
9 4.0 5.0 4.2 5.0 3.9
10 2.2 4.2 3.4 2.8 2.7
11 3.4 3.6 3.2 3.2 2.6
12 4.4 4.0 4.2 3.8 3.8
13 4.4 4.6 3.8 4.2 3.5
14 4.6 4.6 4.4 4.6 4.1
15 4.4 4.0 3.6 3.8 3.9
16 4.8 4.8 4.6 4.2 4.3
17 4.0 4.6 5.0 4.2 4.1
18 4.8 4.0 3.6 3.8 3.7
19 4.4 4.0 3.8 3.8 3.9
20 3.8 4.0 3.7 3.8 3.4
21 3.0 5.0 4.0 1.0 1.0
MARSITotalPost GRADEVocStaninePre GRADEVocStaninePost GRADESenCompStaninePre
1 3.0 5.0 3.0 4.0
2 4.2 4.0 4.0 4.0
3 3.8 4.0 3.8 3.0
4 3.2 2.0 1.0 2.0
5 3.5 3.0 4.0 4.0
6 3.7 5.0 3.8 4.0
7 3.2 4.0 4.0 5.0
8 3.8 4.0 3.8 3.0
9 5.0 4.0 5.0 5.0
10 3.6 3.0 4.0 5.0
11 3.4 2.0 4.0 4.0
12 3.8 4.0 3.8 5.0
13 4.3 4.0 4.0 3.0
14 4.5 3.0 4.0 2.0
15 3.8 3.0 4.0 3.0
16 4.1 5.0 5.0 5.0
17 3.9 5.0 3.0 5.0
18 3.8 4.0 3.8 4.0
19 3.8 3.8 3.8 3.9
20 3.8 3.8 3.8 3.9
21 4.0 3.0 4.0 5.0
GRADESenCompStaninePost GRADEPassCompStaninePre GRADEPassCompStaninePost
1 3.0 3.0 3.0
2 4.0 3.0 5.0
3 3.7 3.0 4.1
4 2.0 2.0 1.0
5 3.0 4.0 4.0
6 3.7 4.0 4.1
7 5.0 4.0 5.0
8 3.7 4.0 4.1
9 4.0 3.0 4.0
10 4.0 5.0 4.0
11 4.0 4.0 4.0
12 3.7 6.0 4.1
13 2.0 2.0 4.0
14 3.0 2.0 4.0
15 5.0 3.0 4.0
16 5.0 6.0 7.0
17 4.0 4.0 4.0
18 3.7 6.0 4.1
19 3.7 3.7 4.1
20 3.7 3.0 4.1
21 2.0 5.0 7.0
GRADECompCompStaninePre GRADECompCompStaninePost GRADETotalTStaninePre
1 3.0 3 4.0
2 3.0 5 4.0
3 3.0 4 3.0
4 2.0 2 2.0
5 4.0 4 3.0
6 4.0 4 5.0
7 4.0 5 4.0
8 3.0 4 4.0
9 4.0 4 4.0
10 4.0 4 4.0
11 3.0 4 3.0
12 6.0 4 5.0
13 2.0 3 3.0
14 2.0 4 2.0
15 3.0 4 3.0
16 6.0 6 6.0
17 4.0 4 5.0
18 5.0 4 5.0
19 3.6 4 3.8
20 3.6 4 3.8
21 5.0 3 5.0
GRADETotalTStaninePost GRADEListCompStaninePre GRADEListCompStaninePost
1 3 5.0 3.0
2 5 3.0 3.0
3 4 5.0 3.1
4 2 3.0 2.0
5 4 3.0 2.0
6 4 7.0 3.0
7 5 7.0 3.0
8 4 4.0 3.1
9 4 9.0 3.0
10 4 5.0 1.0
11 4 7.0 2.0
12 4 3.0 3.1
13 3 5.0 5.0
14 4 7.0 5.0
15 4 3.0 2.0
16 6 4.0 7.0
17 4 1.0 3.0
18 4 3.0 3.1
19 4 4.7 3.1
20 4 5.0 3.1
21 4 6.0 4.0
# Save the combined data to a CSV file in the working directory, without a
# row-name column
write.csv(mixed_data, "mixed_data.csv", row.names=FALSE)