2021 & 2022 Overton Data Manipulation

rm(list = ls())
#setwd("C:/Users/nghimire/OneDrive - The University of Texas at Tyler/Overton Data Summer 2022")
library(tidyverse) # To manipulate dataframe
library(ggplot2) # Just in case, we need to visualize something
library(readxl) # To read data from excel spreadsheet
library(haven) # To read spss data

1. Getting 2021 Data Ready

A. Loading Checking the Names of Variables

# Loading Data
data_2021 <- read_spss("Overton Summer 2021 School Data.sav")
data_2021 <- data.frame(data_2021)
# Checking the Variable Names
names(data_2021)

  [1] "LastName"                 "FirstName"               
  [3] "Gender"                   "Ethnicity"               
  [5] "Grade"                    "Teacher"                 
  [7] "Attendance"               "Wordsread"               
  [9] "MARSIGRSPre"              "MARSIGRSPost"            
 [11] "MARSIPSSPre"              "MARSIPSSPost"            
 [13] "MARSISRSPre"              "MARSISRSPost"            
 [15] "MARSITotalPre"            "MARSITotalPost"          
 [17] "GRADEWRRawPre"            "GRADEWRRawPost"          
 [19] "GRADEWRStaninePre"        "GRADEWRStaninePost"      
 [21] "GRADEWRGEPre"             "GRADEWRGEPost"           
 [23] "GRADEWRSSPre"             "GRADEWRSSPost"           
 [25] "GRADEWRNCEPre"            "GRADEWRNCEPost"          
 [27] "GRADEWRGSVPre"            "GRADEWRGSVPost"          
 [29] "GRADEVocRSPre"            "GRADEVocRSPost"          
 [31] "GRADEVocStaninePre"       "GRADEVocStaninePost"     
 [33] "GRADEVocGEPre"            "GRADEVocGEPost"          
 [35] "GRADEVocSSPre"            "GRADEVocSSPost"          
 [37] "GRADEVocNCEPre"           "GRADEVocNCEPost"         
 [39] "GRADEVocGSVPre"           "GRADEVocGSVPost"         
 [41] "GRADEVocCompRSPre"        "GRADEVocCompRSPost"      
 [43] "GRADEVocCompStaninePre"   "GRADEVocCompStaninePost" 
 [45] "GRADEVocCompGEPre"        "GRADEVocCompGEPost"      
 [47] "GRADEVocCompSSPre"        "GRADEVocCompSSPost"      
 [49] "GRADEVocCompNCEPre"       "GRADEVocCompNCEPost"     
 [51] "GRADEVocCompGSVPre"       "GRADEVocCompGSVPost"     
 [53] "GRADESenCompRSPre"        "GRADESenCompRSPost"      
 [55] "GRADESenCompStaninePre"   "GRADESenCompStaninePost" 
 [57] "GRADESenCompGEPre"        "GRADESenCompGEPost"      
 [59] "GRADESenCompSSPre"        "GRADESenCompSSPost"      
 [61] "GRADESenCompNCEPre"       "GRADESenCompNCEPost"     
 [63] "GRADESenCompGSVPre"       "GRADESenCompGSVPost"     
 [65] "GRADEPassCompRSPre"       "GRADEPassCompRSPost"     
 [67] "GRADEPassCompStaninePre"  "GRADEPassCompStaninePost"
 [69] "GRADEPassCompGEPre"       "GRADEPassCompGEPost"     
 [71] "GRADEPassCompSSPre"       "GRADEPassCompSSPost"     
 [73] "GRADEPassCompNCEPre"      "GRADEPassCompNCEPost"    
 [75] "GRADEPassCompGSVPre"      "GRADEPassCompGSVPost"    
 [77] "GRADECompCompRSPre"       "GRADECompCompRSPost"     
 [79] "GRADECompCompStaninePre"  "GRADECompCompStaninePost"
 [81] "GRADECompCompGEPre"       "GRADECompCompGEPost"     
 [83] "GRADECompCompSSPre"       "GRADECompCompSSPost"     
 [85] "GRADECompCompNCEPre"      "GRADECompCompNCEPost"    
 [87] "GRADECompCompGSVPre"      "GRADECompCompGSVPost"    
 [89] "GRADETotalTRSPre"         "GRADETotalTRSPost"       
 [91] "GRADETotalTStaninePre"    "GRADETotalTStaninePost"  
 [93] "GRADETotalTGEPre"         "GRADETotalTGEPost"       
 [95] "GRADETotalTSSPre"         "GRADETotalTSSPost"       
 [97] "GRADETotalTNCEPre"        "GRADETotalTNCEPost"      
 [99] "GRADETotalTGSVPre"        "GRADETotalTGSVPost"      
[101] "GRADEListCompRSPre"       "GRADEListCompRSPost"     
[103] "GRADEListCompStaninePre"  "GRADEListCompStaninePost"
[105] "GRADEListCompGEPre"       "GRADEListCompGEPost"     
[107] "GRADEListCompSSPre"       "GRADEListCompSSPost"     
[109] "GRADEListCompNCEPre"      "GRADEListCompNCEPost"    
[111] "GRADEListCompGSVPre"      "GRADEListCompGSVPost"

B. Subsetting Data

short_2021 <- select(data_2021, LastName, Gender, Grade, Teacher, Attendance, Wordsread, MARSITotalPre, MARSITotalPost, GRADETotalTGEPre, GRADETotalTGEPost, GRADETotalTNCEPre, GRADETotalTNCEPost, GRADETotalTGSVPre, GRADETotalTGSVPost, GRADEListCompRSPre, GRADEListCompRSPost)
# Making Sure
names(short_2021)

 [1] "LastName"            "Gender"              "Grade"              
 [4] "Teacher"             "Attendance"          "Wordsread"          
 [7] "MARSITotalPre"       "MARSITotalPost"      "GRADETotalTGEPre"   
[10] "GRADETotalTGEPost"   "GRADETotalTNCEPre"   "GRADETotalTNCEPost" 
[13] "GRADETotalTGSVPre"   "GRADETotalTGSVPost"  "GRADEListCompRSPre" 
[16] "GRADEListCompRSPost"

C. Renaming the Column Names

colnames(short_2021) <- c("student", "gender", "grade", "tech_er", "attendance", "words_read", "pre_marsi_avg", "post_marsi_avg", "pre_tt_ge", "post_tt_ge", "pre_tt_nce", "post_tt_nce", "pre_tt_gsv", "post_tt_gsv", "listening_pre", "listening_post")
#short_2021$gender <- as.factor(short_2021$gender)
#short_2021$grade <- as.factor(short_2021$grade)
#short_2021$tech_er <- as.factor(short_2021$tech_er)

str(short_2021)

'data.frame':   20 obs. of  16 variables:
 $ student       : dbl+lbl [1:20]  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,...
   ..@ label        : chr "Last Name"
   ..@ format.spss  : chr "F8.2"
   ..@ display_width: int 11
   ..@ labels       : Named num  1 2 3 4 5 6 7 8 9 10 ...
   .. ..- attr(*, "names")= chr [1:20] "Bobbitt" "Boney" "Devora" "Holland" ...
 $ gender        : dbl+lbl [1:20] 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2
   ..@ label        : chr "Gender"
   ..@ format.spss  : chr "F8.2"
   ..@ display_width: int 11
   ..@ labels       : Named num  1 2
   .. ..- attr(*, "names")= chr [1:2] "Male" "Female"
 $ grade         : dbl+lbl [1:20] 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2
   ..@ label        : chr "Grade"
   ..@ format.spss  : chr "F8.2"
   ..@ display_width: int 15
   ..@ labels       : Named num  1 2 3
   .. ..- attr(*, "names")= chr [1:3] "Third Grade" "Fourth Grade" "Fifth Grade"
 $ tech_er       : dbl+lbl [1:20] 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 4, 3, 4, 3, 4, 4, 2
   ..@ label        : chr "Teacher"
   ..@ format.spss  : chr "F8.2"
   ..@ display_width: int 19
   ..@ labels       : Named num  1 2 3 4
   .. ..- attr(*, "names")= chr [1:4] "Maryanna Luna" "Ceara Oliver" "Jacie Gunter" "Christy Turner"
 $ attendance    : num  16 18 11 17 14 15 17 10 15 18 ...
  ..- attr(*, "label")= chr "Days Attended"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 11
 $ words_read    : num  51858 28575 49406 38867 33160 ...
  ..- attr(*, "label")= chr "Words Read"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 17
 $ pre_marsi_avg : num  2.73 2.6 3.2 3 3.8 3.8 3 2.6 3.86 2.73 ...
  ..- attr(*, "label")= chr "MARSI Composite Score Pretest"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 15
 $ post_marsi_avg: num  3 4.2 NA 3.2 3.46 3.73 3.2 NA 5 3.6 ...
  ..- attr(*, "label")= chr "MARSI Composite Score Post"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 19
 $ pre_tt_ge     : num  2.4 2.3 2 1.8 2.1 2.7 3.7 2.8 3.7 3.5 ...
  ..- attr(*, "label")= chr "GRADE Total Test Grade Equivalent Score Pre"
  ..- attr(*, "format.spss")= chr "F8.2"
 $ post_tt_ge    : num  2 3.1 NA 1.8 2.2 NA 3.8 NA 3.3 2.8 ...
  ..- attr(*, "label")= chr "GRADE Total Test Grade Equivalent Score Post"
  ..- attr(*, "format.spss")= chr "F8.2"
 $ pre_tt_nce    : num  39 36 28 21 32 44 37 36 37 36 ...
  ..- attr(*, "label")= chr "GRADE Total Test National Curve Equivalent Pre"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 21
 $ post_tt_nce   : num  28 50 NA 19 35 NA 47 NA 43 36 ...
  ..- attr(*, "label")= chr "GRADE Total Test National Curve Equivalent Post"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 21
 $ pre_tt_gsv    : num  406 401 385 375 394 415 434 417 434 430 ...
  ..- attr(*, "label")= chr "GRADE Total Test Growth Scale Value Pre"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 22
 $ post_tt_gsv   : num  387 423 NA 372 398 NA 436 NA 428 418 ...
  ..- attr(*, "label")= chr "GRADE Total Test Growth Scale Value Post"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 21
 $ listening_pre : num  15 13 15 12 12 16 16 14 17 15 ...
  ..- attr(*, "label")= chr "GRADE Listening Comprehension Raw Score Pre"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 26
 $ listening_post: num  13 12 NA 11 11 12 12 NA 13 9 ...
  ..- attr(*, "label")= chr "GRADE Listening Comprehension Raw Score Post"
  ..- attr(*, "format.spss")= chr "F8.2"
  ..- attr(*, "display_width")= int 30

D. Checking the Existing Level and Labels in Some categorial Data Removing Them

# Loading Required Library
library(sjlabelled)
# Checking the Labels in the Entire Dataset
    #get_labels(short_2021[ ,1:4])
# Getting Rid of the Labels
short_2021$student <- remove_labels(short_2021$student, labels = c(1:20)) # There were 20 lebels, i.e., names of the students and getting rid of all of them
# Checking back
get_labels(short_2021$student)

NULL

E. Checking the Labels of Other Categorical Variables

get_labels(short_2021[ ,2:4])

$gender
[1] "Male"   "Female"

$grade
[1] "Third Grade"  "Fourth Grade" "Fifth Grade" 

$tech_er
[1] "Maryanna Luna"  "Ceara Oliver"   "Jacie Gunter"   "Christy Turner"

E. Replacing the Old Labels by Something New

short_2021$gender <- replace_labels(short_2021$gender, 
                                    labels = c(
                                      "Male" = "1",
                                      "Female" = "2"
                                    ))
short_2021$grade <- replace_labels(short_2021$grade, 
                                    labels = c(
                                      "Second Grade" = "1",
                                      "Third Grade" = "2",
                                      "Fourth Grade" = "3"
                                    ))
short_2021$tech_er <- replace_labels(short_2021$tech_er, 
                                    labels = c(
                                      "LM" = "1",
                                      "OC" = "2",
                                      "TC" = "3",
                                      "GJ" = "4"
                                    ))
get_labels(short_2021[ ,2:4])

$gender
[1] "Male"   "Female"

$grade
[1] "Second Grade" "Third Grade"  "Fourth Grade"

$tech_er
[1] "LM" "OC" "TC" "GJ"

F. Adding a New Column and Checking All Variables

short_2021$year <- c(0)
summary(short_2021)

    student          gender        grade        tech_er      attendance   
 Min.   : 1.00   Min.   :1.0   Min.   :1.0   Min.   :1.0   Min.   : 1.00  
 1st Qu.: 5.75   1st Qu.:1.0   1st Qu.:1.0   1st Qu.:1.0   1st Qu.:10.75  
 Median :10.50   Median :2.0   Median :2.0   Median :2.0   Median :15.50  
 Mean   :10.50   Mean   :1.7   Mean   :2.1   Mean   :2.4   Mean   :13.10  
 3rd Qu.:15.25   3rd Qu.:2.0   3rd Qu.:3.0   3rd Qu.:4.0   3rd Qu.:17.00  
 Max.   :20.00   Max.   :2.0   Max.   :3.0   Max.   :4.0   Max.   :18.00  
                                                                          
   words_read     pre_marsi_avg   post_marsi_avg    pre_tt_ge    
 Min.   : 16631   Min.   :2.600   Min.   :3.000   Min.   :1.800  
 1st Qu.: 24946   1st Qu.:2.865   1st Qu.:3.400   1st Qu.:2.450  
 Median : 33160   Median :3.730   Median :3.730   Median :2.900  
 Mean   : 40343   Mean   :3.429   Mean   :3.813   Mean   :3.261  
 3rd Qu.: 48778   3rd Qu.:3.860   3rd Qu.:4.200   3rd Qu.:3.700  
 Max.   :108885   Max.   :4.260   Max.   :5.000   Max.   :6.200  
 NA's   :5        NA's   :1       NA's   :7       NA's   :2      
   post_tt_ge      pre_tt_nce     post_tt_nce      pre_tt_gsv     post_tt_gsv 
 Min.   :1.800   Min.   :21.00   Min.   :19.00   Min.   :375.0   Min.   :372  
 1st Qu.:2.800   1st Qu.:28.00   1st Qu.:35.00   1st Qu.:407.5   1st Qu.:418  
 Median :3.300   Median :36.00   Median :37.00   Median :419.5   Median :428  
 Mean   :3.308   Mean   :35.94   Mean   :38.38   Mean   :421.1   Mean   :422  
 3rd Qu.:3.800   3rd Qu.:42.75   3rd Qu.:43.00   3rd Qu.:434.0   3rd Qu.:436  
 Max.   :6.400   Max.   :56.00   Max.   :57.00   Max.   :466.0   Max.   :467  
 NA's   :7       NA's   :2       NA's   :7       NA's   :2       NA's   :7    
 listening_pre   listening_post       year  
 Min.   : 8.00   Min.   : 9.00   Min.   :0  
 1st Qu.:13.00   1st Qu.:11.00   1st Qu.:0  
 Median :15.00   Median :12.00   Median :0  
 Mean   :14.05   Mean   :12.36   Mean   :0  
 3rd Qu.:15.50   3rd Qu.:13.00   3rd Qu.:0  
 Max.   :17.00   Max.   :16.00   Max.   :0  
 NA's   :1       NA's   :6

There’s considerable number of missing data in all cases.

2. Getting 2022 Data

A. Loading Checking the Names of Variables

# Loading Data
data_2022 <- read_spss("reading_data_2022.sav")
data_2022 <- data.frame(data_2022)
# Checking the Variable Names
names(data_2022)

 [1] "student_name"               "student"                   
 [3] "year"                       "year_coded"                
 [5] "grade"                      "gender"                    
 [7] "teacher"                    "tech_er"                   
 [9] "words_read"                 "pre_marsi_grs"             
[11] "pre_marsi_pss"              "pre_marsi_srs"             
[13] "pre_marsi_avg"              "posttest_vocab_raw"        
[15] "posttest_vocab_stanine"     "posttest_vocab_ile"        
[17] "posttest_vocab_ge"          "posttest_vocab_ss"         
[19] "posttest_vocab_nce"         "posttest_cc_raw"           
[21] "posttest_cc_stanine"        "posttest_cc_ile"           
[23] "posttest_cc_ge"             "posttest_cc_ss"            
[25] "posttest_cc_nce"            "posttest_listening_comp"   
[27] "posttest_listening_stanine" "posttest_raw_total"        
[29] "posttest_tt_stanine"        "posttest_tt_ile"           
[31] "posttest_tt_ge"             "posttest_tt_ss"            
[33] "posttest_tt_nce"            "posttest_tt_gsv"           
[35] "post_marsi_grs"             "post_marsi_pss"            
[37] "post_marsi_srs"             "post_marsi_avg"            
[39] "pretest_vocab_raw"          "pretest_vocab_stanine"     
[41] "pretest_vocab_ile"          "pretest_vocab_ge"          
[43] "pretest_vocab_ss"           "pretest_vocab_nce"         
[45] "pretest_cc_raw"             "pretest_cc_stanine"        
[47] "pretest_cc_ile"             "pretest_cc_ge"             
[49] "pretest_cc_ss"              "pretest_cc_nce"            
[51] "pretest_listening_comp"     "pretest_listening_stanine" 
[53] "pretest_raw_total"          "pretest_tt_stanine"        
[55] "pretest_tt_ile"             "pretest_tt_ge"             
[57] "pretest_tt_ss"              "pretest_tt_nce"            
[59] "pretest_tt_gsv"

B. Doing Everything Needed Like 2021 Data

# Selecting Required Columns
short_2022 <- select(data_2022, gender, student, grade, tech_er, words_read, pre_marsi_avg, pretest_tt_ge, posttest_tt_ge, pretest_tt_nce, posttest_tt_nce, pretest_tt_gsv, posttest_tt_gsv, pretest_listening_stanine, posttest_listening_stanine, year_coded)

# Renaming Columns to match with the columns in 2021 data
short_2022 <- short_2022|>
  rename(year = year_coded,
         listening_pre = pretest_listening_stanine,
         listening_post = posttest_listening_stanine,
         pre_tt_ge  = pretest_tt_ge,
         post_tt_ge  = posttest_tt_ge, 
         pre_tt_nce = pretest_tt_nce, 
         post_tt_nce = posttest_tt_nce, 
         pre_tt_gsv = pretest_tt_gsv, 
         post_tt_gsv = posttest_tt_gsv
  )

# Checking the new list of included variables
names(short_2022)

 [1] "gender"         "student"        "grade"          "tech_er"       
 [5] "words_read"     "pre_marsi_avg"  "pre_tt_ge"      "post_tt_ge"    
 [9] "pre_tt_nce"     "post_tt_nce"    "pre_tt_gsv"     "post_tt_gsv"   
[13] "listening_pre"  "listening_post" "year"

# Changing the class of some of the variables 
short_2022$grade <- as.factor(short_2022$grade)
#short_2022$gender <- as.factor(short_2022$gender)
#short_2022$tech_er <- as.factor(short_2022$tech_er)

# cheking for the labels of the selected variables
get_labels(short_2022[, 3:5])

$grade
[1] "1" "2" "3" "4"

$tech_er
[1] "."  "MC" "WM" "TC" "FJ"

$words_read
NULL

# Having a look at the data
  #short_2022

# Changing the labels by new names
short_2022$grade <- replace_labels(short_2022$grade, 
                                    labels = c(
                                      "Second Grade" = "1",
                                      "Third Grade" = "2",
                                      "Fourth Grade" = "3",
                                      "Fifth Grade" = "4"
                                    ))

#Changing a dot into an NA in gender and tech_er variables
is.na(short_2022$gender) <- short_2022$gender== "."
is.na(short_2022$tech_er) <- short_2022$tech_er == "."

# Checking if that worked
get_labels(short_2022[, 3:5])

$grade
[1] "Second Grade" "Third Grade"  "Fourth Grade" "Fifth Grade" 

$tech_er
[1] NA   "MC" "WM" "TC" "FJ"

$words_read
NULL

C. Getting 2021 & 2022 data ready to merge

# Comparing variable names
library(waldo)
print(compare(names(short_2021), names(short_2022)), n = 25)

    old              | new                           
[1] "student"        -                               
[2] "gender"         | "gender"        [1]           
                     - "student"       [2]           
[3] "grade"          | "grade"         [3]           
[4] "tech_er"        | "tech_er"       [4]           
[5] "attendance"     -                               
[6] "words_read"     | "words_read"    [5]           
[7] "pre_marsi_avg"  | "pre_marsi_avg" [6]           
[8] "post_marsi_avg" -                               
[9] "pre_tt_ge"      | "pre_tt_ge"     [7]           
... ...                ...             and 2 more ...

D. Merging 2021 and 2022 dataset using `rbind.fill()` function from the `{plyr}` package

library(plyr)
merged_data <- rbind.fill(short_2021, short_2022)
summary(merged_data)

    student         gender              grade         tech_er         
 Min.   : 1.00   Length:39          Min.   :1.000   Length:39         
 1st Qu.: 5.50   Class :character   1st Qu.:1.000   Class :character  
 Median :10.00   Mode  :character   Median :2.000   Mode  :character  
 Mean   :10.26                      Mean   :2.308                     
 3rd Qu.:15.00                      3rd Qu.:3.000                     
 Max.   :20.00                      Max.   :4.000                     
                                                                      
   attendance      words_read     pre_marsi_avg   post_marsi_avg 
 Min.   : 1.00   Min.   :  3165   Min.   :1.530   Min.   :3.000  
 1st Qu.:10.75   1st Qu.: 19620   1st Qu.:2.783   1st Qu.:3.400  
 Median :15.50   Median : 30453   Median :3.365   Median :3.730  
 Mean   :13.10   Mean   : 35590   Mean   :3.380   Mean   :3.813  
 3rd Qu.:17.00   3rd Qu.: 48778   3rd Qu.:3.877   3rd Qu.:4.200  
 Max.   :18.00   Max.   :108885   Max.   :4.870   Max.   :5.000  
 NA's   :19      NA's   :8        NA's   :3       NA's   :26     
   pre_tt_ge       post_tt_ge      pre_tt_nce     post_tt_nce   
 Min.   :0.800   Min.   :0.900   Min.   :15.00   Min.   :19.00  
 1st Qu.:2.100   1st Qu.:2.200   1st Qu.:30.50   1st Qu.:37.00  
 Median :2.750   Median :3.100   Median :36.50   Median :46.00  
 Mean   :3.126   Mean   :3.183   Mean   :40.91   Mean   :48.28  
 3rd Qu.:3.700   3rd Qu.:3.900   3rd Qu.:49.00   3rd Qu.:53.00  
 Max.   :6.700   Max.   :6.400   Max.   :88.00   Max.   :84.00  
 NA's   :5       NA's   :10      NA's   :5       NA's   :10     
   pre_tt_gsv     post_tt_gsv    listening_pre  listening_post      year       
 Min.   :314.0   Min.   :319.0   Min.   : 2.0   Min.   : 1.0   Min.   :0.0000  
 1st Qu.:394.5   1st Qu.:399.0   1st Qu.: 4.5   1st Qu.: 5.0   1st Qu.:0.0000  
 Median :416.0   Median :423.0   Median :12.0   Median : 9.0   Median :0.0000  
 Mean   :414.7   Mean   :417.2   Mean   : 9.8   Mean   : 8.6   Mean   :0.4872  
 3rd Qu.:434.0   3rd Qu.:438.0   3rd Qu.:15.0   3rd Qu.:12.0   3rd Qu.:1.0000  
 Max.   :470.0   Max.   :467.0   Max.   :17.0   Max.   :16.0   Max.   :1.0000  
 NA's   :5       NA's   :10      NA's   :4      NA's   :9

#write.csv(merged_data, "overton_merged_data.csv")

2021 & 2022 Overton Data Manipulation

K-16 Literacy Center, UT Tyler

2022-07-28

1. Getting 2021 Data Ready

A. Loading Checking the Names of Variables

B. Subsetting Data

C. Renaming the Column Names

D. Checking the Existing Level and Labels in Some categorial Data Removing Them

E. Checking the Labels of Other Categorical Variables

E. Replacing the Old Labels by Something New

F. Adding a New Column and Checking All Variables

2. Getting 2022 Data

A. Loading Checking the Names of Variables

B. Doing Everything Needed Like 2021 Data

C. Getting 2021 & 2022 data ready to merge

D. Merging 2021 and 2022 dataset using `rbind.fill()` function from the `{plyr}` package

2021 & 2022 Overton Data Manipulation

K-16 Literacy Center, UT Tyler

2022-07-28

1. Getting 2021 Data Ready

A. Loading Checking the Names of Variables

B. Subsetting Data

C. Renaming the Column Names

D. Checking the Existing Level and Labels in Some categorial Data Removing Them

E. Checking the Labels of Other Categorical Variables

E. Replacing the Old Labels by Something New

F. Adding a New Column and Checking All Variables

2. Getting 2022 Data

A. Loading Checking the Names of Variables

B. Doing Everything Needed Like 2021 Data

C. Getting 2021 & 2022 data ready to merge

D. Merging 2021 and 2022 dataset using rbind.fill() function from the {plyr} package

D. Merging 2021 and 2022 dataset using `rbind.fill()` function from the `{plyr}` package