Prepare:

RQ1: To what extent (if any) do raw scores on standards-based CFAs predict performance on corresponding standards-based questions on the NC Check-In 2 (NCCI) and, ultimately, standards mastery?

Wrangle:

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(readr)
# Load the CFA scores: one row per student and one score column per standard
# (NBT1&2, NBT4, NBT5, NBT7). The file's unnamed first column is auto-named
# `...1` by readr, as the import log below reports.
CFAData <- read_csv(
  file = "/cloud/project/5Clean_Data_CFAs - Math (5).csv"
)
## New names:
## * `` -> ...1
## Rows: 66 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): ...1, NBT1&2, NBT4, NBT5, NBT7
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the NC Check-In 2 item-level responses. The path is relative, so it is
# resolved against the current working directory. The long
# `X5Fourth_Grade_...` object is kept as a second name for the same tibble,
# exactly as the original chained assignment created it.
X5Fourth_Grade_Dataset_NCCheckIn2_Mathematics_copy_3 <- read_csv(
  file = "5Fourth Grade Dataset_NCCheckIn2_Mathematics copy 3.csv"
)
CheckInData <- X5Fourth_Grade_Dataset_NCCheckIn2_Mathematics_copy_3
## New names:
## * `` -> ...2
## * `` -> ...3
## Rows: 66 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (25): Item_3, Item_15, Item_17, Item_20, Item_24, Item_2, Item_7, Item_1...
## dbl  (1): Student
## lgl  (2): ...2, ...3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Answer key for the 20 scored Check-In items, keyed by item name so that each
# response column is tied to its own correct answer instead of to a position
# in a second, separately maintained list. Each row of five items below is one
# of the groups that are scored together further down.
CorrectAnswers <- c(
  Item_3 = "D", Item_15 = "B", Item_17 = "C", Item_20 = "D", Item_24 = "C",
  Item_2 = "A", Item_7 = "D", Item_10 = "A", Item_14 = "D", Item_22 = "A",
  Item_1 = "A", Item_8 = "D", Item_11 = "C", Item_12 = "B", Item_23 = "D",
  Item_4 = "D", Item_6 = "B", Item_13 = "C", Item_16 = "D", Item_19 = "B"
)

# Keep only the keyed item columns, in key order. Selecting by the key's own
# names guarantees that column i is compared with answer i, and it errors
# immediately if a keyed item is missing from the data.
CheckInData <- CheckInData[, names(CorrectAnswers), drop = FALSE]

# Compare every response column with its key entry. The result holds one
# TRUE/FALSE cell per student per item (NA where the response is missing) and
# keeps the item names as column names, which the scoring below relies on.
NCCheckIn_Is_Correct <- sweep(CheckInData, 2, CorrectAnswers, "==")
# Per-student percent of a five-item group answered correctly.
# Missing comparisons are dropped from the sum (na.rm = TRUE) while the divisor
# stays fixed at 5, so an item with no recorded result counts as not correct.
score_item_group <- function(items) {
  (rowSums(NCCheckIn_Is_Correct[, items], na.rm = TRUE) / 5) * 100
}

NBT.2CheckIn <- score_item_group(
  c("Item_3", "Item_15", "Item_17", "Item_20", "Item_24")
)
NBT.4CheckIn <- score_item_group(
  c("Item_2", "Item_7", "Item_10", "Item_14", "Item_22")
)
NBT.5CheckIn <- score_item_group(
  c("Item_1", "Item_8", "Item_11", "Item_12", "Item_23")
)
NBT.7CheckIn <- score_item_group(
  c("Item_4", "Item_6", "Item_13", "Item_16", "Item_19")
)
# Wrap each standard's Check-In score vector in a one-column data frame (the
# column takes the variable's name) and add a positional student ID.
# The ID is generated from the frame's own row count via row_number() rather
# than the literal 1:66, so the script keeps working if the roster size
# changes; for 66 rows the result is the same integer sequence 1..66.
NBT.2CheckIn <- data.frame(NBT.2CheckIn)
NBT.2CheckIn <- mutate(NBT.2CheckIn, ID = row_number())

NBT.4CheckIn <- data.frame(NBT.4CheckIn)
NBT.4CheckIn <- mutate(NBT.4CheckIn, ID = row_number())

NBT.5CheckIn <- data.frame(NBT.5CheckIn)
NBT.5CheckIn <- mutate(NBT.5CheckIn, ID = row_number())

NBT.7CheckIn <- data.frame(NBT.7CheckIn)
NBT.7CheckIn <- mutate(NBT.7CheckIn, ID = row_number())
 # Put the CFA NBT5 score next to the NBT5 Check-In score and ID, then print.
 # NOTE(review): the two tables are glued together by row position only; this
 # presumes both files list students in the same order -- confirm.
 CFA_NBT.5 <- CFAData %>%
   select(NBT5)
 CombinedNBT.5 <- CFA_NBT.5 %>%
   bind_cols(NBT.5CheckIn)
 CombinedNBT.5
## # A tibble: 66 × 3
##     NBT5 NBT.5CheckIn    ID
##    <dbl>        <dbl> <int>
##  1    90           40     1
##  2   100          100     2
##  3    90          100     3
##  4    60            0     4
##  5   100           80     5
##  6   100          100     6
##  7    80           40     7
##  8   100           80     8
##  9   100           80     9
## 10   100           60    10
## # … with 56 more rows
# Put the CFA NBT1&2 score next to the NBT2 Check-In score and ID, then print.
# NOTE(review): the two tables are glued together by row position only; this
# presumes both files list students in the same order -- confirm.
CFA_NBT.2 <- CFAData %>%
  select(`NBT1&2`)
CombinedNBT.2 <- CFA_NBT.2 %>%
  bind_cols(NBT.2CheckIn)
CombinedNBT.2
## # A tibble: 66 × 3
##    `NBT1&2` NBT.2CheckIn    ID
##       <dbl>        <dbl> <int>
##  1       90           20     1
##  2      100           80     2
##  3      100           60     3
##  4       NA            0     4
##  5       80           60     5
##  6       90          100     6
##  7       NA           40     7
##  8       60           60     8
##  9      100          100     9
## 10       80           40    10
## # … with 56 more rows
# Put the CFA NBT4 score next to the NBT4 Check-In score and ID, then print.
# NOTE(review): the two tables are glued together by row position only; this
# presumes both files list students in the same order -- confirm.
CFA_NBT.4 <- CFAData %>%
  select(NBT4)
CombinedNBT.4 <- CFA_NBT.4 %>%
  bind_cols(NBT.4CheckIn)
CombinedNBT.4
## # A tibble: 66 × 3
##     NBT4 NBT.4CheckIn    ID
##    <dbl>        <dbl> <int>
##  1    90           60     1
##  2   100          100     2
##  3   100           60     3
##  4    50           40     4
##  5    90           80     5
##  6   100          100     6
##  7    60           20     7
##  8   100           60     8
##  9    90          100     9
## 10   100          100    10
## # … with 56 more rows
# Put the CFA NBT7 score next to the NBT7 Check-In score and ID, then print.
# NOTE(review): the two tables are glued together by row position only; this
# presumes both files list students in the same order -- confirm.
CFA_NBT.7 <- CFAData %>%
  select(NBT7)
CombinedNBT.7 <- CFA_NBT.7 %>%
  bind_cols(NBT.7CheckIn)
CombinedNBT.7
## # A tibble: 66 × 3
##     NBT7 NBT.7CheckIn    ID
##    <dbl>        <dbl> <int>
##  1    80           40     1
##  2    90           60     2
##  3    90           40     3
##  4    NA           40     4
##  5    80           60     5
##  6    80           40     6
##  7    90           40     7
##  8   100           80     8
##  9    80           60     9
## 10    80           20    10
## # … with 56 more rows
# Build the wide table: the four per-standard tables side by side, in the
# order NBT2, NBT4, NBT5, NBT7. Each input carries its own ID column, so
# bind_cols() de-duplicates them to ID...3, ID...6, ID...9 and ID...12, as the
# message below reports.
MegaData <- CombinedNBT.2 %>%
  bind_cols(CombinedNBT.4, CombinedNBT.5, CombinedNBT.7)
## New names:
## * ID -> ID...3
## * ID -> ID...6
## * ID -> ID...9
## * ID -> ID...12
# Give the NBT1&2 CFA column a syntactic name so it can be used like the
# other standards' columns.
MegaData <- MegaData %>%
  rename(NBT2 = `NBT1&2`)

# Keep one ID column (ID...3) plus the CFA / Check-In pair for each standard;
# the other three duplicated ID columns are dropped.
MegaData2 <- MegaData %>%
  select(
    NBT2, NBT.2CheckIn, `ID...3`,
    NBT4, NBT.4CheckIn,
    NBT5, NBT.5CheckIn,
    NBT7, NBT.7CheckIn
  )
# Long-format slice for NBT2: ID, CFA score, Check-In score, standard label.
# Renaming inside select() keeps the same column order as renaming afterwards.
trial <- MegaData2 %>%
  select(`ID...3`, CFA = NBT2, CheckIn = NBT.2CheckIn) %>%
  mutate(standard = "NBT2")
# Long-format slice for NBT4: ID, CFA score, Check-In score, standard label.
# Renaming inside select() keeps the same column order as renaming afterwards.
trial4 <- MegaData2 %>%
  select(`ID...3`, CFA = NBT4, CheckIn = NBT.4CheckIn) %>%
  mutate(standard = "NBT4")
# Long-format slice for NBT5: ID, CFA score, Check-In score, standard label.
# Renaming inside select() keeps the same column order as renaming afterwards.
trial5 <- MegaData2 %>%
  select(`ID...3`, CFA = NBT5, CheckIn = NBT.5CheckIn) %>%
  mutate(standard = "NBT5")
# Long-format slice for NBT7: ID, CFA score, Check-In score, standard label.
# Renaming inside select() keeps the same column order as renaming afterwards.
trial7 <- MegaData2 %>%
  select(`ID...3`, CFA = NBT7, CheckIn = NBT.7CheckIn) %>%
  mutate(standard = "NBT7")
# Stack the four per-standard slices into one long table with one row per
# student per standard; the `standard` column says which slice a row came from.
VerticalDataFrame <- trial %>%
  bind_rows(trial4, trial5, trial7)
## New names:
## * ID...3 -> ID
## New names:
## * ID...3 -> ID
## New names:
## * ID...3 -> ID
## New names:
## * ID...3 -> ID

Explore and Model:

# Linear fit of the NBT2 Check-In score on the NBT2 CFA score. Only the
# smoothed line is drawn (no points); rows with a missing value are dropped
# with a warning, as shown below.
ggplot(data = MegaData, mapping = aes(x = NBT2, y = NBT.2CheckIn)) +
  geom_smooth(method = lm) +
  labs(
    x = "CFA NBT2",
    y = "Check-In NBT2",
    title = "Ability of CFA Data to Predict Student Performance on NC Check-Ins for Standard NBT2"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).

# Linear fit of the NBT4 Check-In score on the NBT4 CFA score. Only the
# smoothed line is drawn (no points); rows with a missing value are dropped
# with a warning, as shown below.
ggplot(data = MegaData, mapping = aes(x = NBT4, y = NBT.4CheckIn)) +
  geom_smooth(method = lm) +
  labs(
    x = "CFA NBT4",
    y = "Check-In NBT4",
    title = "Ability of CFA Data to Predict Student Performance on NC Check-Ins for Standard NBT4"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

# Linear fit of the NBT5 Check-In score on the NBT5 CFA score. Only the
# smoothed line is drawn (no points); rows with a missing value are dropped
# with a warning, as shown below.
ggplot(data = MegaData, mapping = aes(x = NBT5, y = NBT.5CheckIn)) +
  geom_smooth(method = lm) +
  labs(
    x = "CFA NBT5",
    y = "Check-In NBT5",
    title = "Ability of CFA Data to Predict Student Performance on NC Check-Ins for Standard NBT5"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

# Linear fit of the NBT7 Check-In score on the NBT7 CFA score. Only the
# smoothed line is drawn (no points); rows with a missing value are dropped
# with a warning, as shown below.
ggplot(data = MegaData, mapping = aes(x = NBT7, y = NBT.7CheckIn)) +
  geom_smooth(method = lm) +
  labs(
    x = "CFA NBT7",
    y = "Check-In NBT7",
    title = "Ability of CFA Data to Predict Student Performance on NC Check-Ins for Standard NBT7"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).

# Pooled linear fit across all four standards, using the long table: Check-In
# score against CFA score, drawn as a single blue line (no points). Rows with
# a missing value are dropped with a warning, as shown below.
ggplot(data = VerticalDataFrame, mapping = aes(x = CFA, y = CheckIn)) +
  geom_smooth(color = "blue", method = lm) +
  labs(
    x = "CFA Data",
    y = "Check-In Data",
    title = "Ability of CFA Data to Predict Student Performance on NC Check-Ins"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).

Communicate: