nasa_ent.R

##############################################################################################################
##############################################################################################################
#########################                   Vocabulary Wave II                            ####################
#########################                                                                 ####################
#########################               Created by: Shally Novita                         ####################
#########################                     on 21.08.2023                               ####################
#########################                                                                #####################
##############################################################################################################
##############################################################################################################



###Load data
setwd('C:/Users/shall/OneDrive/Dokumente/Project/vocabulary')
file <- 'Data Nasa Batch II_W2E.csv'
df1 <- read.csv(file, header=TRUE, sep = ",", stringsAsFactors=FALSE)

##Delete No Participation Wave 2
df2 <- df1[df1$Wave2E!= 0, ]

#Judgment Literacy and Numeracy
df2$JN <- df2$E_Num1_OII + df2$E_Num2_OII 
df2$JL <- df2$E_Lit3_OII + df2$E_Lit4_OII
df2$HNA <- df2$A_hitung + df2$A_urut + df2$A_main + df2$A_numerik   + df2$A_hitungrima
df2$Expect <- df2$H_hitungkelompok + df2$H_hitung20 + df2$H_jumlah10 + df2$H_jumlah20 + df2$H_kurang10 + df2$H_kurang20


##Descriptive
library(dplyr)

## Warning: package 'dplyr' was built under R version 4.2.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(Hmisc)

## Warning: package 'Hmisc' was built under R version 4.2.3

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

describe(df2$Wave2)

## df2$Wave2 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        1        0        1        0 
##              
## Value       1
## Frequency  84
## Proportion  1

describe(df2$TotEI)

## df2$TotEI 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       84       29       30    0.997    17.61    9.958     4.15     7.30 
##      .25      .50      .75      .90      .95 
##    11.00    16.50    24.00    28.70    30.85 
## 
## lowest :  1  4  5  6  7, highest: 29 30 31 33 40

sd(df2$TotEI, na.rm=T)

## [1] 8.750928

describe(df2$TotEII)

## df2$TotEII 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       84       29       33    0.998    19.89    9.538     8.15    10.00 
##      .25      .50      .75      .90      .95 
##    14.00    19.00    24.25    32.70    35.70 
## 
## lowest :  6  7  8  9 10, highest: 34 36 38 39 43

sd(df2$TotEII, na.rm=T)

## [1] 8.482466

describe(df2$DifE2_E1)

## df2$DifE2_E1 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       84       29       33    0.998    2.286    9.851   -11.85    -7.70 
##      .25      .50      .75      .90      .95 
##    -2.25     2.00     8.25    12.70    15.85 
## 
## lowest : -23 -19 -14 -12 -11, highest:  15  16  17  21  22

sd(df2$DifE2_E1, na.rm =T)

## [1] 8.799742

describe(df2$HNA)

## df2$HNA 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       83       30       19    0.995     14.7     5.45      7.0      8.2 
##      .25      .50      .75      .90      .95 
##     12.0     15.0     18.0     21.0     22.0 
##                                                                             
## Value          5     6     7     8     9    10    11    12    13    14    15
## Frequency      1     1     4     3     6     1     4     6     9     6     8
## Proportion 0.012 0.012 0.048 0.036 0.072 0.012 0.048 0.072 0.108 0.072 0.096
##                                                           
## Value         16    17    18    19    20    21    22    25
## Frequency      6     6     3     5     4     3     3     4
## Proportion 0.072 0.072 0.036 0.060 0.048 0.036 0.036 0.048
## 
## For the frequency table, variable is rounded to the nearest 0.2

sd(df2$HNA, na.rm = T)

## [1] 4.774768

describe(df2$Expect)

## df2$Expect 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       82       31       16    0.874    20.28    5.045     9.05    13.00 
##      .25      .50      .75      .90      .95 
##    18.00    23.50    24.00    24.00    24.00 
##                                                                             
## Value       8.00  8.96  9.92 12.00 12.96 13.92 14.88 16.00 16.96 17.92 18.88
## Frequency      2     3     1     2     4     2     2     1     1     8     4
## Proportion 0.024 0.037 0.012 0.024 0.049 0.024 0.024 0.012 0.012 0.098 0.049
##                                         
## Value      20.00 20.96 21.92 22.88 24.00
## Frequency      2     2     4     3    41
## Proportion 0.024 0.024 0.049 0.037 0.500
## 
## For the frequency table, variable is rounded to the nearest 0.16

sd(df2$Expect, na.rm=T)

## [1] 4.909739

table(df2$Expect)

## 
##  8  9 10 12 13 14 15 16 17 18 19 20 21 22 23 24 
##  2  3  1  2  4  2  2  1  1  8  4  2  2  4  3 41

describe(df2$JN)

## df2$JN 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       38       75       18    0.992    35.95    12.72    13.85    22.70 
##      .25      .50      .75      .90      .95 
##    29.25    37.00    44.75    48.30    49.00 
##                                                                             
## Value       7.00 12.59 13.88 21.62 22.91 23.77 27.64 32.80 33.66 39.68 42.69
## Frequency      1     1     1     1     2     3     1     6     3     1     4
## Proportion 0.026 0.026 0.026 0.026 0.053 0.079 0.026 0.158 0.079 0.026 0.105
##                                                     
## Value      43.98 44.84 45.70 46.99 47.85 48.71 50.00
## Frequency      4     2     1     1     2     3     1
## Proportion 0.105 0.053 0.026 0.026 0.053 0.079 0.026
## 
## For the frequency table, variable is rounded to the nearest 0.43

sd(df2$JN, na.rm = T)

## [1] 11.38028

describe(df2$JL)

## df2$JL 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       38       75       21    0.992    168.2     68.9    33.85    53.70 
##      .25      .50      .75      .90      .95 
##   130.50   203.00   205.00   229.80   234.90 
##                                                                          
## Value       23.00  31.76  33.95  51.47  53.66 101.84 121.55 152.21 182.87
## Frequency       1      1      1      1      1      4      1      4      3
## Proportion  0.026  0.026  0.026  0.026  0.026  0.105  0.026  0.105  0.079
##                                                                   
## Value      202.58 204.77 213.53 222.29 226.67 233.24 239.81 242.00
## Frequency      10      3      1      2      1      2      1      1
## Proportion  0.263  0.079  0.026  0.053  0.026  0.053  0.026  0.026
## 
## For the frequency table, variable is rounded to the nearest 2.19

describe(df2$Umur)

## df2$Umur 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       84       29       19    0.968    59.56    5.854    48.00    51.30 
##      .25      .50      .75      .90      .95 
##    56.75    60.00    63.25    66.00    66.00 
##                                                                             
## Value      48.00 48.95 50.85 51.99 52.94 53.89 54.84 55.98 56.93 57.88 58.83
## Frequency      6     1     2     2     1     2     1     6     4     2     1
## Proportion 0.071 0.012 0.024 0.024 0.012 0.024 0.012 0.071 0.048 0.024 0.012
##                                                           
## Value      59.97 60.92 61.87 62.82 63.96 64.91 65.86 67.00
## Frequency     25     2     1     7     4     1    13     3
## Proportion 0.298 0.024 0.012 0.083 0.048 0.012 0.155 0.036
## 
## For the frequency table, variable is rounded to the nearest 0.19

sd(df2$Umur, na.rm = T)

## [1] 5.285468

describe(df2$PenghasilanI)

## df2$PenghasilanI 
##        n  missing distinct     Info     Mean      Gmd 
##       81       32        6    0.767    1.765    1.146 
##                                               
## Value       1.00  1.96  2.98  4.00  4.96  7.00
## Frequency     49    18     6     4     2     2
## Proportion 0.605 0.222 0.074 0.049 0.025 0.025
## 
## For the frequency table, variable is rounded to the nearest 0.06

sd(df2$PenghasilanI, na.rm = T)

## [1] 1.296839

table(df2$PenghasilanI)

## 
##  1  2  3  4  5  7 
## 49 18  6  4  2  2

table(df2$JK)

## 
##  0  1 
## 38 46

#Numeracy1
df2$ent1 <- as.numeric(df2$E1) + as.numeric(df2$E2) + as.numeric(df2$E3) + as.numeric(df2$E4) + as.numeric(df2$E5)  
df2$ent2 <- df2$E6 + df2$E7 + df2$E8 + df2$E9 + df2$E10
df2$ent3 <- df2$E11 + df2$E12 + df2$E13 + df2$E14 + df2$E15  
df2$ent4 <- df2$E16 + df2$E17 + df2$E18 + df2$E19 + df2$E20 
df2$ent5 <- df2$E21 + df2$E22 + df2$E23 + df2$E24 + df2$E25
df2$ent6 <- df2$E26 + df2$E27 + df2$E28 + df2$E29 + df2$E30 
df2$ent7 <- df2$E31 + df2$E32 + df2$E33 + df2$E34 + df2$E35 
df2$ent8 <- df2$E36 + df2$E37 + df2$E38 + df2$E39 + df2$E40
df2$ent9 <- df2$E41 + df2$E42 + df2$E43 + df2$E44 + as.numeric(df2$E45) 
df2$ent <- df2$ent1 + df2$ent2 + df2$ent3 + df2$ent4 + df2$ent5 + df2$ent6 +
  df2$ent7 + df2$ent8 + df2$ent9

#Numeracy2
df2$ent1II <- df2$E1II + df2$E2II + df2$E3II + df2$E4II + df2$E5II  
df2$ent2II <- df2$E6II + df2$E7II + df2$E8II + df2$E9II + df2$E10II
df2$ent3II <- as.numeric(df2$E11II) + as.numeric(df2$E12II) + as.numeric(df2$E13II) + as.numeric(df2$E14II) + as.numeric(df2$E15II)

## Warning: NAs introduced by coercion

df2$ent4II <- df2$E16II + df2$E17II + df2$E18II + df2$E19II + df2$E20II 
df2$ent5II <- df2$E21II + df2$E22II + df2$E23II + df2$E24II + df2$E25II
df2$ent6II <- df2$E26II + df2$E27II + df2$E28II + df2$E29II + df2$E30II 
df2$ent7II <- df2$E31II + df2$E32II + df2$E33II + df2$E34II + df2$E35II 
df2$ent8II <- df2$E36II + df2$E37II + df2$E38II + df2$E39II + df2$E40II
df2$ent9II <- as.numeric(df2$E41II) + as.numeric(df2$E42II) + as.numeric(df2$E43II) + as.numeric(df2$E44II) + as.numeric(df2$E45II)

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion

df2$entII <- df2$ent1II + df2$ent2II + df2$ent3II + df2$ent4II + df2$ent5II + df2$ent6II +
  df2$ent7II + df2$ent8II + df2$ent9II


##Descriptive
describe(df2$ent1)

## df2$ent1 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        6    0.918    3.762    1.405 
##                                               
## Value          0     1     2     3     4     5
## Frequency      1     7     5    17    22    32
## Proportion 0.012 0.083 0.060 0.202 0.262 0.381
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent1, na.rm=T)

## [1] 1.313682

describe(df2$ent2)

## df2$ent2 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        6     0.96    1.976    1.649 
##                                               
## Value          0     1     2     3     4     5
## Frequency     15    20    21    13    10     5
## Proportion 0.179 0.238 0.250 0.155 0.119 0.060
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent2, na.rm=T)

## [1] 1.464242

describe(df2$ent3)

## df2$ent3 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        6    0.949    2.762    1.478 
##                                               
## Value          0     1     2     3     4     5
## Frequency      3    11    25    17    20     8
## Proportion 0.036 0.131 0.298 0.202 0.238 0.095
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent3, na.rm=T)

## [1] 1.313682

describe(df2$ent4)

## df2$ent4 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        6     0.94    1.821    1.444 
##                                               
## Value          0     1     2     3     4     5
## Frequency     12    28    21    10    12     1
## Proportion 0.143 0.333 0.250 0.119 0.143 0.012
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent4, na.rm=T)

## [1] 1.300238

describe(df2$ent5)

## df2$ent5 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        6    0.771   0.8214    1.196 
##                                               
## Value          0     1     2     3     4     5
## Frequency     51    13     9     7     3     1
## Proportion 0.607 0.155 0.107 0.083 0.036 0.012
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent5, na.rm=T)

## [1] 1.243399

describe(df2$ent6)

## df2$ent6 
##        n  missing distinct     Info     Mean      Gmd 
##       83       30        6    0.949    1.904    1.529 
##                                               
## Value          0     1     2     3     4     5
## Frequency     16    16    26    11    13     1
## Proportion 0.193 0.193 0.313 0.133 0.157 0.012
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent6, na.rm=T)

## [1] 1.358034

describe(df2$ent7)

## df2$ent7 
##        n  missing distinct     Info     Mean      Gmd 
##       83       30        6    0.961     1.88    1.838 
##                                               
## Value          0     1     2     3     4     5
## Frequency     22    18    15    11    10     7
## Proportion 0.265 0.217 0.181 0.133 0.120 0.084
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent7, na.rm=T)

## [1] 1.633473

describe(df2$ent8)

## df2$ent8 
##        n  missing distinct     Info     Mean      Gmd 
##       83       30        6    0.964    2.325    1.747 
##                                               
## Value          0     1     2     3     4     5
## Frequency     10    18    21    12    13     9
## Proportion 0.120 0.217 0.253 0.145 0.157 0.108
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent8, na.rm=T)

## [1] 1.538993

describe(df2$ent9)

## df2$ent9 
##        n  missing distinct     Info     Mean      Gmd 
##       82       31        6     0.93    1.341    1.463 
##                                               
## Value          0     1     2     3     4     5
## Frequency     28    23    16     8     4     3
## Proportion 0.341 0.280 0.195 0.098 0.049 0.037
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent9, na.rm=T)

## [1] 1.362818

describe(df2$ent1II)

## df2$ent1II 
##        n  missing distinct     Info     Mean      Gmd 
##       82       31        4     0.89        4   0.9606 
##                                   
## Value       2.00  2.99  3.98  5.00
## Frequency      5    17    33    27
## Proportion 0.061 0.207 0.402 0.329
## 
## For the frequency table, variable is rounded to the nearest 0.03

sd(df2$ent1II, na.rm=T)

## [1] 0.8888889

describe(df2$ent2II)

## df2$ent2II 
##        n  missing distinct     Info     Mean      Gmd 
##       84       29        6    0.949    1.726    1.569 
##                                               
## Value          0     1     2     3     4     5
## Frequency     18    26    15    14     8     3
## Proportion 0.214 0.310 0.179 0.167 0.095 0.036
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent2II, na.rm=T)

## [1] 1.408675

describe(df2$ent3II)

## df2$ent3II 
##        n  missing distinct     Info     Mean      Gmd 
##       80       33        7    0.949    3.388    1.745 
##                                                     
## Value       0.00  0.91  1.95  2.99  3.90  4.94 13.00
## Frequency      3     6    15    14    25    16     1
## Proportion 0.038 0.075 0.188 0.175 0.312 0.200 0.013
## 
## For the frequency table, variable is rounded to the nearest 0.13

sd(df2$ent3II, na.rm=T)

## [1] 1.753793

describe(df2$ent4II)

## df2$ent4II 
##        n  missing distinct     Info     Mean      Gmd 
##       83       30        6    0.953    1.699    1.595 
##                                               
## Value          0     1     2     3     4     5
## Frequency     20    23    16    13     8     3
## Proportion 0.241 0.277 0.193 0.157 0.096 0.036
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent4II, na.rm=T)

## [1] 1.429096

describe(df2$ent5II)

## df2$ent5II 
##        n  missing distinct     Info     Mean      Gmd 
##       82       31        6    0.905    2.098    1.338 
##                                               
## Value          0     1     2     3     4     5
## Frequency     10    11    36    14     8     3
## Proportion 0.122 0.134 0.439 0.171 0.098 0.037
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent5II, na.rm=T)

## [1] 1.233381

describe(df2$ent6II)

## df2$ent6II 
##        n  missing distinct     Info     Mean      Gmd 
##       81       32        6    0.961    2.012     1.69 
##                                               
## Value          0     1     2     3     4     5
## Frequency     14    20    19    14     7     7
## Proportion 0.173 0.247 0.235 0.173 0.086 0.086
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent6II, na.rm=T)

## [1] 1.50411

describe(df2$ent7II)

## df2$ent7II 
##        n  missing distinct     Info     Mean      Gmd 
##       82       31        6    0.963    2.098    1.694 
##                                               
## Value          0     1     2     3     4     5
## Frequency     13    20    17    16    10     6
## Proportion 0.159 0.244 0.207 0.195 0.122 0.073
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent7II, na.rm=T)

## [1] 1.495753

describe(df2$ent8II)

## df2$ent8II 
##        n  missing distinct     Info     Mean      Gmd 
##       82       31        6    0.951    1.963    1.547 
##                                               
## Value          0     1     2     3     4     5
## Frequency     12    22    23    10    12     3
## Proportion 0.146 0.268 0.280 0.122 0.146 0.037
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent8II, na.rm=T)

## [1] 1.382832

describe(df2$ent9II)

## df2$ent9II 
##        n  missing distinct     Info     Mean      Gmd 
##       75       38        6    0.895     1.24    1.541 
##                                               
## Value          0     1     2     3     4     5
## Frequency     34    15     9    10     5     2
## Proportion 0.453 0.200 0.120 0.133 0.067 0.027
## 
## For the frequency table, variable is rounded to the nearest 0.05

sd(df2$ent9II, na.rm=T)

## [1] 1.450443

t.test(df2$TotEI, df2$TotEII)

## 
##  Welch Two Sample t-test
## 
## data:  df2$TotEI and df2$TotEII
## t = -1.7189, df = 165.84, p-value = 0.0875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.9111285  0.3396999
## sample estimates:
## mean of x mean of y 
##  17.60714  19.89286

t.test(df2$ent1, df2$ent1II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent1 and df2$ent1II
## t = -1.3705, df = 146.17, p-value = 0.1726
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5814328  0.1052424
## sample estimates:
## mean of x mean of y 
##  3.761905  4.000000

t.test(df2$ent2, df2$ent2II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent2 and df2$ent2II
## t = 1.1277, df = 165.75, p-value = 0.2611
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1877038  0.6877038
## sample estimates:
## mean of x mean of y 
##   1.97619   1.72619

t.test(df2$ent3, df2$ent3II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent3 and df2$ent3II
## t = -2.5757, df = 146.24, p-value = 0.011
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.1056094 -0.1455811
## sample estimates:
## mean of x mean of y 
##  2.761905  3.387500

t.test(df2$ent4, df2$ent4II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent4 and df2$ent4II
## t = 0.57982, df = 163.16, p-value = 0.5628
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2949985  0.5402652
## sample estimates:
## mean of x mean of y 
##  1.821429  1.698795

t.test(df2$ent5, df2$ent5II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent5 and df2$ent5II
## t = -6.6382, df = 163.96, p-value = 4.433e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.655721 -0.896544
## sample estimates:
## mean of x mean of y 
## 0.8214286 2.0975610

t.test(df2$ent6, df2$ent6II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent6 and df2$ent6II
## t = -0.48553, df = 159.46, p-value = 0.628
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5510066  0.3335441
## sample estimates:
## mean of x mean of y 
##  1.903614  2.012346

t.test(df2$ent7, df2$ent7II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent7 and df2$ent7II
## t = -0.89441, df = 162.07, p-value = 0.3724
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6994480  0.2633622
## sample estimates:
## mean of x mean of y 
##  1.879518  2.097561

t.test(df2$ent8, df2$ent8II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent8 and df2$ent8II
## t = 1.5892, df = 161.56, p-value = 0.114
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08780299  0.81157613
## sample estimates:
## mean of x mean of y 
##  2.325301  1.963415

t.test(df2$ent9, df2$ent9II)

## 
##  Welch Two Sample t-test
## 
## data:  df2$ent9 and df2$ent9II
## t = 0.45061, df = 151.51, p-value = 0.6529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3434090  0.5463358
## sample estimates:
## mean of x mean of y 
##  1.341463  1.240000

##Internal Consistency
library(ltm)

## Warning: package 'ltm' was built under R version 4.2.3

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: msm

## Loading required package: polycor

library(MASS)

expect <- dplyr::select(df2, H_hitungkelompok:H_kurang20)
cronbach.alpha(expect, na.rm=T)

## 
## Cronbach's alpha for the 'expect' data-set
## 
## Items: 6
## Sample units: 113
## alpha: 0.952

activities <- dplyr::select(df2, A_hitung:A_hitungrima)
cronbach.alpha(activities, na.rm=T)

## 
## Cronbach's alpha for the 'activities' data-set
## 
## Items: 5
## Sample units: 113
## alpha: 0.817

ENI<- dplyr::select(df2, E1:E45)
cronbach.alpha(ENI, na.rm=T)

## 
## Cronbach's alpha for the 'ENI' data-set
## 
## Items: 45
## Sample units: 113
## alpha: 0.9

ENII <- dplyr::select(df2, E1II: E45II)
cronbach.alpha(ENII, na.rm=T)

## 
## Cronbach's alpha for the 'ENII' data-set
## 
## Items: 45
## Sample units: 113
## alpha: 0.877

JN <- dplyr::select(df2, E_Num1_OII, E_Num2_OII )
cronbach.alpha(JN, na.rm=T)

## 
## Cronbach's alpha for the 'JN' data-set
## 
## Items: 2
## Sample units: 113
## alpha: 0.072

JL <- dplyr::select(df2, E_Lit3_OII, E_Lit4_OII)
cronbach.alpha(JL, na.rm=T)

## 
## Cronbach's alpha for the 'JL' data-set
## 
## Items: 2
## Sample units: 113
## alpha: 0.017

##Correlation
dcor <- dplyr::select(df2, JK, Umur, TotEI, TotEII, DifE2_E1, HNA, Expect, 
                PenghasilanI, JN)

cor(dcor, method= "pearson", use='complete.obs')

##                        JK        Umur      TotEI       TotEII    DifE2_E1
## JK            1.000000000 -0.20503762 -0.1624715  0.235256223  0.38765117
## Umur         -0.205037619  1.00000000  0.2702712  0.250803276 -0.01413474
## TotEI        -0.162471463  0.27027122  1.0000000  0.471972597 -0.50017874
## TotEII        0.235256223  0.25080328  0.4719726  1.000000000  0.52733779
## DifE2_E1      0.387651166 -0.01413474 -0.5001787  0.527337788  1.00000000
## HNA          -0.047240524 -0.10382136 -0.3182458 -0.241621788  0.06938838
## Expect        0.002391285 -0.09718759 -0.1444479 -0.004537562  0.13475502
## PenghasilanI -0.054704100 -0.21867285  0.1223444 -0.200323738 -0.31466793
## JN           -0.072902758 -0.06827852  0.4371099  0.453582348  0.02424433
##                      HNA       Expect PenghasilanI           JN
## JK           -0.04724052  0.002391285 -0.054704100 -0.072902758
## Umur         -0.10382136 -0.097187588 -0.218672851 -0.068278523
## TotEI        -0.31824582 -0.144447897  0.122344424  0.437109862
## TotEII       -0.24162179 -0.004537562 -0.200323738  0.453582348
## DifE2_E1      0.06938838  0.134755021 -0.314667933  0.024244327
## HNA           1.00000000  0.432435058  0.137265662 -0.130932564
## Expect        0.43243506  1.000000000  0.031491073 -0.060679394
## PenghasilanI  0.13726566  0.031491073  1.000000000  0.006182131
## JN           -0.13093256 -0.060679394  0.006182131  1.000000000

library(psych)

## Warning: package 'psych' was built under R version 4.2.3

## 
## Attaching package: 'psych'

## The following object is masked from 'package:ltm':
## 
##     factor.scores

## The following object is masked from 'package:polycor':
## 
##     polyserial

## The following object is masked from 'package:Hmisc':
## 
##     describe

cor_sig <- corr.test(dcor)$p    
cor_sig

##                       JK        Umur        TotEI       TotEII     DifE2_E1
## JK           0.000000000 1.000000000 7.907864e-02 1.000000e+00 1.468997e-01
## Umur         0.571955879 0.000000000 1.000000e+00 1.000000e+00 1.000000e+00
## TotEI        0.002396322 0.088710931 0.000000e+00 1.381927e-04 6.530934e-06
## TotEII       0.836222767 0.193640474 4.064492e-06 0.000000e+00 8.866195e-05
## DifE2_E1     0.005065507 0.666070824 1.814148e-07 2.533199e-06 0.000000e+00
## HNA          0.084015882 0.212101150 2.402244e-01 8.375702e-01 3.304165e-01
## Expect       0.766760533 0.525138870 4.780006e-01 3.344002e-01 8.301585e-01
## PenghasilanI 0.077276622 0.002426239 5.464505e-03 7.315799e-01 1.602623e-02
## JN           0.570981247 0.544834695 6.075245e-03 4.204864e-03 8.830607e-01
##                      HNA    Expect PenghasilanI        JN
## JK           1.000000000 1.0000000   1.00000000 1.0000000
## Umur         1.000000000 1.0000000   0.07907864 1.0000000
## TotEI        1.000000000 1.0000000   0.15300613 0.1640316
## TotEII       1.000000000 1.0000000   1.00000000 0.1261459
## DifE2_E1     1.000000000 1.0000000   0.41668207 1.0000000
## HNA          0.000000000 0.1092783   1.00000000 1.0000000
## Expect       0.003525105 0.0000000   1.00000000 1.0000000
## PenghasilanI 0.361287582 0.3665283   0.00000000 1.0000000
## JN           0.598202686 0.7895655   0.97103194 0.0000000

##Analysis of Numeracy (Paper 1)
library(psych)
fE1 <- lm(DifE2_E1 ~ PenghasilanI + JK + Umur,data=df2)
summary(fE1)

## 
## Call:
## lm(formula = DifE2_E1 ~ PenghasilanI + JK + Umur, data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.7216  -5.5361  -0.5017   6.4738  17.2800 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   14.3682    12.3673   1.162   0.2489  
## PenghasilanI  -1.7489     0.7878  -2.220   0.0294 *
## JK             4.6076     1.9392   2.376   0.0200 *
## Umur          -0.1954     0.1933  -1.011   0.3152  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.371 on 77 degrees of freedom
##   (32 observations deleted due to missingness)
## Multiple R-squared:  0.1547, Adjusted R-squared:  0.1218 
## F-statistic: 4.698 on 3 and 77 DF,  p-value: 0.004584

fE2 <- lm(DifE2_E1 ~ PenghasilanI + JK + Umur + HNA, data=df2)
summary(fE2)

## 
## Call:
## lm(formula = DifE2_E1 ~ PenghasilanI + JK + Umur + HNA, data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.1247  -5.5772  -0.1479   6.6681  17.9077 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    7.5065    12.7884   0.587  0.55898   
## PenghasilanI  -1.8483     0.7756  -2.383  0.01970 * 
## JK             5.3727     1.9386   2.771  0.00703 **
## Umur          -0.1682     0.1906  -0.882  0.38041   
## HNA            0.3513     0.2005   1.752  0.08387 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.226 on 75 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.1983, Adjusted R-squared:  0.1555 
## F-statistic: 4.637 on 4 and 75 DF,  p-value: 0.002124

fE3 <- lm(DifE2_E1 ~ PenghasilanI + JK + Umur + HNA + Expect, data=df2)
summary(fE3)

## 
## Call:
## lm(formula = DifE2_E1 ~ PenghasilanI + JK + Umur + HNA + Expect, 
##     data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.4864  -5.7553  -0.0935   5.9574  18.4046 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   11.3824    13.2701   0.858  0.39384   
## PenghasilanI  -1.9980     0.7893  -2.531  0.01351 * 
## JK             5.5085     1.9499   2.825  0.00609 **
## Umur          -0.1974     0.1958  -1.008  0.31658   
## HNA            0.3499     0.2170   1.612  0.11120   
## Expect        -0.1007     0.2017  -0.499  0.61924   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.259 on 73 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.2105, Adjusted R-squared:  0.1564 
## F-statistic: 3.893 on 5 and 73 DF,  p-value: 0.003492

fE4 <- lm(DifE2_E1 ~ PenghasilanI + JK + Umur + HNA + Expect + JN, data=df2)
summary(fE4)

## 
## Call:
## lm(formula = DifE2_E1 ~ PenghasilanI + JK + Umur + HNA + Expect + 
##     JN, data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.6364  -4.1164  -0.6836   4.9039  13.2770 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -7.68472   19.25235  -0.399   0.6926  
## PenghasilanI -3.14619    1.67054  -1.883   0.0694 .
## JK            6.76601    2.83529   2.386   0.0235 *
## Umur          0.03628    0.25409   0.143   0.8874  
## HNA           0.16162    0.30251   0.534   0.5971  
## Expect        0.21134    0.33535   0.630   0.5333  
## JN            0.05639    0.11968   0.471   0.6409  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.087 on 30 degrees of freedom
##   (76 observations deleted due to missingness)
## Multiple R-squared:  0.2683, Adjusted R-squared:  0.1219 
## F-statistic: 1.833 on 6 and 30 DF,  p-value: 0.1261

nasa_ent.R

shall

2023-11-29