library(readxl)
library(kableExtra)
# Chunk defaults for the knitted report: echo the code, hide warnings and
# messages. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# Read the raw workbook. The path is absolute and user-specific (Dropbox).
mydata <- read_excel("~/Dropbox (BI Norwegian Business School)/forskning/lesesenteret/bente/2022 motivation/Hefte1A-D_BM(motivasjon snudd).xlsx")
library(lavaan)
## This is lavaan 0.6-18.2090
## lavaan is FREE software! Please report any bugs.
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(semPlot)
library(papeR)
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: xtable
## Registered S3 method overwritten by 'papeR':
##   method    from
##   Anova.lme car 
## 
## Attaching package: 'papeR'
## 
## The following objects are masked from 'package:dplyr':
## 
##     summarise, summarize
## 
## The following object is masked from 'package:utils':
## 
##     toLatex
# Handle missing values: every cell that compares below -989 is treated as a
# missing-value code and set to NA. The result is then converted to a base
# data.frame, so the indexing that follows uses data.frame semantics.
mydata <- as.data.frame(replace(mydata, mydata < -989, NA))


# Control variables: general interest and general mastery.
# Each source column is copied into a column with a readable name.
interessegenerell <- "H1a_mles1s"
mestringgenerell <- "H1a_mles2s"
mydata[["interessegenerell"]] <- mydata[, interessegenerell, drop = TRUE]
mydata[["mestringgenerell"]] <- mydata[, mestringgenerell, drop = TRUE]

# SPELLING (staving). Note: item "B3_st101s" was removed as non-informative.
staving_items <- c(
  "B3_st140s", "B3_st102s", "B3_st103s", "B3_st160s",
  "B3_st155s", "B3_st117s", "B3_st110s"
)

# Source columns of the pre/post interest and mastery items.
prestaving_interesse <- "B3_m6a_1a"
prestaving_mestring <- "B3_m14a_1a"
poststaving_interesse <- "B3_m4a_1a"
poststaving_mestring <- "B3_m12a_1a"
poststaving_fortsattmestring <- "B3_m2a_1a"

# Copy each item into a column with a readable name.
mydata[["prestaving_interesse"]] <- mydata[, prestaving_interesse, drop = TRUE]
mydata[["prestaving_mestring"]] <- mydata[, prestaving_mestring, drop = TRUE]
mydata[["poststaving_interesse"]] <- mydata[, poststaving_interesse, drop = TRUE]
mydata[["poststaving_mestring"]] <- mydata[, poststaving_mestring, drop = TRUE]
mydata[["poststaving_fortsattmestring"]] <-
  mydata[, poststaving_fortsattmestring, drop = TRUE]

# Sum score over the spelling items. rowSums() is called without na.rm, so a
# row with any missing item gets an NA score.
mydata[["staving_sum"]] <- rowSums(mydata[staving_items])

allstaving <- c(
  "staving_sum", "prestaving_mestring", "prestaving_interesse",
  "poststaving_mestring", "poststaving_interesse",
  "poststaving_fortsattmestring"
)
# VOCABULARY (vokabular).
vok_items <- c(
  "B3_vsf16s", "B3_vsf08s", "B3_vsf22s", "B3_vsf41s",
  "B3_vsf11s", "B3_vsf13s", "B3_vsf07s", "B3_vsf42s"
)

# Source columns of the pre/post interest and mastery items.
# NOTE(review): the pre items here use "a" codes (m6a, m14a) while the post
# items use "b" codes (m4b, m12b, m2b); the sentence-reading section uses the
# opposite pattern (pre "b", post "a"). Confirm the item-to-task mapping.
prevok_interesse <- "B3_m6a_1b"
prevok_mestring <- "B3_m14a_1b"
postvok_interesse <- "B3_m4b_1b"
postvok_mestring <- "B3_m12b_1b"
postvok_fortsattmestring <- "B3_m2b_1b"

# Copy each item into a column with a readable name.
mydata[["prevok_interesse"]] <- mydata[, prevok_interesse, drop = TRUE]
mydata[["prevok_mestring"]] <- mydata[, prevok_mestring, drop = TRUE]
mydata[["postvok_interesse"]] <- mydata[, postvok_interesse, drop = TRUE]
mydata[["postvok_mestring"]] <- mydata[, postvok_mestring, drop = TRUE]
mydata[["postvok_fortsattmestring"]] <-
  mydata[, postvok_fortsattmestring, drop = TRUE]

# Sum score over the vocabulary items. rowSums() is called without na.rm, so a
# row with any missing item gets an NA score.
mydata[["vok_sum"]] <- rowSums(mydata[vok_items])

allvocs <- c(
  "vok_sum", "prevok_mestring", "prevok_interesse",
  "postvok_mestring", "postvok_interesse", "postvok_fortsattmestring"
)


# SENTENCE READING (setningslesing).
sl_items <- c(
  "B3_sl122s", "B3_sl232s", "B3_sl262s", "B3_sl0201s",
  "B3_sl271s", "B3_sl0203s", "B3_sl1701s", "B3_sl0503s"
)

# Source columns of the pre/post interest and mastery items.
# NOTE(review): the pre items here use "b" codes (m6b, m14b) while the post
# items use "a" codes (m4a, m12a, m2a); the vocabulary section uses the
# opposite pattern (pre "a", post "b"). Confirm the item-to-task mapping.
presl_interesse <- "B3_m6b_1b"
presl_mestring <- "B3_m14b_1b"
postsl_interesse <- "B3_m4a_1b"
postsl_mestring <- "B3_m12a_1b"
postsl_fortsattmestring <- "B3_m2a_1b"

# Copy each item into a column with a readable name.
mydata[["presl_interesse"]] <- mydata[, presl_interesse, drop = TRUE]
mydata[["presl_mestring"]] <- mydata[, presl_mestring, drop = TRUE]
mydata[["postsl_interesse"]] <- mydata[, postsl_interesse, drop = TRUE]
mydata[["postsl_mestring"]] <- mydata[, postsl_mestring, drop = TRUE]
mydata[["postsl_fortsattmestring"]] <-
  mydata[, postsl_fortsattmestring, drop = TRUE]

# Sum score over the sentence-reading items. rowSums() is called without
# na.rm, so a row with any missing item gets an NA score.
mydata[["sl_sum"]] <- rowSums(mydata[sl_items])

allsl <- c(
  "sl_sum", "presl_mestring", "presl_interesse",
  "postsl_mestring", "postsl_interesse", "postsl_fortsattmestring"
)

# Copy the gender column into a column with an ASCII name.
# NOTE(review): the source name `Kj¯nn` looks like a mis-encoded "Kjønn";
# confirm it matches the column name that read_excel() actually produces.
mydata$gender <- mydata$`Kj¯nn`

#install.packages("modelsummary")
library(modelsummary)

# Concentrate on the derived analysis variables.
# They are selected by name rather than by position (586:ncol(mydata)), so the
# selection does not depend on the raw workbook having exactly 585 columns.
# The order below is the order in which the columns were appended to mydata,
# which keeps the row/column order of the summary tables unchanged.
analysis_vars <- c(
  "interessegenerell", "mestringgenerell",
  "prestaving_interesse", "prestaving_mestring", "poststaving_interesse",
  "poststaving_mestring", "poststaving_fortsattmestring", "staving_sum",
  "prevok_interesse", "prevok_mestring", "postvok_interesse",
  "postvok_mestring", "postvok_fortsattmestring", "vok_sum",
  "presl_interesse", "presl_mestring", "postsl_interesse",
  "postsl_mestring", "postsl_fortsattmestring", "sl_sum",
  "gender"
)
# all_of() makes the selection fail loudly if any expected column is missing.
dd <- mydata %>% dplyr::select(all_of(analysis_vars))

# Translate the Norwegian fragments of the column names into English
# abbreviations. A named vector passed to str_replace_all() applies each
# pattern = replacement pair to every name.
oldnames <- colnames(dd)
name_translations <- c(
  "interesse" = "interest",
  "generell" = "_gen",
  "fortsatt" = "cont_",
  "mestring" = "master",
  "staving" = "spell",
  "vok" = "voc",
  "sl" = "sr",
  "sum" = "score"
)
newnames <- str_replace_all(oldnames, name_translations)

colnames(dd) <- newnames

Directions

Njål, here are some directions:

What we eventually want to understand is whether existing or triggered (pre- and post-test) interest, existing self-concept, and/or triggered (two triggers: pre- and post-test) self-efficacy predict performance on each of the three subtests.

Wondering: should we be considering cluster analysis? This would be person-centered and would account for within-person considerations.

DATA SUMMARIES

Note the large percentages of missing values for all variables except gender.

# Overview table of every analysis variable in dd.
datasummary_skim(data = dd)
Unique (#) Missing (%) Mean SD Min Median Max
interest_gen 6 42 3.5 1.2 1.0 4.0 5.0
master_gen 6 34 3.9 1.0 1.0 4.0 5.0
prespell_interest 6 16 4.0 1.1 1.0 4.0 5.0
prespell_master 6 17 4.1 1.0 1.0 4.0 5.0
postspell_interest 6 17 4.0 1.3 1.0 4.0 5.0
postspell_master 6 17 4.2 1.0 1.0 5.0 5.0
postspell_cont_master 4 17 4.4 0.7 3.0 5.0 5.0
spell_score 8 21 4.8 1.5 1.0 5.0 7.0
prevoc_interest 6 16 3.8 1.4 1.0 4.0 5.0
prevoc_master 6 18 4.0 1.2 1.0 4.0 5.0
postvoc_interest 6 16 4.1 1.3 1.0 5.0 5.0
postvoc_master 6 16 4.3 1.1 1.0 5.0 5.0
postvoc_cont_master 4 17 4.4 0.8 3.0 5.0 5.0
voc_score 10 10 7.1 1.5 0.0 8.0 8.0
presr_interest 6 17 3.9 1.2 1.0 4.0 5.0
presr_master 6 18 4.0 1.1 1.0 4.0 5.0
postsr_interest 6 18 3.9 1.3 1.0 4.0 5.0
postsr_master 6 19 4.3 1.0 1.0 5.0 5.0
postsr_cont_master 4 19 4.3 0.8 3.0 5.0 5.0
sr_score 10 17 5.5 2.1 0.0 6.0 8.0
gender 2 0 1.5 0.5 1.0 1.0 2.0

by gender

# Mean and standard deviation of each variable, split by gender.
datasummary_balance(formula = ~ gender, data = dd)
gender = 1 | gender = 2
Mean Std. Dev. | Mean Std. Dev.
interest_gen 3.6 1.1 3.4 1.4
master_gen 3.9 1.0 4.0 1.0
prespell_interest 4.1 1.0 4.0 1.2
prespell_master 4.1 0.9 4.2 1.0
postspell_interest 4.0 1.2 4.0 1.4
postspell_master 4.3 0.9 4.2 1.1
postspell_cont_master 4.5 0.7 4.4 0.8
spell_score 4.8 1.5 4.9 1.6
prevoc_interest 3.8 1.3 3.8 1.4
prevoc_master 4.1 1.1 4.0 1.3
postvoc_interest 4.1 1.2 4.1 1.3
postvoc_master 4.3 1.0 4.3 1.1
postvoc_cont_master 4.4 0.8 4.4 0.8
voc_score 7.1 1.3 7.0 1.7
presr_interest 3.9 1.1 3.8 1.4
presr_master 4.1 1.0 4.0 1.2
postsr_interest 4.0 1.1 3.9 1.4
postsr_master 4.2 1.0 4.3 1.0
postsr_cont_master 4.4 0.8 4.3 0.8
sr_score 5.8 1.9 5.1 2.3

correlations

# Correlation table of all analysis variables; gender is left out.
dd %>%
  select(-gender) %>%
  datasummary_correlation(data = .)
interest_gen master_gen prespell_interest prespell_master postspell_interest postspell_master postspell_cont_master spell_score prevoc_interest prevoc_master postvoc_interest postvoc_master postvoc_cont_master voc_score presr_interest presr_master postsr_interest postsr_master postsr_cont_master sr_score
interest_gen 1 . . . . . . . . . . . . . . . . . . .
master_gen .42 1 . . . . . . . . . . . . . . . . . .
prespell_interest .30 .19 1 . . . . . . . . . . . . . . . . .
prespell_master .22 .35 .35 1 . . . . . . . . . . . . . . . .
postspell_interest .23 .20 .58 .30 1 . . . . . . . . . . . . . . .
postspell_master .11 .24 .22 .47 .42 1 . . . . . . . . . . . . . .
postspell_cont_master .22 .17 .53 .28 .61 .34 1 . . . . . . . . . . . . .
spell_score .12 .13 .05 .11 .06 .15 .07 1 . . . . . . . . . . . .
prevoc_interest .32 .21 .42 .28 .48 .19 .40 .04 1 . . . . . . . . . . .
prevoc_master .26 .38 .30 .45 .34 .42 .37 .08 .60 1 . . . . . . . . . .
postvoc_interest .15 .07 .37 .16 .38 .11 .33 −.01 .64 .49 1 . . . . . . . . .
postvoc_master .15 .13 .26 .28 .25 .31 .31 .04 .46 .65 .58 1 . . . . . . . .
postvoc_cont_master .20 .05 .35 .18 .36 .11 .44 −.07 .61 .46 .73 .55 1 . . . . . . .
voc_score .01 .14 −.02 −.06 .03 .02 .08 .09 .12 .12 .19 .18 .16 1 . . . . . .
presr_interest .35 .16 .50 .30 .51 .22 .46 .02 .63 .46 .52 .42 .50 .12 1 . . . . .
presr_master .25 .33 .33 .48 .37 .39 .33 .11 .45 .56 .35 .52 .33 .06 .52 1 . . . .
postsr_interest .23 .18 .47 .26 .45 .19 .45 .09 .60 .45 .56 .41 .53 .09 .65 .45 1 . . .
postsr_master .29 .28 .27 .34 .23 .32 .27 .10 .48 .59 .38 .60 .32 .21 .39 .52 .47 1 . .
postsr_cont_master .27 .09 .35 .21 .38 .09 .45 .04 .52 .43 .47 .43 .60 .13 .61 .40 .60 .42 1 .
sr_score .11 .14 .04 .09 .08 .13 .04 .34 .21 .19 .14 .21 .13 .37 .14 .20 .17 .25 .18 1

Direction 1

Preliminary analysis to describe the range and mean level of self-concept in the population studied, also broken out by existing interest, existing self-efficacy, and gender.

I assume: self-concept=master_gen, a single item.

By existing interest: the correlation between interest_gen and master_gen is 0.4179609.

# Mean of master_gen (taken as self-concept) at each level of interest_gen.
# Rows with missing interest_gen are dropped first; missing master_gen values
# are ignored within each group. na.rm is spelled TRUE because T is an
# ordinary, reassignable variable. dplyr::summarise is called explicitly
# because papeR masks summarise().
tmp <- dd %>%
  filter(!is.na(interest_gen)) %>%
  group_by(interest_gen) %>%
  dplyr::summarise(master_gen = mean(master_gen, na.rm = TRUE))

# One point per interest level; the y value is the group mean computed above.
ggplot(tmp, aes(interest_gen, master_gen)) +
  geom_point() +
  ylab("Mean value")