library(readxl)
library(kableExtra)
# Chunk defaults for the knitted report: echo the code, hide warnings and
# messages. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# Read the Excel workbook into `mydata`.
# NOTE(review): absolute, user-specific Dropbox path; the script only runs on a
# machine that has this exact folder layout.
mydata <- read_excel("~/Dropbox (BI Norwegian Business School)/forskning/lesesenteret/bente/2022 motivation/Hefte1A-D_BM(motivasjon snudd).xlsx")
library(lavaan)

## This is lavaan 0.6-18.2090
## lavaan is FREE software! Please report any bugs.

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.3.2

## Warning: package 'tidyr' was built under R version 4.3.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(semPlot)
library(papeR)

## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: xtable
## Registered S3 method overwritten by 'papeR':
##   method    from
##   Anova.lme car 
## 
## Attaching package: 'papeR'
## 
## The following objects are masked from 'package:dplyr':
## 
##     summarise, summarize
## 
## The following object is masked from 'package:utils':
## 
##     toLatex

# handle missing
#
# Values below -989 are recoded to NA.
# NOTE(review): presumably these are sentinel missing codes (e.g. -999) --
# confirm against the codebook.
#
# The data are first converted to a plain data.frame, and the recode is then
# applied to numeric columns only. Comparing a whole data frame with `< -989`
# also compares character columns, as strings, against "-989"; the result of
# that comparison depends on the locale's collation and can blank out
# legitimate text values.
mydata <- as.data.frame(mydata)
numeric_cols <- vapply(mydata, is.numeric, logical(1))
if (any(numeric_cols)) {
  mydata[numeric_cols] <- lapply(mydata[numeric_cols], function(column) {
    column[!is.na(column) & column < -989] <- NA
    column
  })
}

# control
# Each source column name is stored in a variable, and the column is copied
# into `mydata` under that same descriptive name.
interessegenerell <- "H1a_mles1s"
mydata[["interessegenerell"]] <- mydata[, interessegenerell]

mestringgenerell <- "H1a_mles2s"
mydata[["mestringgenerell"]] <- mydata[, mestringgenerell]

# SPELLING (staving). Note: item "B3_st101s" was removed as non-informative.
staving_items <- c(
  "B3_st140s", "B3_st102s", "B3_st103s", "B3_st160s",
  "B3_st155s", "B3_st117s", "B3_st110s"
)

# Copy each source item under a descriptive name. The order of these
# assignments fixes the order of the appended columns, so keep it unchanged.
prestaving_interesse <- "B3_m6a_1a"
mydata[["prestaving_interesse"]] <- mydata[, prestaving_interesse]

prestaving_mestring <- "B3_m14a_1a"
mydata[["prestaving_mestring"]] <- mydata[, prestaving_mestring]

poststaving_interesse <- "B3_m4a_1a"
mydata[["poststaving_interesse"]] <- mydata[, poststaving_interesse]

poststaving_mestring <- "B3_m12a_1a"
mydata[["poststaving_mestring"]] <- mydata[, poststaving_mestring]

poststaving_fortsattinteresse <- "B3_m2a_1a"
mydata[["poststaving_fortsattinteresse"]] <- mydata[, poststaving_fortsattinteresse]

# Sum score over the items; a row with any missing item gets NA
# (rowSums is called without na.rm).
mydata[["staving_sum"]] <- rowSums(mydata[, staving_items])

allstaving <- c(
  "staving_sum",
  "prestaving_mestring", "prestaving_interesse",
  "poststaving_mestring", "poststaving_interesse",
  "poststaving_fortsattinteresse"
)
# VOCABULARY (vokabular)
vok_items <- c(
  "B3_vsf16s", "B3_vsf08s", "B3_vsf22s", "B3_vsf41s",
  "B3_vsf11s", "B3_vsf13s", "B3_vsf07s", "B3_vsf42s"
)

# Copy each source item under a descriptive name. The order of these
# assignments fixes the order of the appended columns, so keep it unchanged.
# NOTE(review): the pre items here use the `a_1b` suffix and the post items
# `b_1b`, while the sentence-reading block uses pre `b_1b` and post `a_1b`.
# Confirm against the codebook that the post items are not swapped between
# the two subtests.
prevok_interesse <- "B3_m6a_1b"
mydata[["prevok_interesse"]] <- mydata[, prevok_interesse]

prevok_mestring <- "B3_m14a_1b"
mydata[["prevok_mestring"]] <- mydata[, prevok_mestring]

postvok_interesse <- "B3_m4b_1b"
mydata[["postvok_interesse"]] <- mydata[, postvok_interesse]

postvok_mestring <- "B3_m12b_1b"
mydata[["postvok_mestring"]] <- mydata[, postvok_mestring]

postvok_fortsattinteresse <- "B3_m2b_1b"
mydata[["postvok_fortsattinteresse"]] <- mydata[, postvok_fortsattinteresse]

# Sum score over the items; a row with any missing item gets NA
# (rowSums is called without na.rm).
mydata[["vok_sum"]] <- rowSums(mydata[, vok_items])

allvocs <- c(
  "vok_sum",
  "prevok_mestring", "prevok_interesse",
  "postvok_mestring", "postvok_interesse",
  "postvok_fortsattinteresse"
)


# SENTENCE READING (setningslesing)
sl_items <- c(
  "B3_sl122s", "B3_sl232s", "B3_sl262s", "B3_sl0201s",
  "B3_sl271s", "B3_sl0203s", "B3_sl1701s", "B3_sl0503s"
)

# Copy each source item under a descriptive name. The order of these
# assignments fixes the order of the appended columns, so keep it unchanged.
# NOTE(review): the pre items here use the `b_1b` suffix and the post items
# `a_1b`, while the vocabulary block uses pre `a_1b` and post `b_1b`.
# Confirm against the codebook that the post items are not swapped between
# the two subtests.
presl_interesse <- "B3_m6b_1b"
mydata[["presl_interesse"]] <- mydata[, presl_interesse]

presl_mestring <- "B3_m14b_1b"
mydata[["presl_mestring"]] <- mydata[, presl_mestring]

postsl_interesse <- "B3_m4a_1b"
mydata[["postsl_interesse"]] <- mydata[, postsl_interesse]

postsl_mestring <- "B3_m12a_1b"
mydata[["postsl_mestring"]] <- mydata[, postsl_mestring]

postsl_fortsattinteresse <- "B3_m2a_1b"
mydata[["postsl_fortsattinteresse"]] <- mydata[, postsl_fortsattinteresse]

# Sum score over the items; a row with any missing item gets NA
# (rowSums is called without na.rm).
mydata[["sl_sum"]] <- rowSums(mydata[, sl_items])

allsl <- c(
  "sl_sum",
  "presl_mestring", "presl_interesse",
  "postsl_mestring", "postsl_interesse",
  "postsl_fortsattinteresse"
)

# Copy the gender column under the ASCII name `gender`.
# NOTE(review): `Kj¯nn` looks like a mis-encoded "Kjønn". It is left untouched
# because it has to match the column name as it was read from the workbook --
# confirm the actual column name and the encoding of this source file.
mydata$gender <- mydata$`Kj¯nn`

#install.packages("modelsummary")
library(modelsummary)

# Concentrate on the derived analysis variables.
#
# They are selected by name rather than by position (previously
# `586:ncol(mydata)`), so that adding or removing a column in the workbook
# cannot silently shift the selection. The names are listed in the order in
# which the columns were appended to `mydata`, which keeps the column order of
# `dd` unchanged.
# NOTE(review): this assumes column 586 was `interessegenerell`, i.e. the first
# appended column -- confirm.
analysis_vars <- c(
  "interessegenerell", "mestringgenerell",
  "prestaving_interesse", "prestaving_mestring",
  "poststaving_interesse", "poststaving_mestring",
  "poststaving_fortsattinteresse", "staving_sum",
  "prevok_interesse", "prevok_mestring",
  "postvok_interesse", "postvok_mestring",
  "postvok_fortsattinteresse", "vok_sum",
  "presl_interesse", "presl_mestring",
  "postsl_interesse", "postsl_mestring",
  "postsl_fortsattinteresse", "sl_sum",
  "gender"
)
dd <- mydata %>% dplyr::select(dplyr::all_of(analysis_vars))

# Translate the Norwegian name fragments to English. A named vector makes
# str_replace_all() apply every pattern = replacement pair to each name.
oldnames <- colnames(dd)
name_translations <- c(
  "interesse" = "interest",
  "generell"  = "_gen",
  "fortsatt"  = "cont_",
  "mestring"  = "master",
  "staving"   = "spell",
  "vok"       = "voc",
  "sl"        = "sr",
  "sum"       = "score"
)
newnames <- str_replace_all(oldnames, name_translations)

colnames(dd) <- newnames

# From here on, hide the code in the knitted report as well.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

Directions

Njål, here are some directions:

Preliminary analysis to say what the range and mean level of self-concept is for the population studied, also broken out by existing interest, existing self-efficacy, and by gender. QUESTION: what variable is self-concept? See above code.
For each subtest, please run a correlation of interest (existing, pre-test, post-test), self-efficacy (existing, pre-test, post-test), and continued work with the task. Does it make sense to create groups based on relative levels of interest and self-efficacy, as a within-person analysis?

****What we want to understand eventually is whether existing or triggered (pre-and post test) interest and/or existing self-concept and/or triggered (2 triggers- pre and post test) self-efficacy predict performance on each of the three subtests?

Wondering: should we be considering cluster analysis? This would be person-centered, and account for within person considerations.

DATA SUMMARIES

Note the large missing percentages for all variables except gender.

	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
interest_gen	6	42	3.5	1.2	1.0	4.0	5.0
master_gen	6	34	3.9	1.0	1.0	4.0	5.0
prespell_interest	6	16	4.0	1.1	1.0	4.0	5.0
prespell_master	6	17	4.1	1.0	1.0	4.0	5.0
postspell_interest	6	17	4.0	1.3	1.0	4.0	5.0
postspell_master	6	17	4.2	1.0	1.0	5.0	5.0
postspell_cont_interest	4	17	4.4	0.7	3.0	5.0	5.0
spell_score	8	21	4.8	1.5	1.0	5.0	7.0
prevoc_interest	6	16	3.8	1.4	1.0	4.0	5.0
prevoc_master	6	18	4.0	1.2	1.0	4.0	5.0
postvoc_interest	6	16	4.1	1.3	1.0	5.0	5.0
postvoc_master	6	16	4.3	1.1	1.0	5.0	5.0
postvoc_cont_interest	4	17	4.4	0.8	3.0	5.0	5.0
voc_score	10	10	7.1	1.5	0.0	8.0	8.0
presr_interest	6	17	3.9	1.2	1.0	4.0	5.0
presr_master	6	18	4.0	1.1	1.0	4.0	5.0
postsr_interest	6	18	3.9	1.3	1.0	4.0	5.0
postsr_master	6	19	4.3	1.0	1.0	5.0	5.0
postsr_cont_interest	4	19	4.3	0.8	3.0	5.0	5.0
sr_score	10	17	5.5	2.1	0.0	6.0	8.0
gender	2	0	1.5	0.5	1.0	1.0	2.0

by gender

	1		2
	Mean	Std. Dev.	Mean	Std. Dev.
interest_gen	3.6	1.1	3.4	1.4
master_gen	3.9	1.0	4.0	1.0
prespell_interest	4.1	1.0	4.0	1.2
prespell_master	4.1	0.9	4.2	1.0
postspell_interest	4.0	1.2	4.0	1.4
postspell_master	4.3	0.9	4.2	1.1
postspell_cont_interest	4.5	0.7	4.4	0.8
spell_score	4.8	1.5	4.9	1.6
prevoc_interest	3.8	1.3	3.8	1.4
prevoc_master	4.1	1.1	4.0	1.3
postvoc_interest	4.1	1.2	4.1	1.3
postvoc_master	4.3	1.0	4.3	1.1
postvoc_cont_interest	4.4	0.8	4.4	0.8
voc_score	7.1	1.3	7.0	1.7
presr_interest	3.9	1.1	3.8	1.4
presr_master	4.1	1.0	4.0	1.2
postsr_interest	4.0	1.1	3.9	1.4
postsr_master	4.2	1.0	4.3	1.0
postsr_cont_interest	4.4	0.8	4.3	0.8
sr_score	5.8	1.9	5.1	2.3

correlations

	interest_gen	master_gen	prespell_interest	prespell_master	postspell_interest	postspell_master	postspell_cont_interest	spell_score	prevoc_interest	prevoc_master	postvoc_interest	postvoc_master	postvoc_cont_interest	voc_score	presr_interest	presr_master	postsr_interest	postsr_master	postsr_cont_interest	sr_score
interest_gen	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
master_gen	.42	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
prespell_interest	.30	.19	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
prespell_master	.22	.35	.35	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
postspell_interest	.23	.20	.58	.30	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
postspell_master	.11	.24	.22	.47	.42	1	.	.	.	.	.	.	.	.	.	.	.	.	.	.
postspell_cont_interest	.22	.17	.53	.28	.61	.34	1	.	.	.	.	.	.	.	.	.	.	.	.	.
spell_score	.12	.13	.05	.11	.06	.15	.07	1	.	.	.	.	.	.	.	.	.	.	.	.
prevoc_interest	.32	.21	.42	.28	.48	.19	.40	.04	1	.	.	.	.	.	.	.	.	.	.	.
prevoc_master	.26	.38	.30	.45	.34	.42	.37	.08	.60	1	.	.	.	.	.	.	.	.	.	.
postvoc_interest	.15	.07	.37	.16	.38	.11	.33	−.01	.64	.49	1	.	.	.	.	.	.	.	.	.
postvoc_master	.15	.13	.26	.28	.25	.31	.31	.04	.46	.65	.58	1	.	.	.	.	.	.	.	.
postvoc_cont_interest	.20	.05	.35	.18	.36	.11	.44	−.07	.61	.46	.73	.55	1	.	.	.	.	.	.	.
voc_score	.01	.14	−.02	−.06	.03	.02	.08	.09	.12	.12	.19	.18	.16	1	.	.	.	.	.	.
presr_interest	.35	.16	.50	.30	.51	.22	.46	.02	.63	.46	.52	.42	.50	.12	1	.	.	.	.	.
presr_master	.25	.33	.33	.48	.37	.39	.33	.11	.45	.56	.35	.52	.33	.06	.52	1	.	.	.	.
postsr_interest	.23	.18	.47	.26	.45	.19	.45	.09	.60	.45	.56	.41	.53	.09	.65	.45	1	.	.	.
postsr_master	.29	.28	.27	.34	.23	.32	.27	.10	.48	.59	.38	.60	.32	.21	.39	.52	.47	1	.	.
postsr_cont_interest	.27	.09	.35	.21	.38	.09	.45	.04	.52	.43	.47	.43	.60	.13	.61	.40	.60	.42	1	.
sr_score	.11	.14	.04	.09	.08	.13	.04	.34	.21	.19	.14	.21	.13	.37	.14	.20	.17	.25	.18	1

Direction 1

Preliminary analysis to say what the range and mean level of self-concept is for the population studied, also broken out by existing interest, existing self-efficacy, and by gender

I assume: self-concept=master_gen, a single item.

By existing interest: The correlation is 0.4179609.

Direction 1 revised

Consider the effect of general interest on continued interest for the 3 test types, separately for each gender. Remark 1: general interest has 42% missing! After removing those cases, there are still missing values for continued interest. Remark 2: continued interest only takes the values 3, 4, 5!

We plot continued interest as a function of general interest, separately for each gender, and for each test:

A better graph of the same might be

Here, we don't see that the underlying groups are small, so the noise is large. A smoothed graph shows the uncertainty:

Small gender differences (?) so let us not separate by gender:

Note that the grey band is larger at low general interest, since few kids report low general interest:

## 
##   1   2   3   4   5 
##  37  19 107  90  87

Interest

N Foldnes

2024-08-24