R code for fitting a mixed-effects model of preposition development
in British children’s writing
## 3.3.1 Data preparation
#1-1) Load in libraries
# install them first if not already installed in your R environment: e.g., to install Hmisc, type the following command: install.packages("Hmisc")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
#1-2) read two CSV files in your working directory
setwd("~/R folder/tutorial_GiG_TAASSC")
#1-3) read two CSV files named 'metadata_gig.csv' and 'resultsSK.csv'
metadata <- read.csv('metadata_gig.csv')
results <- read.csv('resultsSK.csv')
#1-4) remove the '.txt' extension from the 'filename' column of the 'results'
results$filename <- gsub(' .txt', '', results$filename)
#1-5) match the 'file_id' column of the 'metadata' data frame with the 'filename' column of 'results' data frame
order <- match(metadata$file_id, results$filename)
#1-6) use indexing to match the rows in the 'results' data frame to the 'metadata' dataframe
results <- results[order,]
#1-7) create a new data frame named 'output' by combining both dataframes
results_meta <- data.frame(metadata, results)
#1-8) Rename variables
results_meta <- rename(results_meta, student = text_id)
results_meta <- rename(results_meta, year = year_group)
results_meta <- rename(results_meta, title = text_title)
results_meta <- rename(results_meta, PP = prep_nominal)
head(results_meta)
## 3.3.2 Visualize the frequency of prepositions per nominal
#2-1) Convert grouping variable to factor
results_meta$year <- factor(results_meta$year, levels = c('Year_2', 'Year_6', 'Year_9', 'Year_11'))
#2-2) visualise the normed frequencies of prepostions per nouns across school years
ggplot(results_meta, aes(year, PP, linetype = genre)) +
stat_summary(fun = 'mean', geom = 'line', aes(group = genre)) +
stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar', width = 0.2) +
labs(y = "Prepositions per Nominal", x = "Year Group")

#2-3) Save the visual to the 'outputs' folder
ggsave('output/PP_graph.jpeg')
## Saving 7 x 5 in image
#3.3.3 Fit mixed-effects model with the desired order of factor levels
#3-1) fit a mixed-effects model
results_meta.model <- lmer(PP ~ year*genre + (1 | title), data = results_meta, REML = FALSE)
#3-2) Print the model summary
summary(results_meta.model)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: PP ~ year * genre + (1 | title)
## Data: results_meta
##
## AIC BIC logLik deviance df.resid
## 28710.9 28770.4 -14345.4 28690.9 2815
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1748 -0.1948 -0.0890 0.0129 31.5489
##
## Random effects:
## Groups Name Variance Std.Dev.
## title (Intercept) 57.46 7.58
## Residual 1464.21 38.26
## Number of obs: 2825, groups: title, 326
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 10.9623 2.8996 3.781
## yearYear_6 -5.1888 4.2399 -1.224
## yearYear_9 -7.0254 5.9557 -1.180
## yearYear_11 -6.4775 6.5134 -0.994
## genrenon_literary 4.5324 4.0061 1.131
## yearYear_6:genrenon_literary 0.8179 5.4926 0.149
## yearYear_9:genrenon_literary 4.7361 6.8407 0.692
## yearYear_11:genrenon_literary 4.4056 7.3789 0.597
##
## Correlation of Fixed Effects:
## (Intr) yrYr_6 yrYr_9 yrY_11 gnrnn_ yY_6:_ yY_9:_
## yearYear_6 -0.684
## yearYear_9 -0.487 0.333
## yearYear_11 -0.445 0.304 0.217
## gnrnn_ltrry -0.724 0.495 0.352 0.322
## yrYr_6:gnr_ 0.528 -0.772 -0.257 -0.235 -0.729
## yrYr_9:gnr_ 0.424 -0.290 -0.871 -0.189 -0.586 0.427
## yrYr_11:gn_ 0.393 -0.269 -0.191 -0.883 -0.543 0.396 0.318