R code for fitting a mixed-effects model of preposition development
in British children’s writing
## 3.3.1 Data preparation
# Load in libraries
# install them first if not already installed in your R environment: e.g., to install Hmisc, type the following command: install.packages("Hmisc")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
# read two CSV files in your working directory
setwd("~/R folder/tutorial_GiG_TAASSC")
metadata <- read.csv('metadata_gig.csv')
results <- read.csv('resultsSK.csv')
results$filename <- gsub(' .txt', '', results$filename)
order <- match(metadata$file_id, results$filename)
results <- results[order,]
results_meta <- data.frame(metadata, results)
head(results_meta)
# Rename variables
results_meta <- rename(results_meta, year = year_group)
results_meta <- rename(results_meta, title = text_title)
results_meta <- rename(results_meta, PP = prep_nominal)
# Convert grouping variable to factor
results_meta$year <- factor(results_meta$year, levels = c('Year_2', 'Year_6', 'Year_9', 'Year_11'))
## 3.3.2 Visualize with the desired order of factor levels
ggplot(results_meta, aes(year, PP, linetype = genre)) +
stat_summary(fun = 'mean', geom = 'line', aes(group = genre)) +
stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar', width = 0.2) +
labs(y = "Prepositions per Nominal", x = "Year Group")

# Save the visual to the 'outputs' folder
ggsave('output/PP_graph.jpeg')
## Saving 7 x 5 in image
#3.3.3 Fit mixed-effects model with the desired order of factor levels
results_meta.model <- lmer(PP ~ year + (1 | title), data = results_meta, REML = FALSE)
# Print model summary
summary(results_meta.model)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: PP ~ year + (1 | title)
## Data: results_meta
##
## AIC BIC logLik deviance df.resid
## 28710.8 28746.4 -14349.4 28698.8 2819
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.2305 -0.1843 -0.0982 0.0065 31.5050
##
## Random effects:
## Groups Name Variance Std.Dev.
## title (Intercept) 66.49 8.154
## Residual 1463.06 38.250
## Number of obs: 2825, groups: title, 326
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 13.2769 2.0532 6.466
## yearYear_6 -3.8327 2.7363 -1.401
## yearYear_9 -1.1544 2.7604 -0.418
## yearYear_11 -0.8614 2.8744 -0.300
##
## Correlation of Fixed Effects:
## (Intr) yrYr_6 yrYr_9
## yearYear_6 -0.750
## yearYear_9 -0.744 0.558
## yearYear_11 -0.714 0.536 0.532