R code for fitting a mixed-effects model of preposition development in British children’s writing

## 3.3.1 Data preparation
# Load in libraries 
# install them first if not already installed in your R environment: e.g., to install Hmisc, type the following command: install.packages("Hmisc")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(Hmisc)
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# read two CSV files in your working directory
setwd("~/R folder/tutorial_GiG_TAASSC")

metadata <- read.csv('metadata_gig.csv')
results <- read.csv('resultsSK.csv')

results$filename <- gsub(' .txt', '', results$filename)
order <- match(metadata$file_id, results$filename)
results <- results[order,]
results_meta <- data.frame(metadata, results)
head(results_meta)
# Rename variables
results_meta <- rename(results_meta, year = year_group)
results_meta <- rename(results_meta, title = text_title)
results_meta <- rename(results_meta, PP = prep_nominal)
# Convert grouping variable to factor
results_meta$year <- factor(results_meta$year, levels = c('Year_2', 'Year_6', 'Year_9', 'Year_11'))
## 3.3.2 Visualize with the desired order of factor levels
ggplot(results_meta, aes(year, PP, linetype = genre)) +
  stat_summary(fun = 'mean', geom = 'line', aes(group = genre)) +
  stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar', width = 0.2) +
  labs(y = "Prepositions per Nominal", x = "Year Group")

# Save the visual to the 'outputs' folder
ggsave('output/PP_graph.jpeg')
## Saving 7 x 5 in image
#3.3.3 Fit mixed-effects model with the desired order of factor levels
results_meta.model <- lmer(PP ~ year + (1 | title), data = results_meta, REML = FALSE)
# Print model summary
summary(results_meta.model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: PP ~ year + (1 | title)
##    Data: results_meta
## 
##      AIC      BIC   logLik deviance df.resid 
##  28710.8  28746.4 -14349.4  28698.8     2819 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2305 -0.1843 -0.0982  0.0065 31.5050 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  title    (Intercept)   66.49   8.154  
##  Residual             1463.06  38.250  
## Number of obs: 2825, groups:  title, 326
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  13.2769     2.0532   6.466
## yearYear_6   -3.8327     2.7363  -1.401
## yearYear_9   -1.1544     2.7604  -0.418
## yearYear_11  -0.8614     2.8744  -0.300
## 
## Correlation of Fixed Effects:
##             (Intr) yrYr_6 yrYr_9
## yearYear_6  -0.750              
## yearYear_9  -0.744  0.558       
## yearYear_11 -0.714  0.536  0.532