Read in Vocabulary Data
# Read in the data
Osborne <- read.table("osbornesuddick.txt", header = TRUE)
long <- reshape(Osborne,
varying = c("wisc_1", "wisc_2", "wisc_3", "wisc_4"),
v.names = "score",
timevar = "time",
times = c(0, 1, 2, 3),
new.row.names = 1:1000,
direction = "long")
long.sort <- long[order(long$subno),]
long.sort[1:10,]
## subno moth_ed time score id
## 1 1 9.5 0 22.13 1
## 205 1 9.5 1 24.97 1
## 409 1 9.5 2 41.76 1
## 613 1 9.5 3 49.91 1
## 2 2 5.5 0 9.17 2
## 206 2 5.5 1 13.91 2
## 410 2 5.5 2 20.10 2
## 614 2 5.5 3 39.10 2
## 3 3 14.0 0 30.03 3
## 207 3 14.0 1 39.27 3
It is helpful to plot each individual's data points across all measurement occasions to gain insight into the patterns of growth. Unfortunately, for large datasets the resulting spaghetti plot, as you can see below, looks like a solid mass. The Galton squeeze diagram (Campbell & Kenny, 1999, p. 163) addresses this by sorting cases on one measurement occasion and plotting subgroup means rather than individual trajectories. The sorting can be done on any measurement occasion, or on the average across all occasions.
Squeeze plots are useful for determining:

- whether regression to the mean (RTM) is responsible for longitudinal differences, and
- whether a different functional form of change exists for higher- versus lower-scoring individuals relative to a measurement occasion.
Forward squeeze (pre → post) and reverse squeeze (post → pre) can be used to emphasize that RTM is bidirectional when variables are imperfectly correlated.
Squeeze plots are also a diagnostic in quasi-experiments/program evaluation. When groups are selected for extreme pretest scores (or via matching), squeeze diagrams help distinguish artifactual “improvement” from real treatment effects.
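A minimal base-R sketch of the forward squeeze (an illustration of the idea, not the book's figure code; it assumes the wide Osborne data read above, with wisc_1 as the pretest and wisc_4 as the posttest):
# Forward squeeze: band cases on the pretest, then plot subgroup means
# at both occasions (use dplyr::ntile() instead of cut() for quintiles)
grp <- cut(Osborne$wisc_1, breaks = 5)
pre_means <- tapply(Osborne$wisc_1, grp, mean)
post_means <- tapply(Osborne$wisc_4, grp, mean)
matplot(rbind(pre_means, post_means), type = "b", pch = 1,
xaxt = "n", xlab = "Occasion", ylab = "Subgroup mean score",
main = "Forward squeeze (grouped on wisc_1)")
axis(1, at = 1:2, labels = c("wisc_1", "wisc_4"))
# Reverse squeeze: group on wisc_4 instead and plot the same two occasions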
# Plots can be done in either lattice or ggplot
# Here's lattice
library(lattice)
xyplot(score ~ time, groups = subno,
data = long.sort,
type = "l", xlab = "Time", ylab = "Score",
main = "Lattice Spaghetti Plot",
scales = list(x = list(at = c(0, 1, 2, 3))))
# Here's ggplot
library(ggplot2)
library(svglite)
library(cowplot)
ggplot(long, aes(x = time, y = score, group = subno)) +
geom_line() +
# group = subno carries into geom_smooth, so this draws a per-subject OLS line
geom_smooth(method = "lm", se = FALSE, formula = y ~ x) +
background_grid(major = "xy",
minor = "none",
size.major = 0.2,
colour.major = "grey90",
colour.minor = "grey98") +
labs(
title = "ggplot Spaghetti Plot of Scores Over Time",
subtitle = "Individual trajectories",
x = "Time",
y = "Score"
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# helper: quintile-mean spaghetti plot with SD error bars + horizontal jitter
make_group_mean_plot <- function(data, sort_by, n_groups = 5,
times = c(0, 1, 2, 3),
jitter_width = 0.06) {
stopifnot(sort_by %in% c("wisc_1", "wisc_2", "wisc_3", "wisc_4"))
# Put participants into rank groups (quintiles) by the chosen occasion
dat_grp <- data %>% mutate(.grp = ntile(.data[[sort_by]], n_groups))
# Long + group summary stats at each time: mean and SD
long_stats <- dat_grp %>%
pivot_longer(starts_with("wisc_"), names_to = "occasion", values_to = "score") %>%
mutate(time = times[match(occasion, c("wisc_1", "wisc_2", "wisc_3", "wisc_4"))]) %>%
group_by(.grp, time) %>%
summarise(
mean_score = mean(score, na.rm = TRUE),
sd_score = sd(score, na.rm = TRUE),
n = sum(!is.na(score)),
.groups = "drop"
)
# Horizontal jitter per group (keeps each group's points, bars, and lines aligned while offsetting the groups from one another)
offsets <- seq(-(n_groups - 1) / 2, (n_groups - 1) / 2, length.out = n_groups) * jitter_width
long_stats <- long_stats %>% mutate(x_jit = time + offsets[.grp])
title_txt <- paste0("Spaghetti of Group Means +/- SD (sorted by ", sort_by, ")")
ggplot(long_stats, aes(x = x_jit, y = mean_score, group = .grp, color = factor(.grp))) +
geom_line(linewidth = 1, alpha = 0.9) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = mean_score - sd_score,
ymax = mean_score + sd_score),
width = 0.06) +
scale_x_continuous(breaks = times, labels = times) +
labs(x = "Time", y = "Mean score",
color = paste("Quintile of", sort_by),
title = title_txt) +
theme_minimal(base_size = 13) +
theme(legend.position = "right")
}
# (1) Sorted by first occasion (wisc_1)
plot_by_w1 <- make_group_mean_plot(Osborne, sort_by = "wisc_1")
print(plot_by_w1)
# (2) Sorted by last occasion (wisc_4)
plot_by_w4 <- make_group_mean_plot(Osborne, sort_by = "wisc_4")
print(plot_by_w4)
# Rename the occasions to t1-t4 to simplify the model syntax and graphs
Osborne <- Osborne %>%
rename(
t1 = wisc_1,
t2 = wisc_2,
t3 = wisc_3,
t4 = wisc_4
)
library(lavaan)
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
library(ggplot2)
library(dplyr)
library(tidyr)
library(cowplot)
# --- Fit the latent-basis growth model
FCSI <- '
i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4
s =~ NA*t1 + L1*t1 + L2*t2 + L3*t3 + L4*t4
s ~~ 1*s
i ~~ var_i*i
i ~~ 0*s
t1 ~~ var1*t1
t2 ~~ var2*t2
t3 ~~ var3*t3
t4 ~~ var4*t4
t1 ~ 0*1
t2 ~ 0*1
t3 ~ 0*1
t4 ~ 0*1
i ~ 1
s ~ 1
'
FCSIfit <- lavaan(FCSI, data = Osborne)
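The model above is a latent-basis ("factor of curves") specification: all four slope loadings are freely estimated, the slope variance is fixed to 1, and the intercept-slope covariance is fixed to 0. For context, a conventional linear growth model can be fit with lavaan's growth() wrapper and compared on global fit (a sketch for comparison, not part of the original analysis):
# Sketch: standard linear growth curve on the same data, for comparison
linear <- '
i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4
s =~ 0*t1 + 1*t2 + 2*t3 + 3*t4
'
linfit <- growth(linear, data = Osborne)
fitMeasures(linfit, c("chisq", "df", "cfi", "rmsea"))
fitMeasures(FCSIfit, c("chisq", "df", "cfi", "rmsea"))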
# --- Get factor scores
fscores <- lavPredict(FCSIfit) %>% as.data.frame() # columns: i, s
# Merge with observed data
dat <- cbind(Osborne, fscores)
# --- Long version for plotting
long_dat <- dat %>%
select(i, s, t1, t2, t3, t4) %>%
pivot_longer(cols = starts_with("t"),
names_to = "occasion", values_to = "score")
# --- Plot set 1: Intercept factor vs observed
p_intercept <- ggplot(long_dat, aes(x = i, y = score, color = occasion)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
facet_wrap(~ occasion, scales = "free_y") +
labs(title = "Observed Scores Predicted by Intercept Factor",
x = "Intercept Factor Score", y = "Observed Score") +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# --- Plot set 2: Slope factor vs observed
p_slope <- ggplot(long_dat, aes(x = s, y = score, color = occasion)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
facet_wrap(~ occasion, scales = "free_y") +
labs(title = "Observed Scores Predicted by Slope Factor",
x = "Slope Factor Score", y = "Observed Score") +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# Show them
print(p_intercept)
## `geom_smooth()` using formula = 'y ~ x'
print(p_slope)
## `geom_smooth()` using formula = 'y ~ x'
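As a numeric companion to these panels, the factor scores can be correlated with each occasion; the pattern should echo the standardized loadings in the model output (a quick check, not part of the original output):
# Correlate estimated factor scores with each observed occasion
round(cor(dat[, c("i", "s")], dat[, c("t1", "t2", "t3", "t4")]), 2)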
library(lavaan)
FCSI <- '
i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4
s =~ NA*t1 + L1*t1 + L2*t2 + L3*t3 + L4*t4
# fix the slope variance to 1 for identification
s ~~ 1*s
i ~~ var_i*i
i ~~ 0*s
t1 ~~ var1*t1
t2 ~~ var2*t2
t3 ~~ var3*t3
t4 ~~ var4*t4
t1 ~ 0*1
t2 ~ 0*1
t3 ~ 0*1
t4 ~ 0*1
i ~ 1
s ~ 1
'
FCSIfit <- lavaan(FCSI, data=Osborne)
summary(FCSIfit, standardized=TRUE)
## lavaan 0.6-19 ended normally after 66 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Number of observations 204
##
## Model Test User Model:
##
## Test statistic 7.713
## Degrees of freedom 3
## P-value (Chi-square) 0.052
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## i =~
## t1 1.000 4.328 0.686
## t2 1.000 4.328 0.610
## t3 1.000 4.328 0.536
## t4 1.000 4.328 0.432
## s =~
## t1 (L1) 3.591 0.749 4.796 0.000 3.591 0.569
## t2 (L2) 4.821 0.694 6.950 0.000 4.821 0.680
## t3 (L3) 6.300 0.661 9.531 0.000 6.300 0.780
## t4 (L4) 8.108 0.676 11.996 0.000 8.108 0.809
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## i ~~
## s 0.000 0.000 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .t1 0.000 0.000 0.000
## .t2 0.000 0.000 0.000
## .t3 0.000 0.000 0.000
## .t4 0.000 0.000 0.000
## i -3.952 6.384 -0.619 0.536 -0.913 -0.913
## s 6.332 0.718 8.817 0.000 6.332 6.332
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## s 1.000 1.000 1.000
## i (var_) 18.731 6.179 3.031 0.002 1.000 1.000
## .t1 (var1) 8.211 1.424 5.767 0.000 8.211 0.206
## .t2 (var2) 8.348 1.103 7.567 0.000 8.348 0.166
## .t3 (var3) 6.896 1.154 5.976 0.000 6.896 0.106
## .t4 (var4) 15.863 2.675 5.930 0.000 15.863 0.158
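The user-model chi-square is marginal (7.713, df = 3, p = .052); other global fit indices can be pulled directly from the fit object if desired:
# Common global fit indices for the latent-basis model
fitMeasures(FCSIfit, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))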
library(influence.SEM)
fitres <- sem.fitres(FCSIfit)
head(fitres)
## t1 t2 t3 t4 hat.t1 hat.t2 hat.t3 hat.t4 e.t1
## 1 22.13 24.97 41.76 49.91 20.897858 29.07450 38.90908 50.92114 1.2321417
## 2 9.17 13.91 20.10 39.10 9.592033 15.74597 23.14771 32.18827 -0.4220331
## 3 30.03 39.27 40.65 63.95 26.021901 35.09202 46.00124 59.32588 4.0080990
## 4 27.93 29.03 44.06 53.19 23.502709 32.12785 42.50185 55.17278 4.4272910
## 5 27.93 41.12 53.27 67.59 30.101441 39.94185 51.77754 66.23379 -2.1714413
## 6 12.25 17.95 32.50 39.51 14.349934 21.36129 29.79430 40.09448 -2.0999336
## e.t2 e.t3 e.t4
## 1 -4.104503 2.850922 -1.0111450
## 2 -1.835971 -3.047706 6.9117265
## 3 4.177978 -5.351237 4.6241171
## 4 -3.097846 1.558150 -1.9827840
## 5 1.178151 1.492463 1.3562123
## 6 -3.411292 2.705698 -0.5844812
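sem.fitres() returns, for each case, the observed scores, the model-implied (hat.) scores, and the residuals (e.). A quick per-occasion summary of the residuals can flag occasions with unusually large spread (not part of the original output):
# Summarize case-level residuals by occasion
sapply(fitres[, c("e.t1", "e.t2", "e.t3", "e.t4")],
function(e) c(mean = mean(e), sd = sd(e)))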
library(ggplot2)
library(cowplot)
plots <- list()
for (var in c("t1", "t2", "t3", "t4")) {
predicted <- paste0("hat.", var)
p <- ggplot(fitres, aes(x = .data[[predicted]], y = .data[[var]])) +
geom_point(alpha = 0.6, color = "black") +
geom_smooth(method = "loess", se = FALSE, color = "black", linewidth = 1.2, span = 0.5) +
labs(
title = paste("Predicted vs Actual", var),
x = paste("Predicted", var),
y = paste("Actual", var)
) +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(hjust = 0.5))
plots[[var]] <- p
}
# Arrange the 4 plots into a 2x2 grid
combined <- plot_grid(
plots$t1, plots$t2,
plots$t3, plots$t4,
labels = c("A", "B", "C", "D"), # optional subplot labels
ncol = 2
)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
print(combined)
# Delta chi-square: change in the model chi-square when each case is deleted
Dchi <- Deltachi(FCSI, data=Osborne)
head(Dchi)
## [1] -0.005102748 -1.534880394 1.439814358 -0.207210189 -0.258932661
## [6] 0.534237557
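Deltachi() approximates, for each case, how much the model chi-square changes when that case is deleted. A hand-rolled check for a single case might look like the following (a sketch; influence.SEM's exact sign convention and scaling may differ):
# Manual delta chi-square for case 1: full-sample chi-square minus the
# chi-square of the model refit without that case
chisq_full <- fitMeasures(FCSIfit, "chisq")
fit_drop1 <- lavaan(FCSI, data = Osborne[-1, ])
chisq_full - fitMeasures(fit_drop1, "chisq")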
Dchi_df <- as.data.frame(Dchi)
Dchi_df <- Dchi_df %>%
mutate(Obs = row_number())
library(ggplot2)
ggplot(Dchi_df, aes(x = Obs, y = Dchi)) +
geom_point() +
geom_line() +
labs(x = "Observation", y = "Delta Chi Value") +
scale_y_continuous(breaks = seq(from = -1.6, to = 1.6, by = 0.2)) +
labs(title = "Figure 17.3 Delta Chi by Observation") +
theme_minimal()
# Cases with largest impact on chi-square
sort(Dchi, decreasing=TRUE)[1:5]
## [1] 1.6448782 1.4398144 1.0072240 0.9012845 0.8212425
# Likelihood distance
LD <- Likedist(FCSI, data=Osborne)
head(LD)
## [1] -0.04736678 0.10404429 0.25827830 -0.03033972 0.04330706 -0.04157347
# Identify large likelihood distances
sort(LD, decreasing=TRUE)[1:5]
## [1] 0.9250000 0.3047206 0.2929142 0.2810829 0.2582783
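Because sort() discards case indices, order() is needed to see which observations produced the largest distances:
# Case indices of the five largest likelihood distances
order(LD, decreasing = TRUE)[1:5]
which.max(LD) # the single most influential case on this measure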
library(ggplot2)
# Combine Dchi and LD into a data frame
diagnostics <- data.frame(Dchi = Dchi, LD = LD)
# Plot Dchi vs LD
ggplot(diagnostics, aes(x = LD, y = Dchi)) +
geom_point() +
labs(
title = "Figure 17.4 Delta Chi-Square vs Likelihood Distance",
x = "Likelihood Distance (LD)",
y = "Delta Chi-Square (Dchi)"
) +
theme_minimal()
print(LD)
## [1] -0.0473667804 0.1040442927 0.2582782978 -0.0303397168 0.0433070633
## [6] -0.0415734654 0.0426755355 0.0111560242 -0.0509867130 -0.0291998732
## [11] 0.0310122059 -0.0293330241 0.0415975749 0.0920153169 0.0026045857
## [16] -0.0039212041 0.0658798486 0.0009090326 0.0041352423 -0.0279721104
## [21] -0.0088672622 0.1791359179 0.0015018644 -0.0038480226 0.0548692866
## [26] 0.0216406699 0.0607361096 0.1170574174 -0.0024325877 0.0043838953
## [31] -0.0277413984 0.0046272355 0.0113307787 -0.0435689572 -0.0252830420
## [36] 0.0322026191 0.0243629341 0.0237467133 -0.0213574649 -0.0115643219
## [41] -0.0001031683 0.0312855323 0.0315553368 -0.0401444996 0.0078378686
## [46] -0.0179579135 -0.0356908061 0.0418365127 -0.0023933599 -0.0064861671
## [51] 0.0360684587 0.0219532360 0.0488390362 -0.0054438825 -0.0255294984
## [56] 0.0154268449 0.0304230127 0.0108516071 -0.0192108189 -0.0213916468
## [61] -0.0108868035 -0.0018412737 0.0402445657 0.1061490432 -0.0068365624
## [66] -0.0375175243 -0.0316579580 0.0074971510 0.0032954054 0.0024558282
## [71] 0.0580057027 0.0164461748 0.0186760295 0.0163279973 0.0092382829
## [76] 0.0186958195 0.0422671181 -0.0125876532 -0.0099109049 0.0374509643
## [81] 0.0200053309 -0.0312947680 0.0037486140 -0.0125400777 -0.0038609404
## [86] 0.0200250393 -0.0299106059 0.2810829318 0.0322785721 0.0240735935
## [91] 0.0581887942 0.0713127304 0.0700059916 0.0796602806 0.0312772152
## [96] 0.0216353667 0.0754596314 0.0277952782 0.1679018395 0.0273264827
## [101] -0.0286600379 -0.0387945026 0.0079736505 -0.0012857239 0.0431700089
## [106] 0.0025627289 -0.0132523067 0.0135204660 0.0313305859 -0.0426323568
## [111] -0.0346311252 -0.0099647427 -0.0505048599 -0.0150236601 0.0042681784
## [116] 0.1253917479 0.0422623618 0.0313724093 -0.0003570950 0.9249999972
## [121] -0.0191562735 0.0170334103 0.0121588656 -0.0096714809 0.2160895658
## [126] 0.0029818200 0.2101949086 0.0833741515 -0.0124102775 -0.0165454937
## [131] 0.0478604451 0.0121564974 0.1243401279 -0.0509481935 0.1297324542
## [136] -0.0088268027 0.0422671031 0.0820699744 0.0438263213 0.0982832808
## [141] -0.0432729391 0.0088707949 0.1706977014 0.1162218949 -0.0280574646
## [146] 0.1938111856 0.0613750838 -0.0543526220 0.0555533621 0.0076544814
## [151] -0.0091457563 0.0734748181 0.0353834079 0.0373444578 -0.0414394489
## [156] 0.0146454040 0.0687156922 0.0237802020 0.1814262130 0.0158067048
## [161] -0.0002150501 -0.0069273237 0.0233820226 0.0354376504 0.0059591027
## [166] 0.0031397821 -0.0028367496 -0.0134720737 0.0897303570 -0.0519023422
## [171] 0.1840947956 0.0643016564 0.1328404142 0.0147573401 0.1275711989
## [176] -0.0175480014 -0.0364903619 0.0230614089 0.0130658267 0.0615952441
## [181] 0.0557800302 0.0303438244 -0.0186455780 0.3047205567 -0.0479207291
## [186] 0.1729446302 0.0367499472 0.0105511783 0.0963754902 0.0591708161
## [191] 0.0405602598 0.0114126934 -0.0543111896 -0.0372855436 0.0543512007
## [196] 0.0357045918 0.0434242222 0.0849158302 0.2364314183 0.1098836287
## [201] 0.0209442775 -0.0158966677 0.0807072213 0.2929142063
LDdf <- data.frame(LD)
# Deciles of each occasion (for reference when inspecting influential cases)
quantile(Osborne$t1, probs = seq(0.1, 0.9, by = 0.1))
## 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 11.358 13.046 15.196 16.954 18.150 20.290 21.900 24.526 27.190
quantile(Osborne$t2, probs = seq(0.1, 0.9, by = 0.1))
## 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 17.876 20.034 21.466 23.552 25.375 28.624 31.092 33.190 36.248
quantile(Osborne$t3, probs = seq(0.1, 0.9, by = 0.1))
## 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 26.774 29.380 31.833 34.112 35.645 37.280 39.710 43.190 45.543
quantile(Osborne$t4, probs = seq(0.1, 0.9, by = 0.1))
## 10% 20% 30% 40% 50% 60% 70% 80% 90%
## 34.683 38.632 40.917 43.988 47.475 50.448 53.669 56.770 60.555
# Generalized Cook's distance
gCD <- genCookDist(FCSI, data=Osborne)
head(gCD)
## [1] 0.03206096 0.08574387 0.18691677 0.04050668 0.08240841 0.02328056
# Top generalized Cook distances
sort(gCD, decreasing=TRUE)[1:5]
## [1] 0.9850733 0.5085786 0.3363826 0.2591383 0.2528644
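Again, order() recovers the case indices behind the sorted values; the top case (gCD of about 0.99) stands well clear of the rest:
# Case indices of the largest generalized Cook's distances
order(gCD, decreasing = TRUE)[1:5]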
gCD_df <- data.frame(gCD)
gCD_df <- gCD_df %>%
mutate(Obs = row_number())
ggplot(gCD_df, aes(x = Obs, y = gCD)) +
geom_point() +
labs(
title = "Figure 17.5 Generalized Cook's Distance",
x = "Observation",
y = "Generalized Cook's Distance"
) +
theme_minimal()
library(lavaan)
library(influence.SEM)
library(dplyr)
library(tidyr)
library(ggplot2)
# Parameter influence
FCSI <- '
i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4
s =~ NA*t1 + t2 + t3 + t4
# fix the slope variance to 1 for identification
s ~~ 1*s
i ~~ var_i*i
i ~~ 0*s
t1 ~~ var1*t1
t2 ~~ var2*t2
t3 ~~ var3*t3
t4 ~~ var4*t4
t1 ~ 0*1
t2 ~ 0*1
t3 ~ 0*1
t4 ~ 0*1
i ~ 1
s ~ 1
'
FCSIfit <- sem(FCSI, data=Osborne)
summary(FCSIfit, standardized=TRUE)
## lavaan 0.6-19 ended normally after 66 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Number of observations 204
##
## Model Test User Model:
##
## Test statistic 7.713
## Degrees of freedom 3
## P-value (Chi-square) 0.052
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## i =~
## t1 1.000 4.328 0.686
## t2 1.000 4.328 0.610
## t3 1.000 4.328 0.536
## t4 1.000 4.328 0.432
## s =~
## t1 3.591 0.749 4.796 0.000 3.591 0.569
## t2 4.821 0.694 6.950 0.000 4.821 0.680
## t3 6.300 0.661 9.531 0.000 6.300 0.780
## t4 8.108 0.676 11.996 0.000 8.108 0.809
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## i ~~
## s 0.000 0.000 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .t1 0.000 0.000 0.000
## .t2 0.000 0.000 0.000
## .t3 0.000 0.000 0.000
## .t4 0.000 0.000 0.000
## i -3.952 6.384 -0.619 0.536 -0.913 -0.913
## s 6.332 0.718 8.817 0.000 6.332 6.332
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## s 1.000 1.000 1.000
## i (var_) 18.731 6.179 3.031 0.002 1.000 1.000
## .t1 (var1) 8.211 1.424 5.767 0.000 8.211 0.206
## .t2 (var2) 8.348 1.103 7.567 0.000 8.348 0.166
## .t3 (var3) 6.896 1.154 5.976 0.000 6.896 0.106
## .t4 (var4) 15.863 2.675 5.930 0.000 15.863 0.158
parameterEstimates(FCSIfit)
## lhs op rhs label est se z pvalue ci.lower ci.upper
## 1 i =~ t1 1.000 0.000 NA NA 1.000 1.000
## 2 i =~ t2 1.000 0.000 NA NA 1.000 1.000
## 3 i =~ t3 1.000 0.000 NA NA 1.000 1.000
## 4 i =~ t4 1.000 0.000 NA NA 1.000 1.000
## 5 s =~ t1 3.591 0.749 4.796 0.000 2.123 5.058
## 6 s =~ t2 4.821 0.694 6.950 0.000 3.461 6.180
## 7 s =~ t3 6.300 0.661 9.531 0.000 5.004 7.595
## 8 s =~ t4 8.108 0.676 11.996 0.000 6.783 9.432
## 9 s ~~ s 1.000 0.000 NA NA 1.000 1.000
## 10 i ~~ i var_i 18.731 6.179 3.031 0.002 6.620 30.842
## 11 i ~~ s 0.000 0.000 NA NA 0.000 0.000
## 12 t1 ~~ t1 var1 8.211 1.424 5.767 0.000 5.420 11.001
## 13 t2 ~~ t2 var2 8.348 1.103 7.567 0.000 6.186 10.510
## 14 t3 ~~ t3 var3 6.896 1.154 5.976 0.000 4.634 9.157
## 15 t4 ~~ t4 var4 15.863 2.675 5.930 0.000 10.620 21.107
## 16 t1 ~1 0.000 0.000 NA NA 0.000 0.000
## 17 t2 ~1 0.000 0.000 NA NA 0.000 0.000
## 18 t3 ~1 0.000 0.000 NA NA 0.000 0.000
## 19 t4 ~1 0.000 0.000 NA NA 0.000 0.000
## 20 i ~1 -3.952 6.384 -0.619 0.536 -16.466 8.561
## 21 s ~1 6.332 0.718 8.817 0.000 4.925 7.740
PAR <- c("s=~t1","s=~t2","s=~t3","s=~t4","i~1","s~1")
LY <- parinfluence(PAR,FCSI,Osborne,cook=TRUE)
str(LY)
## List of 2
## $ gCD : num [1:204, 1:6] 1.71e-05 1.82e-03 6.47e-04 2.69e-03 6.73e-03 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:6] "s=~t1" "s=~t2" "s=~t3" "s=~t4" ...
## $ Dparm: num [1:204, 1:6] 0.00413 0.04261 0.02543 0.05188 0.08206 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:6] "s=~t1" "s=~t2" "s=~t3" "s=~t4" ...
LY_df <- data.frame(LY)
LY_df <- LY_df %>%
mutate(Obs = row_number())
names(LY_df)
## [1] "gCD.s..t1" "gCD.s..t2" "gCD.s..t3" "gCD.s..t4" "gCD.i.1"
## [6] "gCD.s.1" "Dparm.s..t1" "Dparm.s..t2" "Dparm.s..t3" "Dparm.s..t4"
## [11] "Dparm.i.1" "Dparm.s.1" "Obs"
library(ggplot2)
library(dplyr)
library(tidyr)
# If LY_df doesn't yet have slope factor scores, add them from the fitted model
if (!"s" %in% names(LY_df)) {
s_scores <- as.numeric(lavPredict(FCSIfit)[, "s"])
stopifnot(length(s_scores) == nrow(LY_df))
LY_df$s <- s_scores # rows of LY_df follow case order
}
# To avoid any name clashes, rename 's' -> 'slope'
names(LY_df)[names(LY_df) == "s"] <- "slope"
# Build long frame with safe, string-based selection
want <- c("Obs", "slope", "Dparm.s..t1", "Dparm.s..t2", "Dparm.s..t3", "Dparm.s..t4")
long_s <- LY_df |>
select(all_of(want)) |>
pivot_longer(
cols = c("Dparm.s..t1","Dparm.s..t2","Dparm.s..t3","Dparm.s..t4"),
names_to = "param",
values_to = "influence"
) |>
mutate(occasion = dplyr::recode(param,
"Dparm.s..t1"="t1",
"Dparm.s..t2"="t2",
"Dparm.s..t3"="t3",
"Dparm.s..t4"="t4"))
# Median +/- 1.5*IQR fences per facet
bounds <- long_s |>
group_by(occasion) |>
summarise(
med = median(influence, na.rm = TRUE),
iqr = IQR(influence, na.rm = TRUE),
lower = med - 1.5 * iqr,
upper = med + 1.5 * iqr,
.groups = "drop"
)
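Cases falling outside these fences can also be listed directly for follow-up (a small helper using the bounds computed above):
# Flag cases outside the per-occasion fences
long_s |>
left_join(bounds, by = "occasion") |>
filter(influence < lower | influence > upper) |>
select(Obs, occasion, influence) |>
head()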
# Plot (note the .data pronoun + 'slope' column)
ggplot(long_s, aes(x = .data[["slope"]], y = .data[["influence"]])) +
geom_point(alpha = 0.6) +
geom_smooth(method = "loess", se = FALSE, span = 0.5) +
geom_hline(data = bounds, aes(yintercept = lower), linetype = "dashed", color = "red") +
geom_hline(data = bounds, aes(yintercept = upper), linetype = "dashed", color = "red") +
facet_wrap(~ occasion, ncol = 2, scales = "free_y") +
labs(
title = "Case Influence on Slope Loadings (per occasion)",
x = "Slope Factor Score (s)",
y = "Influence (Dparm)"
) +
theme_minimal(base_size = 12)
## `geom_smooth()` using formula = 'y ~ x'
library(lavaan)
library(influence.SEM)
library(dplyr)
library(tidyr)
library(ggplot2)
# --- Refit the latent-basis model ---
FCSI <- '
i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4
s =~ NA*t1 + t2 + t3 + t4
s ~~ 1*s
i ~~ var_i*i
i ~~ 0*s
t1 ~~ var1*t1
t2 ~~ var2*t2
t3 ~~ var3*t3
t4 ~~ var4*t4
t1 ~ 0*1
t2 ~ 0*1
t3 ~ 0*1
t4 ~ 0*1
i ~ 1
s ~ 1
'
FCSIfit <- sem(FCSI, data = Osborne)
# --- Influence for mean parameters only ---
PAR <- c("i~1","s~1")
LY <- parinfluence(PAR, FCSI, Osborne, cook = TRUE)
# Pull the Dparm contributions; the columns are "i~1" and "s~1"
Dparm <- as.data.frame(LY$Dparm)
Dparm$Obs <- seq_len(nrow(Dparm))
# Factor scores to use on x-axes
scores <- lavPredict(FCSIfit)
Dparm$i_score <- as.numeric(scores[, "i"])
Dparm$s_score <- as.numeric(scores[, "s"])
# Make sure we have the expected columns; a fallback rename is easy to add if needed
target_cols <- intersect(c("i~1","s~1"), names(Dparm))
stopifnot(length(target_cols) == 2)
# Long form; pair each mean’s influence with the matching factor score
long_means <- Dparm |>
pivot_longer(cols = all_of(target_cols),
names_to = "param",
values_to = "influence") |>
mutate(score = if_else(param == "i~1", i_score, s_score))
# Median +/- 1.5*IQR fences per facet
bounds <- long_means |>
group_by(param) |>
summarise(
med = median(influence, na.rm = TRUE),
iqr = IQR(influence, na.rm = TRUE),
lower = med - 1.5 * iqr,
upper = med + 1.5 * iqr,
.groups = "drop"
)
library(ggh4x)
ggplot(long_means, aes(x = score, y = influence)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "loess", se = FALSE, span = 0.5) +
geom_hline(data = bounds, aes(yintercept = lower), linetype = "dashed", color = "red") +
geom_hline(data = bounds, aes(yintercept = upper), linetype = "dashed", color = "red") +
facet_wrap(~ param, ncol = 2, scales = "free_x") +
ggh4x::facetted_pos_scales(
x = list(param == "s~1" ~ scale_x_continuous(limits = c(2, 10), expand = expansion(mult = 0)))
) +
labs(
title = "Case Influence on Factor Means",
x = "Matching Factor Score (i or s)",
y = "Influence (Dparm)"
) +
theme_minimal(base_size = 12)
## `geom_smooth()` using formula = 'y ~ x'
# Generalized Cook's distance
PAR <- c("i~1","s~1")
LY <- parinfluence(PAR, FCSI, Osborne, cook = TRUE)
# gCD matrix -> data frame; expect columns named "i~1" and "s~1"
gcd_df <- as.data.frame(LY$gCD)
# Fallback rename if your installation uses sanitized names
if (!all(c("i~1","s~1") %in% names(gcd_df))) {
alt_i <- intersect(c("gCD.i..1","gCD.i_1","gCD.i1","gCD.i.1","i..1","i_1","i1","i.1"), names(gcd_df))
alt_s <- intersect(c("gCD.s..1","gCD.s_1","gCD.s1","gCD.s.1","s..1","s_1","s1","s.1"), names(gcd_df))
if (length(alt_i)) names(gcd_df)[match(alt_i[1], names(gcd_df))] <- "i~1"
if (length(alt_s)) names(gcd_df)[match(alt_s[1], names(gcd_df))] <- "s~1"
}
stopifnot(all(c("i~1","s~1") %in% names(gcd_df)))
# Factor scores for x-axes
scores <- lavPredict(FCSIfit)
gcd_df$Obs <- seq_len(nrow(gcd_df))
gcd_df$i_score <- as.numeric(scores[, "i"])
gcd_df$s_score <- as.numeric(scores[, "s"])
# Long form pairing each mean’s gCD with the matching factor score
long_gcd <- gcd_df |>
pivot_longer(cols = c("i~1","s~1"),
names_to = "param",
values_to = "gCD") |>
mutate(score = if_else(param == "i~1", i_score, s_score))
# Median +/- 1.5*IQR fences (optional visual guides)
bounds <- long_gcd |>
group_by(param) |>
summarise(
med = median(gCD, na.rm = TRUE),
iqr = IQR(gCD, na.rm = TRUE),
lower = med - 1.5 * iqr,
upper = med + 1.5 * iqr,
.groups = "drop"
)
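The flagged cases can be pulled out before plotting as well (same idea as the slope-loading fences above):
# Cases above the upper fence for either factor mean
long_gcd |>
left_join(bounds, by = "param") |>
filter(gCD > upper) |>
select(Obs, param, gCD) |>
head()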
# Plot: per-facet x scales; force slope-mean (s~1) x-range to 2..10
ggplot(long_gcd, aes(x = score, y = gCD)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "loess", se = FALSE, span = 0.5) +
geom_hline(data = bounds, aes(yintercept = lower), linetype = "dashed", color = "red") +
geom_hline(data = bounds, aes(yintercept = upper), linetype = "dashed", color = "red") +
facet_wrap(~ param, ncol = 2, scales = "free_x") +
ggh4x::facetted_pos_scales(
x = list(param == "s~1" ~ scale_x_continuous(limits = c(2, 10), expand = expansion(mult = 0)))
) +
labs(
title = "Generalized Cook's Distance for Factor Means",
x = "Matching Factor Score (i or s)",
y = "gCD"
) +
theme_minimal(base_size = 12)
## `geom_smooth()` using formula = 'y ~ x'
# References
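Campbell, D. T., & Kenny, D. A. (1999). A primer on regression artifacts. New York: Guilford Press.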