1. R Codes

library(semPlot)
library(lavaan)

## This is lavaan 0.6-17
## lavaan is FREE software! Please report any bugs.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(qgraph)
library(magrittr)
# install.packages(c("semPlot", "lavaan", "dplyr", "qgraph", "magrittr"))
#First Input data

# set directory

# you will need to tell R where to find the data

#below is the location of data for me, so change to be relevant for you

#setwd("C:\Users\klmcdonald2\OneDrive - The University of Alabama\Documents\SEMsp25")

#create r data table

#name dataset "ex1sem"

# read data with col names

ex1sem<- readxl::read_xlsx("./hw1_data rev.xlsx")



# define missing values

ex1sem[ex1sem == -99] <- NA #setting missing values

print(ex1sem)

## # A tibble: 20 × 4
##      HW1   HW2   HW3 FINAL
##    <dbl> <dbl> <dbl> <dbl>
##  1    73    80    75    82
##  2    93    95    93    96
##  3    89    91    90    90
##  4    56    98    75    80
##  5    73    66    70    77
##  6    53    46    55    55
##  7    69    74    77    70
##  8    47    56    90    70
##  9    87    79    90    92
## 10    79    70    88    85
## 11    69    75    73    70
## 12    70    65    74    72
## 13    78    95    91    88
## 14    79    80    73    80
## 15    70    73    85    82
## 16    93    62    82    90
## 17    78    75    68    76
## 18    62    90    93    90
## 19    88    92    86    91
## 20    95    82    88    93

# get means for variables in data frame mydata

# excluding missing values

sapply(ex1sem, mean, na.rm=TRUE)

##   HW1   HW2   HW3 FINAL 
## 75.05 77.20 80.80 81.45

# mean,median,25th and 75th quartiles,min,max

summary(ex1sem)

##       HW1             HW2             HW3            FINAL      
##  Min.   :47.00   Min.   :46.00   Min.   :55.00   Min.   :55.00  
##  1st Qu.:69.00   1st Qu.:69.00   1st Qu.:73.75   1st Qu.:75.00  
##  Median :75.50   Median :77.00   Median :83.50   Median :82.00  
##  Mean   :75.05   Mean   :77.20   Mean   :80.80   Mean   :81.45  
##  3rd Qu.:87.25   3rd Qu.:90.25   3rd Qu.:90.00   3rd Qu.:90.00  
##  Max.   :95.00   Max.   :98.00   Max.   :93.00   Max.   :96.00

#install psych package

# can get basic descriptives

library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:lavaan':
## 
##     cor2cov

describeFast(ex1sem)

## 
##  Number of observations =  20 of which  20   are complete cases.   Number of variables =  4  of which  0  are numeric and  0  are factors 
## 
##  To list the items and their counts, print with short = FALSE

describe(ex1sem,na.rm=TRUE, skew = TRUE, ranges = TRUE,
         
         type=3,check=TRUE,fast=TRUE,quant=NULL,IQR=FALSE,omit=FALSE,data=NULL)

##       vars  n  mean    sd median min max range  skew kurtosis   se
## HW1      1 20 75.05 13.64   75.5  47  95    48 -0.32    -0.87 3.05
## HW2      2 20 77.20 13.95   77.0  46  98    52 -0.34    -0.70 3.12
## HW3      3 20 80.80 10.26   83.5  55  93    38 -0.66    -0.39 2.29
## FINAL    4 20 81.45 10.41   82.0  55  96    41 -0.68    -0.23 2.33

# model statements and request summary statistics

#install lavaan package if you haven't already

#library activates the package

library (lavaan)

# agg regressed on gender

model.1 <- ('FINAL ~ HW1+HW2+HW3
            HW1 ~~ HW2
            HW1 ~~ HW3
            HW2 ~~ HW3') 

fit <-sem(model.1,data=ex1sem, meanstructure=TRUE)

print(summary (fit, standardized=TRUE, rsquare= TRUE) )

## lavaan 0.6.17 ended normally after 70 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        14
## 
##   Number of observations                            20
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   FINAL ~                                                               
##     HW1               0.353    0.059    6.040    0.000    0.353    0.463
##     HW2               0.205    0.062    3.328    0.001    0.205    0.275
##     HW3               0.467    0.084    5.555    0.000    0.467    0.460
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   HW1 ~~                                                                
##     HW2              71.440   43.471    1.643    0.100   71.440    0.395
##     HW3              52.960   32.008    1.655    0.098   52.960    0.398
##   HW2 ~~                                                                
##     HW3              71.740   34.390    2.086    0.037   71.740    0.527
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .FINAL             1.327    5.979    0.222    0.824    1.327    0.131
##     HW1              75.050    2.973   25.246    0.000   75.050    5.645
##     HW2              77.200    3.041   25.386    0.000   77.200    5.676
##     HW3              80.800    2.237   36.124    0.000   80.800    8.078
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .FINAL             9.609    3.039    3.162    0.002    9.609    0.093
##     HW1             176.748   55.892    3.162    0.002  176.748    1.000
##     HW2             184.960   58.489    3.162    0.002  184.960    1.000
##     HW3             100.060   31.642    3.162    0.002  100.060    1.000
## 
## R-Square:
##                    Estimate
##     FINAL             0.907

# The Std.all column contains the fully standardized estimates 

# the Std.lv column contains the estimates standardized on the 

# variances of latent only (not relevant if no latent variables, as is case here)

2. Model structure plot

# ==============================================================================
# Helper Function: Add Significance Visuals (Flexible)
# ------------------------------------------------------------------------------
# Arguments:
#   type:   "std" (Standardized) or "est" (Unstandardized)
#   digits: Number of decimal places (default = 2)
# ==============================================================================
add_sig_visuals <- function(plot_object, fit_object, type = "std", digits = 2) {
  
  # 1. Process Parameters
  params <- parameterEstimates(fit_object, standardized = TRUE) %>%
    filter(op %in% c("~", "~~", "=~")) %>%
    mutate(
      # Select value based on user input ("est" or "std")
      val_to_show = if(type == "est") est else std.all,
      
      # Assign significance stars
      stars = case_when(
        pvalue < 0.001 ~ "***",
        pvalue < 0.01  ~ "**",
        pvalue < 0.05  ~ "*",
        TRUE           ~ ""
      ),
      
      # Define line type: Solid (1) if significant, Dashed (2) otherwise
      target_lty = ifelse(pvalue < 0.05, 1, 2),
      
      # Format label: Dynamic rounding + stars
      new_label = paste0(sprintf(paste0("%.", digits, "f"), val_to_show), stars)
    )
  
  # 2. Map Nodes
  node_names <- plot_object$graphAttributes$Nodes$names
  
  # 3. Update Edges
  num_edges <- length(plot_object$Edgelist$from)
  
  for(i in 1:num_edges) {
    from_node <- node_names[plot_object$Edgelist$from[i]]
    to_node   <- node_names[plot_object$Edgelist$to[i]]
    
    match_row <- params %>%
      filter((lhs == from_node & rhs == to_node) | 
             (lhs == to_node & rhs == from_node))
    
    if(nrow(match_row) > 0) {
      plot_object$graphAttributes$Edges$lty[i]    <- match_row$target_lty[1]
      plot_object$graphAttributes$Edges$labels[i] <- match_row$new_label[1]
    }
  }
  
  return(plot_object)
}

# Unstandardized plot
print(semPaths(fit, 
         whatLabels = "est", 
         layout = "tree", 
         rotation = 2, 
         style = "ram", 
         exoVar = FALSE, 
         residuals = TRUE, 
         intercepts = FALSE,
         curvature = 2,
         sizeMan = 8, 
         sizeMan2 = 5,
         edge.label.cex = 0.9, 
         mar = c(4, 5, 4, 6), 
         nCharNodes = 0,
         edge.color = "black",
         fade = F,DoNotPlot = TRUE)%>%
        add_sig_visuals(fit,type="est") %>%
        plot())

## $mar
## [1] 0 0 0 0
## 
## $bg
## [1] "white"

# standardized plot
p <- semPaths(fit, 
         whatLabels = "std", 
         layout = "tree", 
         rotation = 2, 
         style = "ram",  
         exoVar = FALSE, 
         residuals = TRUE, 
         intercepts = FALSE,
         curvature = 2,
         sizeMan = 8, 
         sizeMan2 = 5,
         edge.label.cex = 0.9, 
         mar = c(4, 5, 4, 6), 
         nCharNodes = 0,
         edge.color = "black",
         fade = F,DoNotPlot = TRUE)%>%
        add_sig_visuals(fit) %>%
        plot()

print(p)

## $mar
## [1] 0 0 0 0
## 
## $bg
## [1] "white"

3. Findings

To examine the hypothesis that final exam performance varies as a function of homework scores, a path analysis was conducted to examine how HW1, HW2, and HW3 were uniquely related to the FINAL exam score. The analysis significantly predicted 90.7% of the variance in final exam scores (\(R^2 = .91\), \(p < .001\)). HW1 scores were positively related to final exam performance (\(b = .35\), \(\beta = .463\), \(SE = .059\), \(p < .001\)), as were HW2 scores (\(b = .21\), \(\beta = .275\), \(SE = .062\), \(p = .001\)) and HW3 scores (\(b = .47\), \(\beta = .460\), \(SE = .084\), \(p < .001\)), such that higher scores on homework assignments were associated with higher grades on the final exam.

HW1-regression

Qipeng Chen

1. R Codes

2. Model structure plot

3. Findings