# LOAD DATA
# Show the current working directory; "parent.csv" is resolved relative to it.
getwd()
## [1] "/Users/lezou/EDUC 793/R Code and Data"

# Copy of the file read with read.table() and explicit parsing options:
# comma separator, first row as header, "NA" as the missing-value marker,
# and text columns kept as character rather than factor.
parentInvolve_dataset <- read.table("parent.csv", sep = ",",
                             stringsAsFactors = FALSE, na.strings = "NA",
                             header = TRUE)

# Working dataset used by the rest of the script.
parent <- read.csv("parent.csv", stringsAsFactors = FALSE)
# CREATE PARENT INVOLVEMENT TOTAL SCORE
# The score is built from eight involvement items:
#   FSSPORTX, FSVOL, FSMTNG, FSPTMTNG, FSATCNFN, FSFUNDRS, FSCOMMTE, FSCOUNSLR
# Coding (as described by the script author): No = 0, Valid skip = -1,
# Yes = an item-specific weight between 1 and 3.
# Total score = sum of the 8 items.
involvement_items <- c(
  "FSSPORTX", "FSVOL", "FSMTNG", "FSPTMTNG",
  "FSATCNFN", "FSFUNDRS", "FSCOMMTE", "FSCOUNSLR"
)

# Recode valid skips (-1) to 0 so they add nothing to the total.
# which() drops NA comparisons, so missing responses stay NA at this step.
parent[involvement_items] <- lapply(
  parent[involvement_items],
  function(responses) replace(responses, which(responses == -1), 0)
)

# Row-wise sum over the eight items. Because na.rm = TRUE, a missing item
# counts as 0, so a row with every item missing receives a total of 0.
parent$involvement_total <- rowSums(parent[involvement_items], na.rm = TRUE)
# Draw graphs
# 1. LINE GRAPH:
#    Parent Involvement Total Score × EDUCATIONAL LEVEL
# Average the total score within each parent education level (1–11).
# NOTE(review): tapply() creates a group for every distinct P1EDUC value, so
# any codes outside 1–11 that remain in the data would appear on the x-axis
# as well; confirm against the codebook.
edu_means <- tapply(
  X = parent$involvement_total,
  INDEX = parent$P1EDUC,
  FUN = mean,
  na.rm = TRUE
)

# Connected points: the group labels (converted back to numbers) on the
# x-axis, the group means on the y-axis.
plot(
  x = as.numeric(names(edu_means)),
  y = edu_means,
  main = "Mean Parent Involvement by Parent Education Level",
  xlab = "Parent Education Level (1–11)",
  ylab = "Mean Parent Involvement Total Score",
  type = "o",    # draw both the line and the points
  pch = 19,      # solid circles
  col = "blue",  # blue line and points
  lwd = 2        # line width
)

# 2. LINE GRAPH:
#    Parent Involvement Total Score × INCOME LEVEL
# Average the total score within each household income category (1–12).
# NOTE(review): every distinct TTLHHINC value becomes its own group, so any
# codes outside 1–12 left in the data would be plotted too; confirm against
# the codebook.
income_means <- tapply(
  X = parent$involvement_total,
  INDEX = parent$TTLHHINC,
  FUN = mean,
  na.rm = TRUE
)

# Connected points: income category on the x-axis, mean score on the y-axis.
plot(
  x = as.numeric(names(income_means)),
  y = income_means,
  main = "Mean Parent Involvement by Household Income Level",
  xlab = "Household Income Category (1–12)",
  ylab = "Mean Parent Involvement Total Score",
  type = "o",     # draw both the line and the points
  pch = 19,       # solid circles
  col = "green",  # green line and points
  lwd = 2         # line width
)

# 3. SCATTERPLOT + RED TREND LINE:
#    Parent Involvement × Parent Work Hours
# One point per respondent: weekly work hours (x) against total score (y).
# NOTE(review): P1HRSWK is used as-is; if it contains special codes
# (e.g. negative skip/missing values) they are plotted and enter the fit
# below. Confirm against the codebook.
plot(
  x = parent$P1HRSWK,
  y = parent$involvement_total,
  main = "Parent Involvement by Parent Work Hours",
  xlab = "Parent Work Hours per Week",
  ylab = "Parent Involvement Total Score",
  pch = 19,        # solid circles
  col = "gray40"   # dark gray points
)

# Overlay the least-squares line of involvement_total on P1HRSWK in red.
abline(
  reg = lm(involvement_total ~ P1HRSWK, data = parent),
  col = "red",
  lwd = 2
)