Cultivation/framing logic suggests that heavier exposure to certain media can control the judgement of people about the real world. This research studies whether average weekly TV viewing is associated with the estimated percentage of the U.S. population who work in law enforcement/medicine/EMS roles that are regularly shown in TV content.
There is a positive association with the percentage of U.S. citizens between weekly hours of TV connsumption and the estimated percentage of people who work in law enforcement/medicine/EMS roles.
Video: mean hours/week of TV viewing over six months Pct: participant’s estimated percent of U.S. population working in law enforcement/medine/EMS
400 Volunteers
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `geom_smooth()` using formula = 'y ~ x'
| Leverage estimates for 10 largest outliers | |
| Row # | Leverage |
|---|---|
| 164 | 0.0305 |
| 360 | 0.0207 |
| 359 | 0.0194 |
| 371 | 0.0174 |
| 72 | 0.0162 |
| 201 | 0.0159 |
| 265 | 0.0159 |
| 392 | 0.0148 |
| 44 | 0.0144 |
| 97 | 0.0144 |
| Regression Analysis Results | ||||
| Coefficient Estimates | ||||
| Term | Estimate | Std. Error | t | p-value |
|---|---|---|---|---|
| (Intercept) | 23.2076 | 2.2026 | 10.5363 | 0.0000 |
| IV | 0.8440 | 0.0630 | 13.4056 | 0.0000 |
| Model Fit Statistics | |||||
| Overall Regression Performance | |||||
| R-squared | Adj. R-squared | F-statistic | df (model) | df (residual) | Residual Std. Error |
|---|---|---|---|---|---|
| 0.3111 | 0.3093 | 179.7107 | 1.0000 | 398.0000 | 9.7373 |
Code
##################################################
# 1. Install and load required packages
##################################################
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("gt")) install.packages("gt")
if (!require("gtExtras")) install.packages("gtExtras")
library(tidyverse)
library(gt)
library(gtExtras)
##################################################
# 2. Read in the dataset
##################################################
# Replace "YOURFILENAME.csv" with the actual filename
mydata <- read.csv("Cultivation.csv")
# ################################################
# # (Optional) 2b. Remove specific cases by row number
# ################################################
# # Example: remove rows 10 and 25
# rows_to_remove <- c(10, 25) # Edit and uncomment this line
# mydata <- mydata[-rows_to_remove, ] # Uncomment this line
##################################################
# 3. Define dependent variable (DV) and independent variable (IV)
##################################################
# Replace YOURDVNAME and YOURIVNAME with actual column names
mydata$DV <- mydata$pct
mydata$IV <- mydata$video
##################################################
# 4. Explore distributions of DV and IV
##################################################
# Make a histogram for DV
DVGraph <- ggplot(mydata, aes(x = DV)) +
geom_histogram(color = "black", fill = "#1f78b4")
# Make a histogram for IV
IVGraph <- ggplot(mydata, aes(x = IV)) +
geom_histogram(color = "black", fill = "#1f78b4")
##################################################
# 5. Fit and summarize initial regression model
##################################################
# Suppress scientific notation
options(scipen = 999)
# Fit model
myreg <- lm(DV ~ IV, data = mydata)
# Model summary
summary(myreg)
##################################################
# 6. Visualize regression and check for bivariate outliers
##################################################
# Create scatterplot with regression line as a ggplot object
RegressionPlot <- ggplot(mydata, aes(x = IV, y = DV)) +
geom_point(color = "#1f78b4") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(
title = "Scatterplot of DV vs IV with Regression Line",
x = "Independent Variable (IV)",
y = "Dependent Variable (DV)"
) +
theme_minimal()
##################################################
# 7. Check for potential outliers (high leverage points)
##################################################
# Calculate leverage values
hat_vals <- hatvalues(myreg)
# Rule of thumb: leverage > 2 * (number of predictors + 1) / n may be influential
threshold <- 2 * (length(coef(myreg)) / nrow(mydata))
# Create table showing 10 largest leverage values
outliers <- data.frame(
Obs = 1:nrow(mydata),
Leverage = hatvalues(myreg)
) %>%
arrange(desc(Leverage)) %>%
slice_head(n = 10)
# Format as a gt table
outliers_table <- outliers %>%
gt() %>%
tab_header(
title = "Leverage estimates for 10 largest outliers"
) %>%
cols_label(
Obs = "Row #",
Leverage = "Leverage"
) %>%
fmt_number(
columns = Leverage,
decimals = 4
)
##################################################
# 8. Create nicely formatted regression results tables
##################################################
# --- Coefficient-level results ---
reg_results <- as.data.frame(coef(summary(myreg))) %>%
tibble::rownames_to_column("Term") %>%
rename(
Estimate = Estimate,
`Std. Error` = `Std. Error`,
t = `t value`,
`p-value` = `Pr(>|t|)`
)
reg_table <- reg_results %>%
gt() %>%
tab_header(
title = "Regression Analysis Results",
subtitle = "Coefficient Estimates"
) %>%
fmt_number(
columns = c(Estimate, `Std. Error`, t, `p-value`),
decimals = 4
)
# --- Model fit statistics ---
reg_summary <- summary(myreg)
fit_stats <- tibble::tibble(
`R-squared` = reg_summary$r.squared,
`Adj. R-squared` = reg_summary$adj.r.squared,
`F-statistic` = reg_summary$fstatistic[1],
`df (model)` = reg_summary$fstatistic[2],
`df (residual)` = reg_summary$fstatistic[3],
`Residual Std. Error` = reg_summary$sigma
)
fit_table <- fit_stats %>%
gt() %>%
tab_header(
title = "Model Fit Statistics",
subtitle = "Overall Regression Performance"
) %>%
fmt_number(
columns = everything(),
decimals = 4
)
##################################################
# 9. Final print of key graphics and tables
##################################################
DVGraph
IVGraph
RegressionPlot
outliers_table
reg_table
fit_table