packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "flextable", "scales", "dplyr", "performance", "ggeffects", "marginaleffects", "ggplot2", "see" )
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: 패키지 'infer'는 R 버전 4.3.3에서 작성되었습니다
## Warning: 패키지 'effects'는 R 버전 4.3.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: carData
## Warning: 패키지 'carData'는 R 버전 4.3.3에서 작성되었습니다
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## Warning: 패키지 'survey'는 R 버전 4.3.3에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: grid
## 필요한 패키지를 로딩중입니다: Matrix
##
## 다음의 패키지를 부착합니다: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## 필요한 패키지를 로딩중입니다: survival
##
## 다음의 패키지를 부착합니다: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
## 다음의 패키지를 부착합니다: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
## Warning: 패키지 'aod'는 R 버전 4.3.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'aod'
##
## The following object is masked from 'package:survival':
##
## rats
## Warning: 패키지 'interactions'는 R 버전 4.3.3에서 작성되었습니다
## Warning: 패키지 'flextable'는 R 버전 4.3.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
## 다음의 패키지를 부착합니다: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
## Warning: 패키지 'performance'는 R 버전 4.3.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'ggeffects'
##
## The following object is masked from 'package:interactions':
##
## johnson_neyman
## Warning: 패키지 'marginaleffects'는 R 버전 4.3.3에서 작성되었습니다
## Warning: 패키지 'see'는 R 버전 4.3.3에서 작성되었습니다
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "MASS" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "aod" "MASS" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "interactions" "aod" "MASS" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "flextable" "interactions" "aod" "MASS" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "scales" "flextable" "interactions" "aod" "MASS"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "scales" "flextable" "interactions" "aod" "MASS"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[13]]
## [1] "performance" "scales" "flextable" "interactions" "aod"
## [6] "MASS" "survey" "survival" "Matrix" "grid"
## [11] "effects" "carData" "modelsummary" "fst" "infer"
## [16] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [26] "stats" "graphics" "grDevices" "utils" "datasets"
## [31] "methods" "base"
##
## [[14]]
## [1] "ggeffects" "performance" "scales" "flextable" "interactions"
## [6] "aod" "MASS" "survey" "survival" "Matrix"
## [11] "grid" "effects" "carData" "modelsummary" "fst"
## [16] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
##
## [[15]]
## [1] "marginaleffects" "ggeffects" "performance" "scales"
## [5] "flextable" "interactions" "aod" "MASS"
## [9] "survey" "survival" "Matrix" "grid"
## [13] "effects" "carData" "modelsummary" "fst"
## [17] "infer" "lubridate" "forcats" "stringr"
## [21] "dplyr" "purrr" "readr" "tidyr"
## [25] "tibble" "ggplot2" "tidyverse" "stats"
## [29] "graphics" "grDevices" "utils" "datasets"
## [33] "methods" "base"
##
## [[16]]
## [1] "marginaleffects" "ggeffects" "performance" "scales"
## [5] "flextable" "interactions" "aod" "MASS"
## [9] "survey" "survival" "Matrix" "grid"
## [13] "effects" "carData" "modelsummary" "fst"
## [17] "infer" "lubridate" "forcats" "stringr"
## [21] "dplyr" "purrr" "readr" "tidyr"
## [25] "tibble" "ggplot2" "tidyverse" "stats"
## [29] "graphics" "grDevices" "utils" "datasets"
## [33] "methods" "base"
##
## [[17]]
## [1] "see" "marginaleffects" "ggeffects" "performance"
## [5] "scales" "flextable" "interactions" "aod"
## [9] "MASS" "survey" "survival" "Matrix"
## [13] "grid" "effects" "carData" "modelsummary"
## [17] "fst" "infer" "lubridate" "forcats"
## [21] "stringr" "dplyr" "purrr" "readr"
## [25] "tidyr" "tibble" "ggplot2" "tidyverse"
## [29] "stats" "graphics" "grDevices" "utils"
## [33] "datasets" "methods" "base"
italy_data <- read_fst("italy_data.fst")
rp <- italy_data # renaming italy_data to rp (research project) for convenience
rp$weight <- rp$dweight * rp$pweight
survey_design <- svydesign(ids = ~1, data = rp, weights = ~weight)
# cleaning each variable of interest in accordance with its maximum relevant value
rp$netusoft <- ifelse(rp$netusoft > 5, NA, rp$netusoft)
rp$happy <- ifelse(rp$happy > 10, NA, rp$happy)
rp$sclmeet <- ifelse(rp$sclmeet > 7, NA, rp$sclmeet)
rp$health <- ifelse(rp$health > 5, NA, rp$health)
rp$netusoft <- scales::rescale(rp$netusoft, to=c(1, 5), na.rm = TRUE)
rp$happy <- scales::rescale(rp$happy, to=c(0, 10), na.rm = TRUE)
rp$sclmeet <- scales::rescale(rp$sclmeet, to=c(1, 7), na.rm = TRUE)
# fliping this variable because it is originally higher value = worse health
rp$healthy <- scales::rescale(rp$health, to=c(5, 1), na.rm = TRUE)
# creating a new numeric variable that encompasses subjective well-being variables
rp$subwel <- scales::rescale(rp$happy + rp$sclmeet + rp$healthy, to=c(0, 100), na.rm = TRUE)
# making sure variables and subsets are cleaned
rp <- rp %>% filter(!is.na(netusoft))
rp <- rp %>% filter(!is.na(happy))
rp <- rp %>% filter(!is.na(sclmeet))
rp <- rp %>% filter(!is.na(healthy))
rp <- rp %>% filter(!is.na(subwel))
table(rp$netusoft)
##
## 1 2 3 4 5
## 1529 630 510 1104 4144
table(rp$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 62 25 96 187 269 541 1074 2083 2305 761 514
table(rp$sclmeet)
##
## 1 2 3 4 5 6 7
## 235 712 617 1793 1515 2170 875
table(rp$healthy)
##
## 1 2 3 4 5
## 72 363 1979 3617 1886
table(rp$subwel)
##
## 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75
## 6 8 13 19 28 56 67 128 192 260 346 541 746 986 1090 1083
## 80 85 90 95 100
## 963 748 407 179 51
# re-coding the frequency of internet use into five categories
rp <- rp %>%
mutate(
freqnet = case_when(
netusoft == 1 ~ "Never",
netusoft == 2 ~ "Only occasionally",
netusoft == 3 ~ "A few times a week",
netusoft == 4 ~ "Most days",
netusoft == 5 ~ "Every day",
TRUE ~ as.character(netusoft)
),
freqnet = fct_relevel(factor(freqnet), # putting categories in an descending order
"Never",
"Only occasionally",
"A few times a week",
"Most days",
"Every day")
)
rp <- rp %>% filter(!is.na(freqnet)) # making sure the variable is cleaned
table(rp$freqnet)
##
## Never Only occasionally A few times a week Most days
## 1529 630 510 1104
## Every day
## 4144
# re-coding year of birth into four generations
rp <- rp %>%
mutate(
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")), # renaming 1, 2, 3, 4 to correlated generations
)
rp <- rp %>% filter(!is.na(yrbrn)) # making sure the variable is cleaned
rp <- rp %>% filter(!is.na(gen)) # making sure the variable is cleaned
table(rp$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 984 2390 2070 1659
model1 <- lm(subwel ~ freqnet, data = rp, weights = weight)
modelsummary(
list(model1),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}"),
statistic = NULL,
title = "Simple Linear Regression Model With freqnet as Predictor",
# not renaming for coefficients because the purpose of this table is to simply identify the number of observations contained in my sample
output = "flextable")
| (1) |
|---|---|
(Intercept) | 57.5 (0.4)*** |
freqnetOnly occasionally | 4.4 (0.7)*** |
freqnetA few times a week | 6.8 (0.7)*** |
freqnetMost days | 9.9 (0.6)*** |
freqnetEvery day | 14.9 (0.4)*** |
Num.Obs. | 7103 |
R2 | 0.155 |
R2 Adj. | 0.154 |
AIC | 57730.8 |
BIC | 57772.0 |
Log.Lik. | -28859.378 |
RMSE | 14.01 |
# Below is what I did for saving Table 1 as Word
# model1_sum <- modelsummary(
# list(model1),
# fmt = 1,
# estimate = c( "{estimate} ({std.error}){stars}"),
# statistic = NULL,
# title = "Simple Linear Regression Model With netusoft as Predictor",
# output = "flextable")
# flextable::save_as_docx(model1_sum, path = "Kim_Eric_Research_Project_Table_0.docx",
# width = 7.0, height = 7.0)
I have 7,103 observations in my dataset.
rp <- rp %>%
mutate(
freqnet = netusoft,
gen = yrbrn,
)
rp <- rp %>% filter(!is.na(freqnet))
rp <- rp %>% filter(!is.na(gen))
table1 <- datasummary_skim(rp %>% dplyr::select(freqnet, gen, subwel), title = "Table 1: Descriptive Statistics of Variables of Interest", output = "flextable", histogram = FALSE, weights = weight)
table1
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
freqnet | 5 | 0 | 3.6 | 1.6 | 1.0 | 4.0 | 5.0 |
gen | 74 | 0 | 1965.4 | 17.0 | 1921.0 | 1966.0 | 1996.0 |
subwel | 21 | 0 | 67.1 | 15.2 | 0.0 | 70.0 | 100.0 |
# Below is what I did for saving Table 1 as Word
# flextable::save_as_docx(table1, path = "Kim_Eric_Research_Project_Table_1.docx", width = 7.0, height = 7.0)
# I have to re-do everything from the start
# This is because I turned the categorical variables freqnet and gen into numeric variables for providing Table 1
# This may seem overworking, but I wanted to prevent any differences to occur in my sample (e.g., number of observations, because if it changes, the mean, SD, etc., would get affected too)
italy_data <- read_fst("italy_data.fst")
rp <- italy_data # renaming italy_data to rp (research project) for convenience
rp$weight <- rp$dweight * rp$pweight
survey_design <- svydesign(ids = ~1, data = rp, weights = ~weight)
# cleaning each variable of interest in accordance with its maximum relevant value
rp$netusoft <- ifelse(rp$netusoft > 5, NA, rp$netusoft)
rp$happy <- ifelse(rp$happy > 10, NA, rp$happy)
rp$sclmeet <- ifelse(rp$sclmeet > 7, NA, rp$sclmeet)
rp$health <- ifelse(rp$health > 5, NA, rp$health)
rp$netusoft <- scales::rescale(rp$netusoft, to=c(1, 5), na.rm = TRUE)
rp$happy <- scales::rescale(rp$happy, to=c(0, 10), na.rm = TRUE)
rp$sclmeet <- scales::rescale(rp$sclmeet, to=c(1, 7), na.rm = TRUE)
# fliping this variable because it is originally higher value = worse health
rp$healthy <- scales::rescale(rp$health, to=c(5,1), na.rm = TRUE)
# creating a new numeric variable that encompasses subjective well-being variables
rp$subwel <- scales::rescale(rp$happy + rp$sclmeet + rp$healthy, to=c(0, 100), na.rm = TRUE)
# making sure variables and subsets are cleaned
rp <- rp %>% filter(!is.na(netusoft))
rp <- rp %>% filter(!is.na(happy))
rp <- rp %>% filter(!is.na(sclmeet))
rp <- rp %>% filter(!is.na(healthy))
rp <- rp %>% filter(!is.na(subwel))
table(rp$netusoft)
##
## 1 2 3 4 5
## 1529 630 510 1104 4144
table(rp$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 62 25 96 187 269 541 1074 2083 2305 761 514
table(rp$sclmeet)
##
## 1 2 3 4 5 6 7
## 235 712 617 1793 1515 2170 875
table(rp$healthy)
##
## 1 2 3 4 5
## 72 363 1979 3617 1886
table(rp$subwel)
##
## 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75
## 6 8 13 19 28 56 67 128 192 260 346 541 746 986 1090 1083
## 80 85 90 95 100
## 963 748 407 179 51
# re-coding the frequency of internet use into five categories
rp <- rp %>%
mutate(
freqnet = case_when(
netusoft == 1 ~ "Never",
netusoft == 2 ~ "Only occasionally",
netusoft == 3 ~ "A few times a week",
netusoft == 4 ~ "Most days",
netusoft == 5 ~ "Every day",
TRUE ~ as.character(netusoft)
),
freqnet = fct_relevel(factor(freqnet), # putting categories in an descending order
"Never",
"Only occasionally",
"A few times a week",
"Most days",
"Every day")
)
rp <- rp %>% filter(!is.na(freqnet)) # making sure the variable is cleaned
table(rp$freqnet)
##
## Never Only occasionally A few times a week Most days
## 1529 630 510 1104
## Every day
## 4144
# re-coding year of birth into four generations
rp <- rp %>%
mutate(
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")), # renaming 1, 2, 3, 4 to correlated generations
)
rp <- rp %>% filter(!is.na(yrbrn)) # making sure the variable is cleaned
rp <- rp %>% filter(!is.na(gen)) # making sure the variable is cleaned
table(rp$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 984 2390 2070 1659
test_stat <- rp %>%
specify(explanatory = freqnet, # independent
response = subwel) %>% # dependent
hypothesize(null = "independence") %>%
calculate(stat = "F") # using ANOVA because I have the numeric variable subwel as response (dependent) and the categorical variable freqnet as explanatory (independent) with more than three categories (five)
print(test_stat$stat)
## [1] 324.024
A test statistic of 324 is very, very, very large and thus indicates a significant difference in subjective well-being scale between individuals in different groups of frequency of internet use. It suggests that the mean subjective well-being scale is different across the five groups.
null_dist <- rp %>%
specify(explanatory = freqnet,
response = subwel) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "F")
p_val <- null_dist %>%
get_pvalue(obs_stat = test_stat, direction = "two-sided")
## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
# Below is what I did to obtain the critical value
# c_val
The p-value of 0 (not essentially 0, but very close to) indicates that there is a statistically significant association between frequency of internet use and subjective well-being in my dataset.
Also, the test statistic of 324 overly exceeds the critical value of 7.815, which means it is a strong evidence to reject the null hypothesis of this study
null_dist %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided")
## Warning: F usually corresponds to right-tailed tests. Proceed with caution.
Note: the test statistics is so extreme that the default visualization does not display well.
null_dist
## Response: subwel (numeric)
## Explanatory: freqnet (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.329
## 2 2 1.04
## 3 3 0.516
## 4 4 1.17
## 5 5 0.876
## 6 6 0.906
## 7 7 2.54
## 8 8 0.702
## 9 9 1.64
## 10 10 1.16
## # ℹ 990 more rows
test_stat <- 324
conf_int <- null_dist %>%
get_confidence_interval(level = 0.95, type = "percentile")
# visualizing the null distribution with manual x-axis limits
null_dist %>%
visualize(bins = 15, method = "simulation", dens_color = "black") +
geom_vline(aes(xintercept = test_stat), color = "red", linetype = "dashed", size = 1) +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int) +
coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
labs(title = "Figure 1: Simulation-Based Null Distribution",
x = "ANOVA",
y = "Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in min(diff(unique_loc)): min에 전달되는 인자들 중 누락이 있어 Inf를
## 반환합니다
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
I will use the plot above for the report and indicate that test statistic falls way outside the simulated null distribution.
reg_model1 <- lm(subwel ~ freqnet, data = rp, weights = weight) # SLR
reg_model2 <- lm(subwel ~ freqnet + gen, data = rp, weights = weight) # MLR
reg_model3 <- lm(subwel ~ freqnet + gen + freqnet*gen, data = rp, weights = weight) # MLR + Interaction
modelsummary(
list(reg_model1, reg_model2, reg_model3),
fmt = 1,
estimate = c("{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
coef_rename = c("freqnetOnly occasionally" = "Only occasionally", "freqnetA few times a week" = "A few times a week", "freqnetMost days" = "Most days", "freqnetEvery day" = "Every day", "genBaby Boomers" = "Baby Boomers", "genGen X" = "Gen X", "genMillennials" = "Millennials"), # renaming for a clearer output
statistic = NULL,
title = "Table 2: Regression models predicting subjective well-being",
output = "flextable")
| (1) | (2) | (3) |
|---|---|---|---|
(Intercept) | 57.5 (0.4)*** | 56.0 (0.5)*** | 55.7 (0.5)*** |
Only occasionally | 4.4 (0.7)*** | 2.6 (0.7)*** | 3.6 (1.7)* |
A few times a week | 6.8 (0.7)*** | 4.5 (0.8)*** | 7.3 (2.5)** |
Most days | 9.9 (0.6)*** | 7.0 (0.6)*** | 7.9 (2.3)*** |
Every day | 14.9 (0.4)*** | 11.2 (0.6)*** | 12.2 (1.6)*** |
Baby Boomers | 2.8 (0.6)*** | 3.7 (0.7)*** | |
Gen X | 3.9 (0.7)*** | 3.4 (1.5)* | |
Millennials | 8.4 (0.7)*** | 2.4 (4.1) | |
Only occasionally:Baby Boomers | -1.6 (1.9) | ||
A few times a week:Baby Boomers | -2.9 (2.7) | ||
Most days:Baby Boomers | -1.0 (2.5) | ||
Every day:Baby Boomers | -2.5 (1.8) | ||
Only occasionally:Gen X | -0.1 (2.4) | ||
A few times a week:Gen X | -1.8 (3.0) | ||
Most days:Gen X | -0.4 (2.8) | ||
Every day:Gen X | -0.2 (2.2) | ||
Only occasionally:Millennials | 4.0 (4.8) | ||
A few times a week:Millennials | 0.2 (5.0) | ||
Most days:Millennials | 4.7 (4.7) | ||
Every day:Millennials | 5.7 (4.4) | ||
Num.Obs. | 7103 | 7103 | 7103 |
R2 | 0.155 | 0.177 | 0.179 |
R2 Adj. | 0.154 | 0.176 | 0.177 |
AIC | 57730.8 | 57547.9 | 57555.3 |
BIC | 57772.0 | 57609.7 | 57699.5 |
Log.Lik. | -28859.378 | -28764.950 | -28756.636 |
RMSE | 14.01 | 13.82 | 13.80 |
# summarizing regression models to see p-values of each coefficient
summary(reg_model1)
##
## Call:
## lm(formula = subwel ~ freqnet, data = rp, weights = weight)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -97.184 -10.345 3.486 11.034 61.718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.5178 0.3622 158.800 < 2e-16 ***
## freqnetOnly occasionally 4.3556 0.6714 6.487 9.32e-11 ***
## freqnetA few times a week 6.7973 0.7398 9.188 < 2e-16 ***
## freqnetMost days 9.9083 0.5667 17.486 < 2e-16 ***
## freqnetEvery day 14.8629 0.4332 34.313 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.58 on 7098 degrees of freedom
## Multiple R-squared: 0.1547, Adjusted R-squared: 0.1542
## F-statistic: 324.7 on 4 and 7098 DF, p-value: < 2.2e-16
summary(reg_model2)
##
## Call:
## lm(formula = subwel ~ freqnet + gen, data = rp, weights = weight)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -95.391 -9.254 1.542 12.757 63.917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.0044 0.4564 122.704 < 2e-16 ***
## freqnetOnly occasionally 2.6031 0.7012 3.712 0.000207 ***
## freqnetA few times a week 4.4718 0.7808 5.727 1.06e-08 ***
## freqnetMost days 6.9887 0.6381 10.953 < 2e-16 ***
## freqnetEvery day 11.1611 0.5510 20.256 < 2e-16 ***
## genBaby Boomers 2.8475 0.5853 4.865 1.17e-06 ***
## genGen X 3.8796 0.6585 5.891 4.01e-09 ***
## genMillennials 8.4272 0.6961 12.107 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.33 on 7095 degrees of freedom
## Multiple R-squared: 0.1768, Adjusted R-squared: 0.176
## F-statistic: 217.8 on 7 and 7095 DF, p-value: < 2.2e-16
summary(reg_model3)
##
## Call:
## lm(formula = subwel ~ freqnet + gen + freqnet * gen, data = rp,
## weights = weight)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -95.498 -9.060 1.182 12.573 64.343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.71091 0.50172 111.040 < 2e-16
## freqnetOnly occasionally 3.62070 1.67556 2.161 0.030737
## freqnetA few times a week 7.31588 2.47422 2.957 0.003118
## freqnetMost days 7.94082 2.29462 3.461 0.000542
## freqnetEvery day 12.22854 1.64201 7.447 1.07e-13
## genBaby Boomers 3.73873 0.74573 5.014 5.47e-07
## genGen X 3.36440 1.48635 2.264 0.023633
## genMillennials 2.39896 4.08374 0.587 0.556927
## freqnetOnly occasionally:genBaby Boomers -1.63145 1.92215 -0.849 0.396040
## freqnetA few times a week:genBaby Boomers -2.90843 2.71362 -1.072 0.283851
## freqnetMost days:genBaby Boomers -0.95007 2.46214 -0.386 0.699603
## freqnetEvery day:genBaby Boomers -2.51574 1.79695 -1.400 0.161556
## freqnetOnly occasionally:genGen X -0.07333 2.44714 -0.030 0.976096
## freqnetA few times a week:genGen X -1.81220 3.03869 -0.596 0.550943
## freqnetMost days:genGen X -0.37093 2.78425 -0.133 0.894020
## freqnetEvery day:genGen X -0.17861 2.19143 -0.082 0.935042
## freqnetOnly occasionally:genMillennials 4.03830 4.76305 0.848 0.396556
## freqnetA few times a week:genMillennials 0.20190 5.02833 0.040 0.967972
## freqnetMost days:genMillennials 4.65628 4.73984 0.982 0.325951
## freqnetEvery day:genMillennials 5.68315 4.38987 1.295 0.195498
##
## (Intercept) ***
## freqnetOnly occasionally *
## freqnetA few times a week **
## freqnetMost days ***
## freqnetEvery day ***
## genBaby Boomers ***
## genGen X *
## genMillennials
## freqnetOnly occasionally:genBaby Boomers
## freqnetA few times a week:genBaby Boomers
## freqnetMost days:genBaby Boomers
## freqnetEvery day:genBaby Boomers
## freqnetOnly occasionally:genGen X
## freqnetA few times a week:genGen X
## freqnetMost days:genGen X
## freqnetEvery day:genGen X
## freqnetOnly occasionally:genMillennials
## freqnetA few times a week:genMillennials
## freqnetMost days:genMillennials
## freqnetEvery day:genMillennials
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.32 on 7083 degrees of freedom
## Multiple R-squared: 0.1788, Adjusted R-squared: 0.1766
## F-statistic: 81.15 on 19 and 7083 DF, p-value: < 2.2e-16
# Below is what I did for saving Table 2 as Word
# supermodel summary_model_sum <- modelsummary(
# list(reg_model1, reg_model2, reg_model3),
# fmt = 1,
# estimate = c("{estimate} ({std.error}){stars}",
# "{estimate} ({std.error}){stars}",
# "{estimate} ({std.error}){stars}"),
# coef_rename = c("freqnetOnly occasionally" = "Only occasionally", "freqnetA few times a week" = "A few times a week", "freqnetMost days" = "Most days", "freqnetEvery day" = "Every day", "genBaby Boomers" = "Baby Boomers", "genGen X" = "Gen X", "genMillennials" = "Millennials"), # renaming for a clearer output
# statistic = NULL,
# title = "Table 2: Regression models predicting subjective well-being",
# output = "flextable")
# flextable::save_as_docx(reg_model_sum, path = "Kim_Eric_Research_Project_Table_2.docx", width = 7.0, height = 7.0)
reg_model4 <- lm(subwel ~ freqnet + gen + freqnet*gen, data = rp, weights = weight) # MLR + Interaction
interaction_plot <- effect("freqnet*gen", reg_model4, na.rm=TRUE)
plot(interaction_plot,
main="Figure 2: Interaction effect of freqnet and gen on subwel",
xlab="Frequency of internet use",
ylab="Subjective well-being scale")
I note that there is almost no, if not at all, interaction effect between the variables freqnet and gen.
# Below is what I did for checking model fit, using the performance package
# check_model(reg_model1)
# check_model(reg_model2)
# check_model(reg_model3)
# check_model(reg_model4)