###
# Project: QMSS 201 Group Project
# Purpose: Group Project
# Author: Leah Adams, Diya Chadha, Ava Morisi
# Date: April 6 2025
# Data:
###
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
recreation_data <- read.csv("recreation_data.csv", header=TRUE)
recreation_data1 <- recreation_data %>%
mutate(AGE = as.numeric(AGE),
LIMIT1_A2 = as.factor(x = LIMIT1_A2),
LIMIT1_A2 = recode(.x = LIMIT1_A2, "1" = "Limits Participation", "2" = "Limits Participation", "3" = "Does Not Limit Participation"),
INCOME = na_if(INCOME, 99),
INCOME = as.numeric(INCOME),
EMP1 = as.character(EMP1),
EMP1 = na_if(EMP1, "99"),
EMP1 = recode(EMP1, '1' = 1, '2' = 2, '3' = 3, '4' = 3, '5' = 4, '6' = 4),
EMP1 = as.numeric(EMP1))
renamed_recreation_data <- recreation_data1 %>%
rename("EmploymentStatus" = EMP1)%>%
rename("FinancialLimitations" = LIMIT1_A2)
table(renamed_recreation_data$EmploymentStatus)
##
## 1 2 3 4
## 1186 343 334 1046
levels(renamed_recreation_data$EmploymentStatus)
## NULL
summary(renamed_recreation_data$AGE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 39.00 57.00 55.77 69.00 99.00
summary(renamed_recreation_data$INCOME)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 3.000 5.000 4.781 6.000 8.000 568
summary(renamed_recreation_data$EmploymentStatus)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 1.000 2.000 2.426 4.000 4.000 121
library(descr)
freq(x = renamed_recreation_data$EmploymentStatus, plot = FALSE)
## renamed_recreation_data$EmploymentStatus
## Frequency Percent Valid Percent
## 1 1186 39.142 40.77
## 2 343 11.320 11.79
## 3 334 11.023 11.48
## 4 1046 34.521 35.96
## NA's 121 3.993
## Total 3030 100.000 100.00
freq(x = renamed_recreation_data$INCOME, plot = FALSE)
## renamed_recreation_data$INCOME
## Frequency Percent Valid Percent
## 1 163 5.380 6.621
## 2 226 7.459 9.180
## 3 272 8.977 11.048
## 4 351 11.584 14.257
## 5 521 17.195 21.162
## 6 366 12.079 14.866
## 7 369 12.178 14.988
## 8 194 6.403 7.880
## NA's 568 18.746
## Total 3030 100.000 100.000
freq(x = renamed_recreation_data$FinancialLimitations, plot = FALSE)
## renamed_recreation_data$FinancialLimitations
## Frequency Percent
## Limits Participation 1420 46.86
## Does Not Limit Participation 1610 53.14
## Total 3030 100.00
# recode gender variable
cleanr_recreation_data <- renamed_recreation_data %>%
mutate(AGE = na_if(AGE, 99),AGE = na_if(AGE, 4),AGE = na_if(AGE, 16))%>%
mutate(agecategories = case_when(
AGE >= 18 & AGE <= 29 ~ '18-29',
AGE >= 30 & AGE <= 39 ~ '30-39',
AGE >= 40 & AGE <= 49 ~ '40-49',
AGE >= 50 & AGE <= 59 ~ '50-59',
AGE >= 60 & AGE <= 69 ~ '60-69',
AGE >= 70 & AGE <= 98 ~ '70+',
TRUE ~ NA_character_ )) %>%
mutate(agecategories = as.factor(agecategories))
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
table(cleanr_recreation_data$AGE)
##
## 11 15 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
## 1 1 17 13 23 29 33 18 32 17 36 35 43 30 53 41 28 43 48 47 42 41 41 44 45 30
## 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
## 39 39 32 42 38 51 21 43 51 52 45 48 54 50 51 64 43 61 74 53 64 74 64 76 69 68
## 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 62 68 70 66 47 43 46 35 27 26 21 29 14 8 15 8 12 2 4 4 1 3 3 1 1 1
## 94
## 2
table(cleanr_recreation_data$agecategories)
##
## 18-29 30-39 40-49 50-59 60-69 70+
## 326 428 380 519 672 489
class(cleanr_recreation_data$AGE)
## [1] "numeric"
class(cleanr_recreation_data$agecategories)
## [1] "factor"
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(cleanr_recreation_data, row.vars = "agecategories", col.vars = "FinancialLimitations", type = "c")
## FinancialLimitations Limits Participation Does Not Limit Participation
## agecategories
## 18-29 18.65 5.35
## 30-39 19.71 11.24
## 40-49 15.69 11.57
## 50-59 17.29 19.46
## 60-69 18.35 28.76
## 70+ 10.31 23.61
## Sum 100.00 100.00
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(cleanr_recreation_data, row.vars = "INCOME", col.vars = "FinancialLimitations", type = "c")
## FinancialLimitations Limits Participation Does Not Limit Participation
## INCOME
## 1 10.68 2.77
## 2 13.94 4.67
## 3 12.85 9.34
## 4 17.11 11.55
## 5 19.12 23.10
## 6 11.19 18.35
## 7 11.60 18.20
## 8 3.51 12.03
## Sum 100.00 100.00
source("http://pcwww.liv.ac.uk/~william/R/crosstab.r")
crosstab(cleanr_recreation_data, row.vars = "EmploymentStatus", col.vars = "FinancialLimitations", type = "c")
## FinancialLimitations Limits Participation Does Not Limit Participation
## EmploymentStatus
## 1 41.07 40.51
## 2 14.23 9.68
## 3 16.53 7.12
## 4 28.17 42.69
## Sum 100.00 100.00
class("EmploymentStatus")
## [1] "character"
chisq.test(table(cleanr_recreation_data$EmploymentStatus, cleanr_recreation_data$FinancialLimitations))
##
## Pearson's Chi-squared test
##
## data: table(cleanr_recreation_data$EmploymentStatus, cleanr_recreation_data$FinancialLimitations)
## X-squared = 111.07, df = 3, p-value < 2.2e-16
class("INCOME")
## [1] "character"
chisq.test(table(cleanr_recreation_data$INCOME, cleanr_recreation_data$FinancialLimitations))
##
## Pearson's Chi-squared test
##
## data: table(cleanr_recreation_data$INCOME, cleanr_recreation_data$FinancialLimitations)
## X-squared = 236.43, df = 7, p-value < 2.2e-16
class("agecategories")
## [1] "character"
chisq.test(table(cleanr_recreation_data$agecategories, cleanr_recreation_data$FinancialLimitations))
##
## Pearson's Chi-squared test
##
## data: table(cleanr_recreation_data$agecategories, cleanr_recreation_data$FinancialLimitations)
## X-squared = 253.87, df = 5, p-value < 2.2e-16
cleanr_recreation_data1 <- cleanr_recreation_data %>%
mutate(INCOME = as.numeric(INCOME))
lm1<- lm(FinancialLimitations ~ EmploymentStatus + INCOME + AGE, data=cleanr_recreation_data1)
## Warning in model.response(mf, "numeric"): using type = "numeric" with a factor
## response will be ignored
## Warning in Ops.factor(y, z$residuals): '-' not meaningful for factors
lm1
##
## Call:
## lm(formula = FinancialLimitations ~ EmploymentStatus + INCOME +
## AGE, data = cleanr_recreation_data1)
##
## Coefficients:
## (Intercept) EmploymentStatus INCOME AGE
## 0.767205 -0.001543 0.068967 0.008255
glm1 <- glm(FinancialLimitations ~ EmploymentStatus + INCOME + AGE, data=cleanr_recreation_data1, family = binomial("logit"))
glm1
##
## Call: glm(formula = FinancialLimitations ~ EmploymentStatus + INCOME +
## AGE, family = binomial("logit"), data = cleanr_recreation_data1)
##
## Coefficients:
## (Intercept) EmploymentStatus INCOME AGE
## -3.42788 -0.01493 0.32151 0.03868
##
## Degrees of Freedom: 2372 Total (i.e. Null); 2369 Residual
## (657 observations deleted due to missingness)
## Null Deviance: 3286
## Residual Deviance: 2870 AIC: 2878
library(odds.n.ends)
odds.n.ends(glm1)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 416.532 3 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 904 449 1353
## 0 327 693 1020
## Sum 1231 1142 2373
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 67.29878
##
## $`Model sensitivity`
## [1] 0.7343623
##
## $`Model specificity`
## [1] 0.6068301
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 0.03245565 0.02161476 0.04822928
## EmploymentStatus 0.98518288 0.90767114 1.06909381
## INCOME 1.37921425 1.31228609 1.45081579
## AGE 1.03944290 1.03279444 1.04626248
library(pandoc)