# Make sure the `tidyverse` package is installed.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the survey responses; readr guesses each column's type from the file.
df <- read_csv(file = "Data Science Assignment 2 CSV File.csv")
## Rows: 200 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Gender, Employment, Age, Income, Education
## dbl (11): SQ1, SQ2, SQ3, SQ4, SQ5, SQ6, SQ7, SQ8, SQ9, SQ10, SQ11
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The data set has 200 rows and 16 columns.
dim(df)
## [1] 200 16
# I also used this command to display the first six rows of my data set; this is useful for getting a quick glimpse of my data.
head(df)
## # A tibble: 6 × 16
## Gender Employment Age Income Education SQ1 SQ2 SQ3 SQ4 SQ5 SQ6
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Male Employed fu… 35-44 <NA> Master's… 5 4 3 5 3 5
## 2 Male Employed fu… 35-44 <NA> Master's… 5 5 5 5 5 5
## 3 Male Employed pa… 25-34 <NA> Master's… 5 5 4 4 5 5
## 4 Male Employed fu… 35-44 <NA> Master's… 5 5 4 3 2 4
## 5 Male Employed pa… 25-34 Less … Master's… 5 5 4 4 5 5
## 6 Female Employed fu… 45-54 $75,0… Master's… 3 4 3 3 3 3
## # ℹ 5 more variables: SQ7 <dbl>, SQ8 <dbl>, SQ9 <dbl>, SQ10 <dbl>, SQ11 <dbl>
# summary() is used here to look for missing values. For character columns it
# reports only Length/Class/Mode, so it shows no NA's for this data set. The
# head() output above does reveal incomplete data: the first four respondents
# have <NA> for Income.
summary(df)
## Gender Employment Age Income
## Length:200 Length:200 Length:200 Length:200
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Education SQ1 SQ2 SQ3
## Length:200 Min. :2.00 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:3.000
## Mode :character Median :5.00 Median :4.000 Median :4.000
## Mean :4.51 Mean :4.165 Mean :3.945
## 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.00 Max. :5.000 Max. :5.000
## SQ4 SQ5 SQ6 SQ7 SQ8
## Min. :1.000 Min. :1.0 Min. :1.00 Min. :1.000 Min. :1.00
## 1st Qu.:4.000 1st Qu.:3.0 1st Qu.:4.00 1st Qu.:4.000 1st Qu.:3.75
## Median :4.000 Median :4.0 Median :4.00 Median :5.000 Median :4.00
## Mean :4.115 Mean :3.9 Mean :4.09 Mean :4.305 Mean :3.94
## 3rd Qu.:5.000 3rd Qu.:5.0 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:5.00
## Max. :5.000 Max. :5.0 Max. :5.00 Max. :5.000 Max. :5.00
## SQ9 SQ10 SQ11
## Min. :1.00 Min. :1 Min. :1.000
## 1st Qu.:3.00 1st Qu.:4 1st Qu.:4.000
## Median :4.00 Median :4 Median :4.000
## Mean :3.92 Mean :4 Mean :4.155
## 3rd Qu.:5.00 3rd Qu.:5 3rd Qu.:5.000
## Max. :5.00 Max. :5 Max. :5.000
# na.omit() eliminates every row that contains at least one missing value.
df_no_na <- na.omit(df)
# 4 rows were eliminated due to missing values (200 rows before, 196 after).
dim(df_no_na)
## [1] 196 16
summary(df_no_na)
## Gender Employment Age Income
## Length:196 Length:196 Length:196 Length:196
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Education SQ1 SQ2 SQ3
## Length:196 Min. :2.0 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:4.0 1st Qu.:4.000 1st Qu.:3.000
## Mode :character Median :5.0 Median :4.000 Median :4.000
## Mean :4.5 Mean :4.153 Mean :3.944
## 3rd Qu.:5.0 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.0 Max. :5.000 Max. :5.000
## SQ4 SQ5 SQ6 SQ7
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.000 Median :5.000
## Mean :4.112 Mean :3.903 Mean :4.077 Mean :4.316
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## SQ8 SQ9 SQ10 SQ11
## Min. :1.000 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.00 1st Qu.:4.000
## Median :4.000 Median :4.000 Median :4.00 Median :4.000
## Mean :3.929 Mean :3.903 Mean :3.99 Mean :4.143
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.00 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.00 Max. :5.000
# Use the unique() command for each categorical variable (Gender, Employment,
# Age, Income, and Education) to identify the values in each column.
unique(df_no_na$Gender)
## [1] "Male" "Female"
## [3] "Prefer not to say" "Non-binary/third gender"
unique(df_no_na$Employment)
## [1] "Employed part-time" "Employed full-time" "Other"
unique(df_no_na$Age)
## [1] "25-34" "45-54" "55-64" "35-44" "18-24" "65+"
unique(df_no_na$Income)
## [1] "Less than $25000" "$75,000-$99,999+" "$25,000-$49,999" "$50,000-$74,999"
unique(df_no_na$Education)
## [1] "Master's Degree" "Doctorate/Professional Degree"
## [3] "Bachelor's Degree" "High School/GED"
## [5] "Some College/Associate Degree"
# Preview Gender as a factor. The result is only printed, not assigned; with
# no `levels` argument the levels come out in sorted order.
factor(df_no_na$Gender)
## [1] Male Female Male
## [4] Male Female Female
## [7] Male Female Male
## [10] Female Male Male
## [13] Male Female Male
## [16] Prefer not to say Female Male
## [19] Female Male Female
## [22] Female Male Female
## [25] Female Female Female
## [28] Male Male Male
## [31] Female Female Male
## [34] Male Male Male
## [37] Male Female Male
## [40] Female Female Non-binary/third gender
## [43] Male Female Male
## [46] Female Female Female
## [49] Female Female Male
## [52] Male Female Female
## [55] Female Female Female
## [58] Female Male Female
## [61] Male Male Female
## [64] Male Male Female
## [67] Female Male Female
## [70] Male Female Male
## [73] Male Male Female
## [76] Male Prefer not to say Female
## [79] Male Female Male
## [82] Female Female Male
## [85] Male Male Female
## [88] Male Prefer not to say Female
## [91] Male Female Male
## [94] Female Female Male
## [97] Female Female Female
## [100] Female Male Male
## [103] Male Female Female
## [106] Male Male Male
## [109] Male Male Female
## [112] Male Female Female
## [115] Non-binary/third gender Male Female
## [118] Male Female Female
## [121] Female Female Female
## [124] Male Male Female
## [127] Female Female Female
## [130] Female Female Male
## [133] Male Female Female
## [136] Female Female Female
## [139] Female Male Female
## [142] Male Male Female
## [145] Male Male Female
## [148] Female Male Female
## [151] Male Female Male
## [154] Male Male Female
## [157] Male Prefer not to say Female
## [160] Male Female Male
## [163] Female Female Male
## [166] Male Male Female
## [169] Male Prefer not to say Female
## [172] Male Female Male
## [175] Female Female Male
## [178] Female Female Female
## [181] Female Male Male
## [184] Male Female Female
## [187] Male Male Female
## [190] Female Female Female
## [193] Female Male Male
## [196] Female
## Levels: Female Male Non-binary/third gender Prefer not to say
# Preview Employment as a factor (printed only, not assigned).
factor(df_no_na$Employment)
## [1] Employed part-time Employed full-time Employed full-time
## [4] Employed full-time Employed full-time Employed full-time
## [7] Employed full-time Employed full-time Employed full-time
## [10] Employed full-time Employed full-time Employed full-time
## [13] Employed full-time Employed full-time Employed full-time
## [16] Employed full-time Employed full-time Employed full-time
## [19] Employed full-time Employed full-time Employed part-time
## [22] Other Employed full-time Employed part-time
## [25] Employed part-time Employed full-time Employed full-time
## [28] Employed full-time Employed full-time Employed full-time
## [31] Employed full-time Employed full-time Employed part-time
## [34] Employed full-time Employed full-time Employed full-time
## [37] Employed full-time Employed full-time Employed full-time
## [40] Employed full-time Employed part-time Employed full-time
## [43] Employed full-time Employed full-time Employed full-time
## [46] Employed full-time Employed full-time Employed full-time
## [49] Employed full-time Employed part-time Employed full-time
## [52] Employed full-time Employed full-time Employed part-time
## [55] Employed full-time Employed full-time Employed full-time
## [58] Employed full-time Employed full-time Employed full-time
## [61] Employed full-time Employed part-time Employed full-time
## [64] Employed full-time Employed full-time Employed full-time
## [67] Employed full-time Employed full-time Employed full-time
## [70] Employed full-time Employed full-time Employed full-time
## [73] Employed full-time Employed full-time Employed full-time
## [76] Employed full-time Employed full-time Employed full-time
## [79] Employed full-time Employed full-time Employed full-time
## [82] Employed part-time Other Employed full-time
## [85] Employed full-time Employed full-time Employed full-time
## [88] Employed full-time Employed full-time Employed full-time
## [91] Employed full-time Employed full-time Employed full-time
## [94] Employed part-time Other Employed full-time
## [97] Employed part-time Employed part-time Employed full-time
## [100] Employed full-time Employed full-time Employed full-time
## [103] Employed full-time Employed full-time Employed full-time
## [106] Employed part-time Employed full-time Employed full-time
## [109] Employed full-time Employed full-time Employed full-time
## [112] Employed full-time Employed full-time Employed part-time
## [115] Employed full-time Employed full-time Employed full-time
## [118] Employed full-time Employed full-time Employed full-time
## [121] Employed full-time Employed full-time Employed part-time
## [124] Employed full-time Employed full-time Employed full-time
## [127] Employed part-time Employed full-time Employed full-time
## [130] Employed full-time Employed full-time Employed full-time
## [133] Employed full-time Employed full-time Employed part-time
## [136] Employed full-time Employed full-time Employed full-time
## [139] Employed full-time Employed full-time Employed full-time
## [142] Employed full-time Employed part-time Employed full-time
## [145] Employed full-time Employed full-time Employed full-time
## [148] Employed full-time Employed full-time Employed full-time
## [151] Employed full-time Employed full-time Employed full-time
## [154] Employed full-time Employed full-time Employed full-time
## [157] Employed full-time Employed full-time Employed full-time
## [160] Employed full-time Employed full-time Employed full-time
## [163] Employed part-time Other Employed full-time
## [166] Employed full-time Employed full-time Employed full-time
## [169] Employed full-time Employed full-time Employed full-time
## [172] Employed full-time Employed full-time Employed full-time
## [175] Employed part-time Other Employed full-time
## [178] Employed part-time Employed part-time Employed full-time
## [181] Employed full-time Employed full-time Employed full-time
## [184] Employed full-time Employed full-time Employed full-time
## [187] Employed part-time Employed full-time Employed full-time
## [190] Employed full-time Employed full-time Employed full-time
## [193] Employed part-time Employed full-time Employed full-time
## [196] Employed full-time
## Levels: Employed full-time Employed part-time Other
# Preview Age as a factor (printed only, not assigned). The default sorted
# levels already run from the youngest to the oldest age band.
factor(df_no_na$Age)
## [1] 25-34 45-54 55-64 35-44 55-64 45-54 35-44 45-54 35-44 35-44 35-44 35-44
## [13] 35-44 35-44 55-64 25-34 35-44 45-54 35-44 55-64 18-24 25-34 35-44 45-54
## [25] 35-44 45-54 35-44 35-44 45-54 35-44 55-64 45-54 25-34 35-44 35-44 35-44
## [37] 45-54 45-54 45-54 35-44 65+ 35-44 35-44 45-54 55-64 45-54 55-64 25-34
## [49] 55-64 65+ 25-34 35-44 35-44 18-24 45-54 25-34 35-44 18-24 35-44 55-64
## [61] 35-44 25-34 45-54 55-64 35-44 55-64 45-54 35-44 45-54 35-44 35-44 35-44
## [73] 35-44 35-44 35-44 55-64 25-34 35-44 45-54 35-44 55-64 18-24 25-34 35-44
## [85] 35-44 35-44 35-44 55-64 25-34 35-44 45-54 35-44 55-64 18-24 25-34 35-44
## [97] 45-54 35-44 45-54 35-44 35-44 45-54 35-44 55-64 45-54 25-34 35-44 35-44
## [109] 35-44 45-54 45-54 45-54 35-44 65+ 35-44 35-44 45-54 55-64 45-54 55-64
## [121] 25-34 55-64 65+ 25-34 35-44 35-44 18-24 45-54 25-34 35-44 18-24 25-34
## [133] 35-44 35-44 18-24 45-54 25-34 35-44 18-24 35-44 55-64 35-44 25-34 45-54
## [145] 55-64 35-44 55-64 45-54 35-44 45-54 35-44 35-44 35-44 35-44 35-44 35-44
## [157] 55-64 25-34 35-44 45-54 35-44 55-64 18-24 25-34 35-44 35-44 35-44 35-44
## [169] 55-64 25-34 35-44 45-54 35-44 55-64 18-24 25-34 35-44 45-54 35-44 45-54
## [181] 35-44 35-44 45-54 35-44 55-64 45-54 25-34 55-64 45-54 55-64 25-34 55-64
## [193] 65+ 25-34 35-44 35-44
## Levels: 18-24 25-34 35-44 45-54 55-64 65+
# Preview Income as a factor (printed only, not assigned). The default sorted
# levels put "Less than $25000" last, which is why explicit levels are
# supplied further down.
factor(df_no_na$Income)
## [1] Less than $25000 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [5] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [9] $25,000-$49,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [13] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $25,000-$49,999
## [17] $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [21] Less than $25000 $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999
## [25] $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [29] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [33] $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [37] $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+
## [41] $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999
## [45] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [49] $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999
## [53] $50,000-$74,999 Less than $25000 $75,000-$99,999+ $75,000-$99,999+
## [57] $50,000-$74,999 Less than $25000 $75,000-$99,999+ $75,000-$99,999+
## [61] $75,000-$99,999+ Less than $25000 $75,000-$99,999+ $75,000-$99,999+
## [65] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [69] $75,000-$99,999+ $25,000-$49,999 $75,000-$99,999+ $75,000-$99,999+
## [73] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [77] $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [81] $75,000-$99,999+ Less than $25000 $50,000-$74,999 $75,000-$99,999+
## [85] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [89] $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [93] $75,000-$99,999+ Less than $25000 $50,000-$74,999 $75,000-$99,999+
## [97] $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [101] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [105] $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+
## [109] $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999
## [113] $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+
## [117] $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [121] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999
## [125] $50,000-$74,999 $50,000-$74,999 Less than $25000 $75,000-$99,999+
## [129] $75,000-$99,999+ $50,000-$74,999 Less than $25000 $50,000-$74,999
## [133] $50,000-$74,999 $50,000-$74,999 Less than $25000 $75,000-$99,999+
## [137] $75,000-$99,999+ $50,000-$74,999 Less than $25000 $75,000-$99,999+
## [141] $75,000-$99,999+ $75,000-$99,999+ Less than $25000 $75,000-$99,999+
## [145] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [149] $75,000-$99,999+ $75,000-$99,999+ $25,000-$49,999 $75,000-$99,999+
## [153] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [157] $75,000-$99,999+ $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+
## [161] $75,000-$99,999+ $75,000-$99,999+ Less than $25000 $50,000-$74,999
## [165] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [169] $75,000-$99,999+ $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+
## [173] $75,000-$99,999+ $75,000-$99,999+ Less than $25000 $50,000-$74,999
## [177] $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+
## [181] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [185] $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+
## [189] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [193] $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999 $50,000-$74,999
## 4 Levels: $25,000-$49,999 $50,000-$74,999 ... Less than $25000
# Preview Education as a factor (printed only, not assigned). The default
# levels are sorted alphabetically rather than by attainment.
factor(df_no_na$Education)
## [1] Master's Degree Master's Degree
## [3] Doctorate/Professional Degree Doctorate/Professional Degree
## [5] Doctorate/Professional Degree Master's Degree
## [7] Master's Degree Master's Degree
## [9] Doctorate/Professional Degree Doctorate/Professional Degree
## [11] Master's Degree Doctorate/Professional Degree
## [13] Doctorate/Professional Degree Master's Degree
## [15] Bachelor's Degree High School/GED
## [17] Master's Degree Master's Degree
## [19] Doctorate/Professional Degree Master's Degree
## [21] Some College/Associate Degree Bachelor's Degree
## [23] Master's Degree Bachelor's Degree
## [25] Bachelor's Degree Master's Degree
## [27] Master's Degree Master's Degree
## [29] Master's Degree Master's Degree
## [31] Master's Degree Master's Degree
## [33] Master's Degree Master's Degree
## [35] Master's Degree Doctorate/Professional Degree
## [37] Bachelor's Degree Master's Degree
## [39] Doctorate/Professional Degree Doctorate/Professional Degree
## [41] Doctorate/Professional Degree Master's Degree
## [43] Doctorate/Professional Degree Master's Degree
## [45] Master's Degree Master's Degree
## [47] Doctorate/Professional Degree Master's Degree
## [49] Doctorate/Professional Degree Doctorate/Professional Degree
## [51] Bachelor's Degree Doctorate/Professional Degree
## [53] Master's Degree Some College/Associate Degree
## [55] Master's Degree Master's Degree
## [57] Master's Degree Bachelor's Degree
## [59] Master's Degree Doctorate/Professional Degree
## [61] Master's Degree Master's Degree
## [63] Master's Degree Doctorate/Professional Degree
## [65] Doctorate/Professional Degree Doctorate/Professional Degree
## [67] Master's Degree Master's Degree
## [69] Master's Degree Doctorate/Professional Degree
## [71] Doctorate/Professional Degree Master's Degree
## [73] Doctorate/Professional Degree Doctorate/Professional Degree
## [75] Master's Degree Bachelor's Degree
## [77] High School/GED Master's Degree
## [79] Master's Degree Doctorate/Professional Degree
## [81] Master's Degree Some College/Associate Degree
## [83] Bachelor's Degree Master's Degree
## [85] Doctorate/Professional Degree Doctorate/Professional Degree
## [87] Master's Degree Bachelor's Degree
## [89] High School/GED Master's Degree
## [91] Master's Degree Doctorate/Professional Degree
## [93] Master's Degree Some College/Associate Degree
## [95] Bachelor's Degree Master's Degree
## [97] Bachelor's Degree Bachelor's Degree
## [99] Master's Degree Master's Degree
## [101] Master's Degree Master's Degree
## [103] Master's Degree Master's Degree
## [105] Master's Degree Master's Degree
## [107] Master's Degree Master's Degree
## [109] Doctorate/Professional Degree Bachelor's Degree
## [111] Master's Degree Doctorate/Professional Degree
## [113] Doctorate/Professional Degree Doctorate/Professional Degree
## [115] Master's Degree Doctorate/Professional Degree
## [117] Master's Degree Master's Degree
## [119] Master's Degree Doctorate/Professional Degree
## [121] Master's Degree Doctorate/Professional Degree
## [123] Doctorate/Professional Degree Bachelor's Degree
## [125] Doctorate/Professional Degree Master's Degree
## [127] Some College/Associate Degree Master's Degree
## [129] Master's Degree Master's Degree
## [131] Bachelor's Degree Bachelor's Degree
## [133] Doctorate/Professional Degree Master's Degree
## [135] Some College/Associate Degree Master's Degree
## [137] Master's Degree Master's Degree
## [139] Bachelor's Degree Master's Degree
## [141] Doctorate/Professional Degree Master's Degree
## [143] Master's Degree Master's Degree
## [145] Doctorate/Professional Degree Doctorate/Professional Degree
## [147] Doctorate/Professional Degree Master's Degree
## [149] Master's Degree Master's Degree
## [151] Doctorate/Professional Degree Doctorate/Professional Degree
## [153] Master's Degree Doctorate/Professional Degree
## [155] Doctorate/Professional Degree Master's Degree
## [157] Bachelor's Degree High School/GED
## [159] Master's Degree Master's Degree
## [161] Doctorate/Professional Degree Master's Degree
## [163] Some College/Associate Degree Bachelor's Degree
## [165] Master's Degree Doctorate/Professional Degree
## [167] Doctorate/Professional Degree Master's Degree
## [169] Bachelor's Degree High School/GED
## [171] Master's Degree Master's Degree
## [173] Doctorate/Professional Degree Master's Degree
## [175] Some College/Associate Degree Bachelor's Degree
## [177] Master's Degree Bachelor's Degree
## [179] Bachelor's Degree Master's Degree
## [181] Master's Degree Master's Degree
## [183] Master's Degree Master's Degree
## [185] Master's Degree Master's Degree
## [187] Master's Degree Master's Degree
## [189] Master's Degree Doctorate/Professional Degree
## [191] Master's Degree Doctorate/Professional Degree
## [193] Doctorate/Professional Degree Bachelor's Degree
## [195] Doctorate/Professional Degree Master's Degree
## 5 Levels: Bachelor's Degree Doctorate/Professional Degree ... Some College/Associate Degree
# Preview Income as a factor with its levels listed from the lowest to the
# highest bracket. The result is only printed here, not stored.
factor(
  df_no_na$Income,
  levels = c(
    "Less than $25000",
    "$25,000-$49,999",
    "$50,000-$74,999",
    "$75,000-$99,999+"
  )
)
## [1] Less than $25000 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [5] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [9] $25,000-$49,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [13] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $25,000-$49,999
## [17] $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [21] Less than $25000 $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999
## [25] $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [29] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [33] $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [37] $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+
## [41] $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999
## [45] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [49] $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999
## [53] $50,000-$74,999 Less than $25000 $75,000-$99,999+ $75,000-$99,999+
## [57] $50,000-$74,999 Less than $25000 $75,000-$99,999+ $75,000-$99,999+
## [61] $75,000-$99,999+ Less than $25000 $75,000-$99,999+ $75,000-$99,999+
## [65] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [69] $75,000-$99,999+ $25,000-$49,999 $75,000-$99,999+ $75,000-$99,999+
## [73] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [77] $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [81] $75,000-$99,999+ Less than $25000 $50,000-$74,999 $75,000-$99,999+
## [85] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [89] $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [93] $75,000-$99,999+ Less than $25000 $50,000-$74,999 $75,000-$99,999+
## [97] $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+
## [101] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [105] $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+
## [109] $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+ $50,000-$74,999
## [113] $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+
## [117] $50,000-$74,999 $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [121] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999
## [125] $50,000-$74,999 $50,000-$74,999 Less than $25000 $75,000-$99,999+
## [129] $75,000-$99,999+ $50,000-$74,999 Less than $25000 $50,000-$74,999
## [133] $50,000-$74,999 $50,000-$74,999 Less than $25000 $75,000-$99,999+
## [137] $75,000-$99,999+ $50,000-$74,999 Less than $25000 $75,000-$99,999+
## [141] $75,000-$99,999+ $75,000-$99,999+ Less than $25000 $75,000-$99,999+
## [145] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [149] $75,000-$99,999+ $75,000-$99,999+ $25,000-$49,999 $75,000-$99,999+
## [153] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [157] $75,000-$99,999+ $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+
## [161] $75,000-$99,999+ $75,000-$99,999+ Less than $25000 $50,000-$74,999
## [165] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [169] $75,000-$99,999+ $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+
## [173] $75,000-$99,999+ $75,000-$99,999+ Less than $25000 $50,000-$74,999
## [177] $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999 $75,000-$99,999+
## [181] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [185] $75,000-$99,999+ $75,000-$99,999+ $50,000-$74,999 $75,000-$99,999+
## [189] $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+ $75,000-$99,999+
## [193] $75,000-$99,999+ $50,000-$74,999 $50,000-$74,999 $50,000-$74,999
## 4 Levels: Less than $25000 $25,000-$49,999 ... $75,000-$99,999+
# Preview Education as a factor with its levels listed from the lowest to the
# highest attainment. The result is only printed here, not stored.
# NOTE(review): "Other" is not among the Education values returned by unique()
# above, so it becomes an empty level -- confirm that this is intended.
factor(
  df_no_na$Education,
  levels = c(
    "High School/GED",
    "Some College/Associate Degree",
    "Bachelor's Degree",
    "Master's Degree",
    "Doctorate/Professional Degree",
    "Other"
  )
)
## [1] Master's Degree Master's Degree
## [3] Doctorate/Professional Degree Doctorate/Professional Degree
## [5] Doctorate/Professional Degree Master's Degree
## [7] Master's Degree Master's Degree
## [9] Doctorate/Professional Degree Doctorate/Professional Degree
## [11] Master's Degree Doctorate/Professional Degree
## [13] Doctorate/Professional Degree Master's Degree
## [15] Bachelor's Degree High School/GED
## [17] Master's Degree Master's Degree
## [19] Doctorate/Professional Degree Master's Degree
## [21] Some College/Associate Degree Bachelor's Degree
## [23] Master's Degree Bachelor's Degree
## [25] Bachelor's Degree Master's Degree
## [27] Master's Degree Master's Degree
## [29] Master's Degree Master's Degree
## [31] Master's Degree Master's Degree
## [33] Master's Degree Master's Degree
## [35] Master's Degree Doctorate/Professional Degree
## [37] Bachelor's Degree Master's Degree
## [39] Doctorate/Professional Degree Doctorate/Professional Degree
## [41] Doctorate/Professional Degree Master's Degree
## [43] Doctorate/Professional Degree Master's Degree
## [45] Master's Degree Master's Degree
## [47] Doctorate/Professional Degree Master's Degree
## [49] Doctorate/Professional Degree Doctorate/Professional Degree
## [51] Bachelor's Degree Doctorate/Professional Degree
## [53] Master's Degree Some College/Associate Degree
## [55] Master's Degree Master's Degree
## [57] Master's Degree Bachelor's Degree
## [59] Master's Degree Doctorate/Professional Degree
## [61] Master's Degree Master's Degree
## [63] Master's Degree Doctorate/Professional Degree
## [65] Doctorate/Professional Degree Doctorate/Professional Degree
## [67] Master's Degree Master's Degree
## [69] Master's Degree Doctorate/Professional Degree
## [71] Doctorate/Professional Degree Master's Degree
## [73] Doctorate/Professional Degree Doctorate/Professional Degree
## [75] Master's Degree Bachelor's Degree
## [77] High School/GED Master's Degree
## [79] Master's Degree Doctorate/Professional Degree
## [81] Master's Degree Some College/Associate Degree
## [83] Bachelor's Degree Master's Degree
## [85] Doctorate/Professional Degree Doctorate/Professional Degree
## [87] Master's Degree Bachelor's Degree
## [89] High School/GED Master's Degree
## [91] Master's Degree Doctorate/Professional Degree
## [93] Master's Degree Some College/Associate Degree
## [95] Bachelor's Degree Master's Degree
## [97] Bachelor's Degree Bachelor's Degree
## [99] Master's Degree Master's Degree
## [101] Master's Degree Master's Degree
## [103] Master's Degree Master's Degree
## [105] Master's Degree Master's Degree
## [107] Master's Degree Master's Degree
## [109] Doctorate/Professional Degree Bachelor's Degree
## [111] Master's Degree Doctorate/Professional Degree
## [113] Doctorate/Professional Degree Doctorate/Professional Degree
## [115] Master's Degree Doctorate/Professional Degree
## [117] Master's Degree Master's Degree
## [119] Master's Degree Doctorate/Professional Degree
## [121] Master's Degree Doctorate/Professional Degree
## [123] Doctorate/Professional Degree Bachelor's Degree
## [125] Doctorate/Professional Degree Master's Degree
## [127] Some College/Associate Degree Master's Degree
## [129] Master's Degree Master's Degree
## [131] Bachelor's Degree Bachelor's Degree
## [133] Doctorate/Professional Degree Master's Degree
## [135] Some College/Associate Degree Master's Degree
## [137] Master's Degree Master's Degree
## [139] Bachelor's Degree Master's Degree
## [141] Doctorate/Professional Degree Master's Degree
## [143] Master's Degree Master's Degree
## [145] Doctorate/Professional Degree Doctorate/Professional Degree
## [147] Doctorate/Professional Degree Master's Degree
## [149] Master's Degree Master's Degree
## [151] Doctorate/Professional Degree Doctorate/Professional Degree
## [153] Master's Degree Doctorate/Professional Degree
## [155] Doctorate/Professional Degree Master's Degree
## [157] Bachelor's Degree High School/GED
## [159] Master's Degree Master's Degree
## [161] Doctorate/Professional Degree Master's Degree
## [163] Some College/Associate Degree Bachelor's Degree
## [165] Master's Degree Doctorate/Professional Degree
## [167] Doctorate/Professional Degree Master's Degree
## [169] Bachelor's Degree High School/GED
## [171] Master's Degree Master's Degree
## [173] Doctorate/Professional Degree Master's Degree
## [175] Some College/Associate Degree Bachelor's Degree
## [177] Master's Degree Bachelor's Degree
## [179] Bachelor's Degree Master's Degree
## [181] Master's Degree Master's Degree
## [183] Master's Degree Master's Degree
## [185] Master's Degree Master's Degree
## [187] Master's Degree Master's Degree
## [189] Master's Degree Doctorate/Professional Degree
## [191] Master's Degree Doctorate/Professional Degree
## [193] Doctorate/Professional Degree Bachelor's Degree
## [195] Doctorate/Professional Degree Master's Degree
## 6 Levels: High School/GED Some College/Associate Degree ... Other
# Build df2: the cleaned data with the five categorical columns stored as
# factors. Columns are referenced by bare name through dplyr's data mask
# instead of `df_no_na$...`, so each expression always operates on the rows
# flowing through the pipe (reaching back into df_no_na would misalign or
# error as soon as a row-changing step is added upstream).
# Gender, Employment and Age keep the default sorted levels; Income and
# Education get explicit levels so they follow their low-to-high order.
# NOTE(review): "Other" is not among the observed Education values, so it is
# kept as an empty level -- confirm that this is intended.
df2 <- df_no_na %>%
  mutate(
    Gender = factor(Gender),
    Employment = factor(Employment),
    Age = factor(Age),
    Income = factor(
      Income,
      levels = c(
        "Less than $25000",
        "$25,000-$49,999",
        "$50,000-$74,999",
        "$75,000-$99,999+"
      )
    ),
    Education = factor(
      Education,
      levels = c(
        "High School/GED",
        "Some College/Associate Degree",
        "Bachelor's Degree",
        "Master's Degree",
        "Doctorate/Professional Degree",
        "Other"
      )
    )
  )
# Confirm the five categorical columns are now <fct>.
head(df2)
## # A tibble: 6 × 16
## Gender Employment Age Income Education SQ1 SQ2 SQ3 SQ4 SQ5 SQ6
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Male Employed pa… 25-34 Less … Master's… 5 5 4 4 5 5
## 2 Female Employed fu… 45-54 $75,0… Master's… 3 4 3 3 3 3
## 3 Male Employed fu… 55-64 $75,0… Doctorat… 4 2 2 2 4 4
## 4 Male Employed fu… 35-44 $75,0… Doctorat… 5 5 4 3 4 5
## 5 Female Employed fu… 55-64 $75,0… Doctorat… 5 5 4 4 4 4
## 6 Female Employed fu… 45-54 $75,0… Master's… 5 5 5 4 4 4
## # ℹ 5 more variables: SQ7 <dbl>, SQ8 <dbl>, SQ9 <dbl>, SQ10 <dbl>, SQ11 <dbl>
# With the categorical columns stored as factors, summary() now reports a
# count per level instead of only Length/Class/Mode.
summary(df2)
## Gender Employment Age
## Female :102 Employed full-time:166 18-24:11
## Male : 87 Employed part-time: 25 25-34:26
## Non-binary/third gender: 2 Other : 5 35-44:84
## Prefer not to say : 5 45-54:40
## 55-64:30
## 65+ : 5
## Income Education SQ1
## Less than $25000: 14 High School/GED : 5 Min. :2.0
## $25,000-$49,999 : 8 Some College/Associate Degree: 8 1st Qu.:4.0
## $50,000-$74,999 : 44 Bachelor's Degree : 25 Median :5.0
## $75,000-$99,999+:130 Master's Degree :103 Mean :4.5
## Doctorate/Professional Degree: 55 3rd Qu.:5.0
## Other : 0 Max. :5.0
## SQ2 SQ3 SQ4 SQ5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :4.000 Median :4.000
## Mean :4.153 Mean :3.944 Mean :4.112 Mean :3.903
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## SQ6 SQ7 SQ8 SQ9 SQ10
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.00
## Median :4.000 Median :5.000 Median :4.000 Median :4.000 Median :4.00
## Mean :4.077 Mean :4.316 Mean :3.929 Mean :3.903 Mean :3.99
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.00
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00
## SQ11
## Min. :1.000
## 1st Qu.:4.000
## Median :4.000
## Mean :4.143
## 3rd Qu.:5.000
## Max. :5.000
# Tabulate the distinct values of every column. NOTE: apply() converts the
# tibble to a character matrix first, so the factor columns are tabulated as
# plain strings (alphabetical order, empty factor levels not shown).
apply(X = df2, MARGIN = 2, FUN = table)
## $Gender
##
## Female Male Non-binary/third gender
## 102 87 2
## Prefer not to say
## 5
##
## $Employment
##
## Employed full-time Employed part-time Other
## 166 25 5
##
## $Age
##
## 18-24 25-34 35-44 45-54 55-64 65+
## 11 26 84 40 30 5
##
## $Income
##
## $25,000-$49,999 $50,000-$74,999 $75,000-$99,999+ Less than $25000
## 8 44 130 14
##
## $Education
##
## Bachelor's Degree Doctorate/Professional Degree
## 25 55
## High School/GED Master's Degree
## 5 103
## Some College/Associate Degree
## 8
##
## $SQ1
##
## 2 3 4 5
## 3 12 65 116
##
## $SQ2
##
## 1 2 3 4 5
## 9 8 16 74 89
##
## $SQ3
##
## 1 2 3 4 5
## 5 21 24 76 70
##
## $SQ4
##
## 1 2 3 4 5
## 5 6 31 74 80
##
## $SQ5
##
## 1 2 3 4 5
## 5 10 39 87 55
##
## $SQ6
##
## 1 2 3 4 5
## 5 8 18 101 64
##
## $SQ7
##
## 1 2 3 4 5
## 5 10 15 54 112
##
## $SQ8
##
## 1 2 3 4 5
## 10 15 25 75 71
##
## $SQ9
##
## 1 2 3 4 5
## 8 14 35 71 68
##
## $SQ10
##
## 1 2 3 4 5
## 5 19 21 79 72
##
## $SQ11
##
## 1 2 3 4 5
## 5 12 16 80 83
# Attempt the mean of every column of df2. Because df2 mixes factor and
# numeric columns, apply() passes character data to mean(), so every column
# (including the numeric SQ items) produces a warning and NA.
apply(X = df2, MARGIN = 2, FUN = mean)
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## Gender Employment Age Income Education SQ1 SQ2
## NA NA NA NA NA NA NA
## SQ3 SQ4 SQ5 SQ6 SQ7 SQ8 SQ9
## NA NA NA NA NA NA NA
## SQ10 SQ11
## NA NA
# Drop the five demographic factor columns so that only the numeric survey
# items (SQ1-SQ11) remain; their means can then be computed.
df3 <- select(df2, !c(Gender, Employment, Age, Income, Education))
head(df3)
## # A tibble: 6 × 11
## SQ1 SQ2 SQ3 SQ4 SQ5 SQ6 SQ7 SQ8 SQ9 SQ10 SQ11
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5 5 4 4 5 5 5 5 5 5 5
## 2 3 4 3 3 3 3 4 3 3 3 4
## 3 4 2 2 2 4 4 2 4 2 2 4
## 4 5 5 4 3 4 5 4 4 3 4 5
## 5 5 5 4 4 4 4 4 4 4 4 4
## 6 5 5 5 4 4 4 5 4 4 4 5
# Mean of each survey item, listed from largest to smallest. colMeans() is the
# dedicated routine for column means of an all-numeric data frame and replaces
# the generic apply(df3, 2, mean); the result is the same named numeric vector.
sort(colMeans(df3), decreasing = TRUE)
## SQ1 SQ7 SQ2 SQ11 SQ4 SQ6 SQ10 SQ3
## 4.500000 4.316327 4.153061 4.142857 4.112245 4.076531 3.989796 3.943878
## SQ8 SQ5 SQ9
## 3.928571 3.903061 3.903061
# Composite score: each respondent's mean over the 11 survey items SQ1-SQ11.
# For example, a composite score of 3.8 indicates a respondent who is
# moderately comfortable with technology.
# rowMeans() over the selected item columns replaces the hand-written sum and
# the hard-coded divisor of 11, so the divisor always matches the number of
# items selected.
df4 <- df2 %>%
  mutate(composite_score = rowMeans(pick(SQ1:SQ11)))
head(df4)
## # A tibble: 6 × 17
## Gender Employment Age Income Education SQ1 SQ2 SQ3 SQ4 SQ5 SQ6
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Male Employed pa… 25-34 Less … Master's… 5 5 4 4 5 5
## 2 Female Employed fu… 45-54 $75,0… Master's… 3 4 3 3 3 3
## 3 Male Employed fu… 55-64 $75,0… Doctorat… 4 2 2 2 4 4
## 4 Male Employed fu… 35-44 $75,0… Doctorat… 5 5 4 3 4 5
## 5 Female Employed fu… 55-64 $75,0… Doctorat… 5 5 4 4 4 4
## 6 Female Employed fu… 45-54 $75,0… Master's… 5 5 5 4 4 4
## # ℹ 6 more variables: SQ7 <dbl>, SQ8 <dbl>, SQ9 <dbl>, SQ10 <dbl>, SQ11 <dbl>,
## # composite_score <dbl>
# Per-column overview of df4, which now also includes composite_score.
df4 %>% summary()
## Gender Employment Age
## Female :102 Employed full-time:166 18-24:11
## Male : 87 Employed part-time: 25 25-34:26
## Non-binary/third gender: 2 Other : 5 35-44:84
## Prefer not to say : 5 45-54:40
## 55-64:30
## 65+ : 5
## Income Education SQ1
## Less than $25000: 14 High School/GED : 5 Min. :2.0
## $25,000-$49,999 : 8 Some College/Associate Degree: 8 1st Qu.:4.0
## $50,000-$74,999 : 44 Bachelor's Degree : 25 Median :5.0
## $75,000-$99,999+:130 Master's Degree :103 Mean :4.5
## Doctorate/Professional Degree: 55 3rd Qu.:5.0
## Other : 0 Max. :5.0
## SQ2 SQ3 SQ4 SQ5
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:3.000
## Median :4.000 Median :4.000 Median :4.000 Median :4.000
## Mean :4.153 Mean :3.944 Mean :4.112 Mean :3.903
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000
## SQ6 SQ7 SQ8 SQ9 SQ10
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:4.00
## Median :4.000 Median :5.000 Median :4.000 Median :4.000 Median :4.00
## Mean :4.077 Mean :4.316 Mean :3.929 Mean :3.903 Mean :3.99
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:5.00
## Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.000 Max. :5.00
## SQ11 composite_score
## Min. :1.000 Min. :1.727
## 1st Qu.:4.000 1st Qu.:3.795
## Median :4.000 Median :4.182
## Mean :4.143 Mean :4.088
## 3rd Qu.:5.000 3rd Qu.:4.636
## Max. :5.000 Max. :5.000
# Box-and-whisker plot: composite score by Gender
ggplot(data = df4, mapping = aes(x = Gender, y = composite_score)) +
  geom_boxplot() +
  xlab("Gender") +
  ylab("Composite Score")
# Box-and-whisker plot: composite score by Employment
ggplot(data = df4, mapping = aes(x = Employment, y = composite_score)) +
  geom_boxplot() +
  xlab("Employment") +
  ylab("Composite Score")
# Box-and-whisker plot: composite score by Age
ggplot(data = df4, mapping = aes(x = Age, y = composite_score)) +
  geom_boxplot() +
  xlab("Age") +
  ylab("Composite Score")
# Box-and-whisker plot: composite score by Income
ggplot(data = df4, mapping = aes(x = Income, y = composite_score)) +
  geom_boxplot() +
  xlab("Income") +
  ylab("Composite Score")
# Histogram of responses to survey question 5. SQ5 holds whole-number
# responses (1-5 in this data), so a bin width of 1 gives one bar per response
# value. The default of 30 bins leaves most bins empty and makes stat_bin()
# emit its "Pick better value with `binwidth`" message.
df4 %>%
  ggplot(aes(x = SQ5)) +
  geom_histogram(binwidth = 1, fill = "white", color = "black") +
  labs(x = "SQ5", y = "Count", title = "Number of Participants")
# Histogram of responses to survey question 6. SQ6 holds whole-number
# responses (1-5 in this data), so a bin width of 1 gives one bar per response
# value. The default of 30 bins leaves most bins empty and makes stat_bin()
# emit its "Pick better value with `binwidth`" message.
df4 %>%
  ggplot(aes(x = SQ6)) +
  geom_histogram(binwidth = 1, fill = "white", color = "black") +
  labs(x = "SQ6", y = "Count", title = "Number of Participants")
# Box-and-whisker plot of the SQ5 responses within each Gender level.
ggplot(data = df4, mapping = aes(x = Gender, y = SQ5)) +
  geom_boxplot() +
  xlab("Gender") +
  ylab("Survey Question 5 Responses") +
  ggtitle("Survey Question Number 5 across Gender")
# Mean, median and standard deviation of SQ5 within each Gender level.
df4 %>%
  group_by(Gender) %>%
  summarise(
    mean = mean(SQ5),
    median = median(SQ5),
    sd = sd(SQ5)
  )
## # A tibble: 4 × 4
## Gender mean median sd
## <fct> <dbl> <dbl> <dbl>
## 1 Female 3.86 4 1.03
## 2 Male 4.02 4 0.849
## 3 Non-binary/third gender 3 3 0
## 4 Prefer not to say 3 3 0
# Shapiro-Wilk test of normality on the SQ5 responses of all respondents in
# df4. A small p-value is evidence that the responses are not normally
# distributed.
shapiro.test(df4$SQ5)
##
## Shapiro-Wilk normality test
##
## data: df4$SQ5
## W = 0.84542, p-value = 3.68e-13
# Shapiro-Wilk test of normality on the SQ6 responses of all respondents in
# df4. A small p-value is evidence that the responses are not normally
# distributed.
shapiro.test(df4$SQ6)
##
## Shapiro-Wilk normality test
##
## data: df4$SQ6
## W = 0.77563, p-value = 4.911e-16
# The Shapiro-Wilk tests reject normality for both survey questions, so SQ5
# and SQ6 are compared with a rank-based Wilcoxon test instead of a t-test.
# NOTE(review): SQ5 and SQ6 are two columns of the same data frame (one pair
# of answers per row), yet this call runs the unpaired rank-sum test, which
# treats the two vectors as independent samples. Consider `paired = TRUE`
# (Wilcoxon signed-rank test) -- confirm against the study design.
wilcox.test(df4$SQ5, df4$SQ6)
##
## Wilcoxon rank sum test with continuity correction
##
## data: df4$SQ5 and df4$SQ6
## W = 17071, p-value = 0.03981
## alternative hypothesis: true location shift is not equal to 0
# Gender has four levels rather than two, so a one-way ANOVA is used to
# compare the SQ5 responses across Gender.
anova_result1 <- aov(formula = SQ5 ~ Gender, data = df4)
summary(object = anova_result1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 3 7.13 2.3752 2.682 0.0481 *
## Residuals 192 170.03 0.8856
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# SQ5 responses of the respondents whose Gender is "Male", as a plain vector.
male_SQ5 <- df4$SQ5[df4$Gender %in% "Male"]
male_SQ5
## [1] 5 4 4 4 5 4 5 4 5 4 4 4 2 4 3 5 5 4 5 4 3 2 4 3 4 5 2 5 4 4 4 5 4 5 4 5 4 4
## [39] 4 5 4 5 4 4 4 2 4 3 5 5 4 5 4 3 2 4 3 4 3 4 5 2 5 4 4 4 5 4 5 4 5 4 4 4 5 4
## [77] 5 4 4 4 2 4 3 5 4 3 4
# Shapiro-Wilk test of normality on the male respondents' SQ5 responses. A
# small p-value is evidence that the responses are not normally distributed.
shapiro.test(male_SQ5)
##
## Shapiro-Wilk normality test
##
## data: male_SQ5
## W = 0.80292, p-value = 1.977e-09
# SQ5 responses of the respondents whose Gender is "Female", as a plain vector.
female_SQ5 <- df4$SQ5[df4$Gender %in% "Female"]
female_SQ5
## [1] 3 4 4 4 5 4 5 1 4 5 4 3 5 5 4 2 3 4 3 4 5 3 5 4 3 3 4 5 3 4 4 5 3 4 4 4 5
## [38] 4 5 1 4 5 4 5 1 4 5 4 3 5 5 4 2 3 4 3 4 5 3 5 4 3 3 4 5 3 4 4 3 4 5 3 4 4
## [75] 5 3 4 4 4 5 4 5 1 4 5 4 5 1 4 5 4 3 5 5 4 2 5 3 5 4 3 3
# Shapiro-Wilk test of normality on the female respondents' SQ5 responses. A
# small p-value is evidence that the responses are not normally distributed.
shapiro.test(female_SQ5)
##
## Shapiro-Wilk normality test
##
## data: female_SQ5
## W = 0.83828, p-value = 3.472e-09
# The Shapiro-Wilk tests reject normality for both groups, so the male and
# female SQ5 responses are compared with the Wilcoxon rank-sum test, a
# rank-based comparison of two independent groups, instead of a t-test.
wilcox.test(male_SQ5, female_SQ5)
##
## Wilcoxon rank sum test with continuity correction
##
## data: male_SQ5 and female_SQ5
## W = 4765, p-value = 0.3497
## alternative hypothesis: true location shift is not equal to 0
# Gender has four levels rather than two, so a one-way ANOVA is used to
# compare the composite scores across Gender.
anova_result <- aov(formula = composite_score ~ Gender, data = df4)
summary(object = anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 3 15.47 5.156 10.76 1.44e-06 ***
## Residuals 192 92.02 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Education has more than two levels (five of them occur in the data), so I will again use an ANOVA test to compare the composite scores across Education.
# One-way ANOVA of the composite score across the Education levels.
anova_results2 <- aov(formula = composite_score ~ Education, data = df4)
summary(object = anova_results2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Education 4 20.39 5.098 11.18 3.6e-08 ***
## Residuals 191 87.09 0.456
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# We will compute the effect size (omega squared) for the Education ANOVA above, which was highly significant (p = 3.6e-08), to gauge how large the effect is.
# Attach the effectsize package and report omega squared for the Education
# ANOVA fit.
library("effectsize")
effectsize(model = anova_results2, type = "omega")
## For one-way between subjects designs, partial omega squared is
## equivalent to omega squared. Returning omega squared.
## # Effect Size for ANOVA
##
## Parameter | Omega2 | 95% CI
## ---------------------------------
## Education | 0.17 | [0.09, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# One-way ANOVA of the composite score across the Income brackets.
anova_result3 <- aov(formula = composite_score ~ Income, data = df4)
summary(object = anova_result3)
## Df Sum Sq Mean Sq F value Pr(>F)
## Income 3 8.43 2.8109 5.449 0.00129 **
## Residuals 192 99.05 0.5159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Effect size (omega squared) for the Income ANOVA. The function is called
# through its namespace, so the package does not have to be attached again
# with a repeated library() call.
effectsize::effectsize(anova_result3, type = "omega")
## For one-way between subjects designs, partial omega squared is
## equivalent to omega squared. Returning omega squared.
## # Effect Size for ANOVA
##
## Parameter | Omega2 | 95% CI
## ---------------------------------
## Income | 0.06 | [0.01, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
# One-way ANOVA of the composite score across the Age groups.
anova_result4 <- aov(formula = composite_score ~ Age, data = df4)
summary(object = anova_result4)
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 5 1.36 0.2723 0.487 0.785
## Residuals 190 106.12 0.5586
# Effect size (omega squared) for the Age ANOVA. The function is called
# through its namespace, so the package does not have to be attached again
# with a repeated library() call.
effectsize::effectsize(anova_result4, type = "omega")
## For one-way between subjects designs, partial omega squared is
## equivalent to omega squared. Returning omega squared.
## # Effect Size for ANOVA
##
## Parameter | Omega2 | 95% CI
## ---------------------------------
## Age | 0.00 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].