GSS_assignment

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)                   # package to open excel files
library(plotly)                   # a graphics package, and alternative to ggplot2


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

survey <- read_excel("R Class/GSS.xlsx")

glimpse(survey)

Rows: 6,309
Columns: 16
$ year     <dbl> 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2…
$ id_      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
$ hrs2     <chr> ".i:  Inapplicable", ".i:  Inapplicable", ".i:  Inapplicable"…
$ childs   <chr> "0", "1", "1", "1", "2", "1", "2", "2", "2", "0", "2", "3", "…
$ age      <chr> "25", "43", "30", "55", "37", "47", "57", "71", "46", "19", "…
$ sex      <chr> "FEMALE", "MALE", "FEMALE", "FEMALE", "MALE", "MALE", "FEMALE…
$ race     <chr> "White", "White", "White", "White", "White", "White", "White"…
$ courts   <chr> "About right", "Not harshly enough", ".i:  Inapplicable", ".i…
$ relig    <chr> "Inter-nondenominational", "Protestant", "Protestant", "Prote…
$ attend   <chr> "About once or twice a year", "About once a month", "Every we…
$ hapmar   <chr> ".i:  Inapplicable", "PRETTY HAPPY", ".i:  Inapplicable", ".i…
$ class_   <chr> "Middle class", "Middle class", "Working class", "Upper class…
$ premarsx <chr> "ALWAYS WRONG", ".i:  Inapplicable", ".i:  Inapplicable", ".i…
$ xmarsex  <chr> "ALWAYS WRONG", "ALWAYS WRONG", ".i:  Inapplicable", ".i:  In…
$ spanking <chr> "STRONGLY AGREE", ".i:  Inapplicable", ".i:  Inapplicable", "…
$ ballot   <chr> "Ballot a", "Ballot c", "Ballot a", "Ballot b", "Ballot c", "…

survey %>% 
  mutate(race = as_factor(race)) %>%  
  
  mutate(race = fct_recode(race,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web",
                            NULL = ".i:  Inapplicable")) %>% 

  drop_na(race) %>% 
  
  mutate(race = fct_infreq(race)) %>% 
  
  plot_ly(x = ~race) %>% 
  add_histogram()

The histogram shows that the majority of survey respondents are white (4702) followed by black (975), and other (579).

survey %>% 
  mutate(spanking = as_factor(spanking)) %>%  
  
  mutate(spanking = fct_recode(spanking,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web", 
                            NULL = ".i:  Inapplicable")) %>% 

  drop_na(spanking) %>% 
  
  mutate(spanking = fct_infreq(spanking)) %>% 
  mutate(race =fct_recode(race,
                          NULL = ".i:  Inapplicable")) %>% 
  drop_na(race) %>% 
  
  plot_ly(x = ~spanking, color = ~ race) %>% 
  add_histogram()

The histogram shows that white respondents have more varied opinions both for and against spanking than other races. Whereas black respondants are the least likely to strongly disagree with spanking.

survey %>% 
  
   mutate(race = as_factor(race)) %>% 
  
  mutate(race = fct_recode(race,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web", 
                            NULL = ".i:  Inapplicable")) %>%
  
   mutate(race = fct_collapse(race,
                              "Other" = c("white","black", "other", "inapplicable"))) %>%
  drop_na(race) %>%
  
  mutate(race = fct_infreq(race)) %>%
  
  
  mutate(spanking = as_factor(spanking)) %>%
    mutate(spanking = fct_recode(spanking,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".i:  Inapplicable",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web")) %>%
  
    mutate(spanking = fct_relevel(spanking,
                              c("inapplicable", "agree", "disagree", "strongly disagree", "strongly agree"))) %>%
  drop_na(spanking) %>% 
  
  mutate(spanking = as.numeric(spanking)) %>%
  

plot_ly(x = ~race, y = ~spanking) %>%
  add_boxplot()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `race = fct_collapse(race, Other = c("white", "black", "other",
  "inapplicable"))`.
Caused by warning:
! Unknown levels in `f`: white, black, other, inapplicable

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `spanking = fct_relevel(...)`.
Caused by warning:
! 5 unknown levels in `f`: inapplicable, agree, disagree, strongly disagree, and
strongly agree

The box plot shows that black respondents are more likely to strongly agree with spanking as a form of punishment than both white or other races.

survey %>% 
  mutate(childs = as_factor(childs)) %>%  
  
   mutate(childs = fct_recode(childs,
                          "8 or more" = "8",
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web",
                            NULL = ".i:  Inapplicable")) %>% 

  drop_na(childs) %>% 
  
  mutate(childs = fct_infreq(childs)) %>% 
  mutate(childs = as.numeric(childs)) %>%
  
  plot_ly(x = ~childs) %>% 
  add_histogram()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `childs = fct_recode(...)`.
Caused by warning:
! Unknown levels in `f`: 8

The histogram shows people are more likely to have lesser amounts of children. In this study the most popular amount was 1 child (1906 respondents) and the least popular amount of children was 10 (11 respondents).

survey %>% 
  
   mutate(spanking = as_factor(spanking)) %>% 
  
  mutate(spanking = fct_recode(spanking,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web", 
                            NULL = ".i:  Inapplicable")) %>%
  
   mutate(spanking = fct_collapse(spanking,
                              "Other" = c("inapplicable", "agree", "disagree", "strongly agree", "strongly disagree"))) %>%
  drop_na(spanking) %>%
  
  mutate(childs = fct_infreq(childs)) %>%
  
  
  mutate(childs = as_factor(childs)) %>%
    mutate(childs = fct_recode(childs,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".i:  Inapplicable",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web")) %>%
  
    mutate(childs = fct_relevel(childs,
                              c("inapplicable", "agree", "disagree", "strongly disagree", "strongly agree"))) %>%
  drop_na(childs) %>% 
  
  mutate(childs = as.numeric(childs)) %>%
  

plot_ly(x = ~spanking, y = ~childs) %>%
  add_boxplot()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `spanking = fct_collapse(...)`.
Caused by warning:
! Unknown levels in `f`: inapplicable, agree, disagree, strongly agree, strongly disagree

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `childs = fct_relevel(...)`.
Caused by warning:
! 5 unknown levels in `f`: inapplicable, agree, disagree, strongly disagree, and
strongly agree

The box-plot reports that people with less children are more likely to disagree with spanking than those with more children.

survey %>% 
  mutate(hapmar = as_factor(hapmar)) %>%  
  
  mutate(hapmar = fct_recode(hapmar,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web", 
                            NULL = ".i:  Inapplicable")) %>% 

  drop_na(hapmar) %>% 
  
  mutate(hapmar = fct_infreq(hapmar)) %>% 
  
  plot_ly(x = ~hapmar) %>% 
  add_histogram()

The histogram shows that the majority of respondents report that they are very happy (1251), while 717 respondents are pretty happy, and 90 respondents are not too happy.

survey %>% 
mutate(class_ = factor(class_, levels = c("Lower class", "Working class", "Middle class", "Upper class"))) %>%  
  
  mutate(class_ = fct_recode(class_,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web", 
                            NULL = ".i:  Inapplicable")) %>% 
 

  drop_na(class_) %>% 
  
  mutate(class_ = fct_infreq(class_)) %>% 
  
  plot_ly(x = ~class_) %>% 
  add_histogram()

The histogram shows that the majority of participants are in the middle class (2749) range with working class (2702), lower class (547), and upper class (246) trailing behind.

survey %>% 
  mutate(class_ = as_factor(class_)) %>% 
  mutate(class_ = fct_recode(class_,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web", 
                            NULL = ".i:  Inapplicable")) %>% 
  
  
  drop_na(class_) %>% 
  
  mutate(class_ = fct_infreq(class_)) %>% 
  
  mutate(hapmar = as_factor(hapmar)) %>% 
  
  mutate(hapmar = fct_recode(hapmar,
                          NULL = ".d:  Do not Know/Cannot Choose",
                          NULL = ".s:  Skipped on Web",
                          NULL = ".n:  No answer", 
                          NULL = ".i:  Inapplicable")) %>% 
  
  drop_na(hapmar) %>% 

  plot_ly(x = ~class_, color = ~hapmar) %>% 
  add_histogram()

The histogram reports that the middle class has the most happiness within their marriage followed by the working class, upper class, and finally lower class.

survey %>% 
  
mutate(age = as_factor(age)) %>%  
mutate(age = fct_recode(age,
                            NULL = ".n:  No answer",
                        NULL = "89 or older")) %>% 
mutate(age = fct_collapse(age,
                              "Under 30" = c("18",
                                          "19",
                                          "20",
                                          "21",
                                          "22",
                                          "23",
                                          "24",
                                          "25",
                                          "26",
                                          "27",
                                          "28",
                                          "29"),
                              "30s" = c("30", "31", "32", "33", "34", "35", "36", "37", "38", "39"),
                          "40s" = c("40", "41", "42", "43", "44", "45", "46", "47", "48", "49"),
                          "50s" = c("50", "51", "52", "53", "54", "55", "56", "57", "58", "59"),
                          "60s" = c("60", "61", "62", "63", "64", "65", "66", "67", "68", "69"),
                          "70s"= c("70", "71", "72", "73", "74", "75", "76", "77", "78", "79"),
                          "80 and up" = c("80", "81", "82", "83", "84", "85", "86", "87", "88", "89"))) %>%  
  
  plot_ly(x=~age) %>% 
  add_histogram()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = fct_collapse(...)`.
Caused by warning:
! Unknown levels in `f`: 89

Warning: Ignoring 254 observations

The histogram shows the various age groups of participants in the study.

survey %>% 
  
mutate(age = as_factor(age)) %>%  
mutate(age = fct_recode(age,
                            NULL = ".n:  No answer",
                        NULL = "89 or older")) %>% 
mutate(age = fct_collapse(age,
                              "Under 30" = c("18",
                                          "19",
                                          "20",
                                          "21",
                                          "22",
                                          "23",
                                          "24",
                                          "25",
                                          "26",
                                          "27",
                                          "28",
                                          "29"),
                              "30s" = c("30", "31", "32", "33", "34", "35", "36", "37", "38", "39"),
                          "40s" = c("40", "41", "42", "43", "44", "45", "46", "47", "48", "49"),
                          "50s" = c("50", "51", "52", "53", "54", "55", "56", "57", "58", "59"),
                          "60s" = c("60", "61", "62", "63", "64", "65", "66", "67", "68", "69"),
                          "70s"= c("70", "71", "72", "73", "74", "75", "76", "77", "78", "79"),
                          "80 and up" = c("80", "81", "82", "83", "84", "85", "86", "87", "88", "89"))) %>% 
  
  
  
  mutate(hapmar = as_factor(hapmar)) %>% 
  
  mutate(hapmar = fct_recode(hapmar,
                            NULL = ".d:  Do not Know/Cannot Choose",
                            NULL = ".i:  Inapplicable",
                            NULL = ".n:  No answer",
                            NULL = ".s:  Skipped on Web")) %>% 

  drop_na(hapmar) %>%  
  mutate(class_=fct_infreq(hapmar)) %>% 
  

  plot_ly(x = ~hapmar, y = ~age) %>% 
  add_histogram2d()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = fct_collapse(...)`.
Caused by warning:
! Unknown levels in `f`: 89

The heatmap is displaying the correlation between age and happiness within their marriage. Participants in there 30s reported being the happiest with their spouse.