GSS_assignment

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)                   # package to open excel files
library(plotly)                   # a graphics package, and alternative to ggplot2 

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
# Load the GSS extract; the relative path is resolved against the
# current working directory.
survey <- read_excel(path = "GSS_assignment.xlsx")
# Bar chart of respondent race, most frequent category first.
survey |>
  mutate(
    # Turn the "Inapplicable" code into NA so it can be dropped below.
    race = race |>
      as_factor() |>
      fct_recode(NULL = ".i:  Inapplicable")
  ) |>
  drop_na(race) |>
  # Order the bars by descending frequency.
  mutate(race = fct_infreq(race)) |>
  plot_ly(x = ~race) |>
  add_histogram()
# Bar chart of attitudes toward spanking, most frequent answer first.
survey |>
  mutate(
    # Map every non-substantive answer code to NA.
    spanking = spanking |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      )
  ) |>
  drop_na(spanking) |>
  # Order the bars by descending frequency.
  mutate(spanking = fct_infreq(spanking)) |>
  plot_ly(x = ~spanking) |>
  add_histogram()
# Tabulate spanking attitudes after converting them to a numeric scale.
survey |>
  mutate(
    spanking = spanking |>
      as_factor() |>
      # Map every non-substantive answer code to NA.
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      ) |>
      # Put the answers in agreement order so the integer codes are ordinal.
      fct_relevel(
        c("STRONGLY AGREE", "AGREE", "DISAGREE", "STRONGLY DISAGREE")
      )
  ) |>
  drop_na(spanking) |>
  # as.numeric() on a factor yields its level codes, i.e. the positions
  # in the order set above (1 = STRONGLY AGREE, ...).
  mutate(spanking = as.numeric(spanking)) |>
  count(spanking)
# A tibble: 4 × 2
  spanking     n
     <dbl> <int>
1        1   706
2        2  1274
3        3   810
4        4   453
# Box plot of the numeric spanking score by race.
survey |>
  mutate(
    # Turn the "Inapplicable" code into NA so it can be dropped below.
    race = race |>
      as_factor() |>
      fct_recode(NULL = ".i:  Inapplicable")
  ) |>
  drop_na(race) |>
  mutate(
    # Most common race first on the x axis.
    race = fct_infreq(race),
    spanking = spanking |>
      as_factor() |>
      # Map every non-substantive answer code to NA.
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      ) |>
      # Agreement order, so the integer codes below are ordinal.
      fct_relevel(
        c("STRONGLY AGREE", "AGREE", "DISAGREE", "STRONGLY DISAGREE")
      )
  ) |>
  drop_na(spanking) |>
  # Factor level codes: 1 = STRONGLY AGREE, following the order set above.
  mutate(spanking = as.numeric(spanking)) |>
  plot_ly(x = ~race, y = ~spanking) |>
  add_boxplot()
# Bar chart of number of children, bars in numeric order.
survey |>
  mutate(
    childs = childs |>
      as_factor() |>
      fct_recode(
        # Shorten the top-coded category so it sorts with the other counts.
        "8" = "8 or more",
        # Non-substantive answer codes become NA.
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable"
      ) |>
      # Levels start in order of appearance; force 0, 1, ..., 8.
      fct_relevel(c("0", "1", "2", "3", "4", "5", "6", "7", "8"))
  ) |>
  drop_na(childs) |>
  plot_ly(x = ~childs) |>
  add_histogram()
# Box plot of number of children by attitude toward spanking.
#
# `childs` must be cleaned before it is made numeric: the raw column holds
# the top-coded label "8 or more" plus non-response codes. Without the
# recode, fct_relevel() warns that level "8" is unknown, and calling
# as.numeric() directly on the factor returns level codes (1 for "0",
# 2 for "1", ...) rather than the actual counts.
survey |>
  mutate(
    # Map every non-substantive answer code to NA.
    spanking = spanking |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      )
  ) |>
  drop_na(spanking) |>
  mutate(
    childs = childs |>
      as_factor() |>
      fct_recode(
        # Top-coded category: treated as 8 on the numeric scale.
        "8" = "8 or more",
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable"
      )
  ) |>
  drop_na(childs) |>
  # Go through as.character() so the labels themselves ("0", ..., "8") are
  # parsed, not the factor's internal integer codes.
  mutate(childs = as.numeric(as.character(childs))) |>
  plot_ly(x = ~spanking, y = ~childs) |>
  add_boxplot()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `childs = fct_relevel(...)`.
Caused by warning:
! 1 unknown level in `f`: 8
# Bar chart of marital happiness, most frequent answer first.
survey |>
  mutate(
    # Map every non-substantive answer code to NA.
    hapmar = hapmar |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      )
  ) |>
  drop_na(hapmar) |>
  # Order the bars by descending frequency.
  mutate(hapmar = fct_infreq(hapmar)) |>
  plot_ly(x = ~hapmar) |>
  add_histogram()
# Bar chart of self-reported social class, ordered from lower to upper.
survey |>
  mutate(
    class_ = class_ |>
      as_factor() |>
      # Map the non-substantive answer codes to NA.
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      ) |>
      # Ordinal order for the x axis instead of order of appearance.
      fct_relevel(
        c("Lower class", "Working class", "Middle class", "Upper class")
      )
  ) |>
  drop_na(class_) |>
  plot_ly(x = ~class_) |>
  add_histogram()
# Bar chart of social class, with bars split by marital happiness.
survey |>
  mutate(
    class_ = class_ |>
      as_factor() |>
      # Map the non-substantive answer codes to NA.
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      ) |>
      # Ordinal order for the x axis instead of order of appearance.
      fct_relevel(
        c("Lower class", "Working class", "Middle class", "Upper class")
      )
  ) |>
  drop_na(class_) |>
  mutate(
    # Same cleaning for the colour variable.
    hapmar = hapmar |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      )
  ) |>
  drop_na(hapmar) |>
  plot_ly(x = ~class_, color = ~hapmar) |>
  add_histogram()
# Bar chart of respondent age, binned into decades.
#
# The data top-codes age as "89 or older"; there is no plain "89" level,
# so it must not be listed in fct_collapse() (doing so raises
# "Unknown levels in `f`: 89").
survey |>
  mutate(
    age = age |>
      as_factor() |>
      # "No answer" becomes NA and is dropped below.
      fct_recode(NULL = ".n:  No answer") |>
      # Collapse single years into decade bins; the year labels are
      # generated instead of typed out to avoid omissions and typos.
      fct_collapse(
        "under 30" = as.character(18:29),
        "30s" = as.character(30:39),
        "40s" = as.character(40:49),
        "50s" = as.character(50:59),
        "60s" = as.character(60:69),
        "70s" = as.character(70:79),
        "80 and up" = c(as.character(80:88), "89 or older")
      ) |>
      # Force chronological bin order on the x axis.
      fct_relevel(
        c("under 30", "30s", "40s", "50s", "60s", "70s", "80 and up")
      )
  ) |>
  drop_na(age) |>
  plot_ly(x = ~age) |>
  add_histogram()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = fct_collapse(...)`.
Caused by warning:
! Unknown levels in `f`: 89
# 2D histogram (heat map) of marital happiness by age decade.
#
# The data top-codes age as "89 or older"; there is no plain "89" level,
# so it must not be listed in fct_collapse() (doing so raises
# "Unknown levels in `f`: 89").
survey |>
  mutate(
    age = age |>
      as_factor() |>
      # "No answer" becomes NA and is dropped below.
      fct_recode(NULL = ".n:  No answer") |>
      # Collapse single years into decade bins; the year labels are
      # generated instead of typed out to avoid omissions and typos.
      fct_collapse(
        "under 30" = as.character(18:29),
        "30s" = as.character(30:39),
        "40s" = as.character(40:49),
        "50s" = as.character(50:59),
        "60s" = as.character(60:69),
        "70s" = as.character(70:79),
        "80 and up" = c(as.character(80:88), "89 or older")
      ) |>
      # Force chronological bin order on the x axis.
      fct_relevel(
        c("under 30", "30s", "40s", "50s", "60s", "70s", "80 and up")
      )
  ) |>
  drop_na(age) |>
  mutate(
    # Map every non-substantive answer code to NA.
    hapmar = hapmar |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".i:  Inapplicable",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web"
      )
  ) |>
  drop_na(hapmar) |>
  plot_ly(x = ~age, y = ~hapmar) |>
  add_histogram2d()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = fct_collapse(...)`.
Caused by warning:
! Unknown levels in `f`: 89