GSS_assignment

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)                   # package to open excel files
library(plotly)                   # a graphics package, and alternative to ggplot2


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

# Read the GSS assignment workbook into `survey`; every chart below uses it.
survey <- read_excel("~/Desktop/Stats/GSS survey /GSSassignment.xlsx")

# Bar chart of respondent counts by race, most frequent category first.
# The GSS non-response codes are turned into NA and those rows are dropped.
survey |>
  mutate(
    race = race |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web",
        NULL = ".i:  Inapplicable"
      )
  ) |>
  drop_na(race) |>
  mutate(race = fct_infreq(race)) |>
  plot_ly(x = ~race) |>
  add_histogram()

Above is a histogram of race. It shows 4,702 White, 975 Black, and 579 Other respondents.

# Bar chart of respondent counts by spanking attitude, most frequent first.
# The GSS non-response codes are turned into NA and those rows are dropped.
survey |>
  mutate(
    spanking = spanking |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".n:  No answer",
        NULL = ".s:  Skipped on Web",
        NULL = ".i:  Inapplicable"
      )
  ) |>
  drop_na(spanking) |>
  mutate(spanking = fct_infreq(spanking)) |>
  plot_ly(x = ~spanking) |>
  add_histogram()

Above is a histogram of spanking, showing how many respondents agree or disagree with spanking: 1,274 agree, 810 disagree, 706 strongly agree, and 453 strongly disagree.

# Box plot of spanking attitude (as a 1-4 score) for each race category.
survey |>
  # Recode the GSS non-response codes of race to NA and drop those rows.
  mutate(
    race = fct_recode(
      as_factor(race),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web",
      NULL = ".i:  Inapplicable"
    )
  ) |>
  drop_na(race) |>
  mutate(race = fct_infreq(race)) |>
  # Same clean-up for spanking.
  mutate(
    spanking = fct_recode(
      as_factor(spanking),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".i:  Inapplicable",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web"
    )
  ) |>
  drop_na(spanking) |>
  # as.numeric() on a factor returns the level codes, and as_factor() orders
  # levels by first appearance in the data. Put the levels in agreement order
  # first so that 1-4 is a real ordinal scale.
  # NOTE(review): labels assumed to match the GSS export's sentence-case
  # spelling -- confirm with levels(); fct_relevel() warns on a mismatch.
  mutate(
    spanking = fct_relevel(
      spanking,
      "Strongly agree", "Agree", "Disagree", "Strongly disagree"
    ),
    spanking = as.numeric(spanking)
  ) |>
  plot_ly(x = ~race, y = ~spanking) |>
  add_boxplot()

Above is a boxplot of spanking by race, with the spanking responses converted to numeric codes. It compares the distribution of responses across the race groups, and the summaries are identical for all three: White (max: 4, q3: 3, median: 2, min: 1), Black (max: 4, q3: 3, median: 2, min: 1), and Other (max: 4, q3: 3, median: 2, min: 1).

# Histogram of the number of children per respondent.
survey |>
  mutate(
    childs = fct_recode(
      as_factor(childs),
      # Non-response codes become NA (same set the spanking/childs chunk uses).
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".i:  Inapplicable",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web",
      # The top-coded category is plotted as 8.
      "8" = "8 or more"
    )
  ) |>
  drop_na(childs) |>
  # Go through as.character(): as.numeric() applied directly to a factor
  # returns the internal level codes, not the numbers written in the labels.
  mutate(childs = as.numeric(as.character(childs))) |>
  plot_ly(x = ~childs) |>
  add_histogram()

Above is a histogram of childs, where childs is converted to a numeric value: (1 = 1,906), (2 = 1,041), (3 = 1,571), (4 = 1,000), (5 = 81), (6 = 430), (7 = 75), (8 = 169), (9 = 47).

# Box plot of the number of children for each spanking attitude.
survey |>
  # Recode the GSS non-response codes of spanking to NA, order the remaining
  # categories by frequency, and drop the NA rows.
  mutate(
    spanking = fct_recode(
      as_factor(spanking),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web",
      NULL = ".i:  Inapplicable"
    )
  ) |>
  mutate(spanking = fct_infreq(spanking)) |>
  drop_na(spanking) |>
  mutate(
    childs = fct_recode(
      as_factor(childs),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".i:  Inapplicable",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web",
      # The top-coded category must be a plain number to be convertible.
      "8" = "8 or more"
    )
  ) |>
  drop_na(childs) |>
  # Go through as.character(): as.numeric() applied directly to a factor
  # returns the internal level codes, not the numbers written in the labels.
  mutate(childs = as.numeric(as.character(childs))) |>
  plot_ly(x = ~spanking, y = ~childs) |>
  add_boxplot()

Above is a boxplot of childs by spanking response. It shows the distribution of childs for respondents who agreed (max: 9, q3: 6, median: 3, q1: 1), disagreed (max: 9, q3: 6, median: 3, q1: 1), strongly agreed (max: 9, q3: 6, median: 3, q1: 1), and strongly disagreed (max: 9, q3: 6, median: 2, q1: 1).

# Bar chart of respondent counts by marital happiness, most frequent first.
survey |>
  mutate(
    hapmar = fct_recode(
      as_factor(hapmar),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".i:  Inapplicable",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web"
    )
  ) |>
  drop_na(hapmar) |>
  # Order the hapmar levels themselves by frequency. The earlier version
  # wrote this result (and a numeric copy) into `childs`, which overwrote an
  # unrelated column and left the plotted hapmar order unchanged.
  mutate(hapmar = fct_infreq(hapmar)) |>
  plot_ly(x = ~hapmar) |>
  add_histogram()

Above is a histogram of hapmar. It shows that 717 respondents are pretty happy, 1,251 are very happy, and 90 are not too happy.

# Bar chart of respondent counts by self-reported social class, shown in
# lower-to-upper order.
survey |>
  mutate(
    class_ = fct_recode(
      as_factor(class_),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".n:  No answer",
      # Also treated as missing, matching the class/hapmar chunk, so it does
      # not survive as a separate bar.
      NULL = ".i:  Inapplicable",
      NULL = ".s:  Skipped on Web"
    )
  ) |>
  mutate(
    class_ = fct_relevel(
      class_,
      "Lower class", "Working class", "Middle class", "Upper class"
    )
  ) |>
  drop_na(class_) |>
  plot_ly(x = ~class_) |>
  add_histogram()

Above is a histogram of class showing the number of respondents in each class: lower class 547, working class 2,702, middle class 2,749, and upper class 256.

# Bar chart of social class (lower-to-upper order) with one coloured series
# per marital-happiness answer. Rows with a non-response code in either
# variable are removed before plotting.
survey |>
  mutate(
    class_ = class_ |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".n:  No answer",
        NULL = ".i:  Inapplicable",
        NULL = ".s:  Skipped on Web"
      ) |>
      fct_relevel(
        "Lower class", "Working class", "Middle class", "Upper class"
      ),
    hapmar = hapmar |>
      as_factor() |>
      fct_recode(
        NULL = ".d:  Do not Know/Cannot Choose",
        NULL = ".n:  No answer",
        NULL = ".i:  Inapplicable",
        NULL = ".s:  Skipped on Web"
      )
  ) |>
  drop_na(class_, hapmar) |>
  plot_ly(x = ~class_, color = ~hapmar) |>
  add_histogram()

Above is a histogram of class and hapmar. The chart shows how many respondents in each class are pretty happy, very happy, and not too happy: lower class (39 pretty happy, 36 very happy, 7 not too happy), working class (310 pretty happy, 452 very happy, 36 not too happy), middle class (333 pretty happy, 676 very happy, 41 not too happy), and upper class (31 pretty happy, 83 very happy, 6 not too happy).

# Bar chart of respondent counts by age group, youngest group first.
survey |>
  mutate(
    age = fct_recode(
      as_factor(age),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".n:  No answer",
      NULL = ".i:  Inapplicable",
      NULL = ".s:  Skipped on Web"
    )
  ) |>
  # Collapse single years of age into decade groups in one call.
  mutate(
    age = fct_collapse(
      age,
      "under 30" = as.character(18:29),
      "30s" = as.character(30:39),
      "40s" = as.character(40:49),
      "50s" = as.character(50:59),
      "60s" = as.character(60:69),
      "70s" = as.character(70:79),
      "80s and older" = c(as.character(80:88), "89 or older")
    )
  ) |>
  # The last label must match the collapsed level name exactly; the previous
  # "80 and up" did not exist, so fct_relevel() warned and ignored it.
  mutate(
    age = fct_relevel(
      age,
      "under 30", "30s", "40s", "50s", "60s", "70s", "80s and older"
    )
  ) |>
  drop_na(age) |>
  plot_ly(x = ~age) |>
  add_histogram()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = fct_relevel(...)`.
Caused by warning:
! 1 unknown level in `f`: 80 and up

Above is a histogram of respondents by age group: under 30 (1,086), 30s (1,227), 40s (1,090), 50s (928), 60s (888), 70s (630), and 80 years and older (248).

# Heatmap of marital happiness by age group (youngest group first).
survey |>
  mutate(
    age = fct_recode(
      as_factor(age),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".n:  No answer",
      # Also treated as missing, matching the age histogram chunk.
      NULL = ".i:  Inapplicable",
      NULL = ".s:  Skipped on Web"
    )
  ) |>
  drop_na(age) |>
  mutate(
    hapmar = fct_recode(
      as_factor(hapmar),
      NULL = ".d:  Do not Know/Cannot Choose",
      NULL = ".i:  Inapplicable",
      NULL = ".n:  No answer",
      NULL = ".s:  Skipped on Web"
    )
  ) |>
  # Collapse single years of age into decade groups in one call. No
  # frequency ordering is needed first: the order is set explicitly below.
  mutate(
    age = fct_collapse(
      age,
      "under 30" = as.character(18:29),
      "30s" = as.character(30:39),
      "40s" = as.character(40:49),
      "50s" = as.character(50:59),
      "60s" = as.character(60:69),
      "70s" = as.character(70:79),
      "80s and older" = c(as.character(80:88), "89 or older")
    )
  ) |>
  # The last label must match the collapsed level name exactly; the previous
  # "80 and up" did not exist, so fct_relevel() warned and ignored it.
  mutate(
    age = fct_relevel(
      age,
      "under 30", "30s", "40s", "50s", "60s", "70s", "80s and older"
    )
  ) |>
  drop_na(hapmar) |>
  plot_ly(x = ~age, y = ~hapmar) |>
  add_histogram2d()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = fct_relevel(...)`.
Caused by warning:
! 1 unknown level in `f`: 80 and up

Above is a heatmap of hapmar by age group. It shows how many respondents in each age group are not too happy, very happy, and pretty happy. For people under 30, the data shows not too happy (3), very happy (101), and pretty happy (42). For people in their 30s, the data shows not too happy (19), very happy (255), and pretty happy (138). For people in their 40s, the data shows not too happy (18), very happy (242), and pretty happy (167). For people in their 50s, the data shows not too happy (15), very happy (203), and pretty happy (130). For people in their 60s, the data shows not too happy (19), very happy (220), and pretty happy (99). For people in their 70s, the data shows not too happy (10), very happy (149), and pretty happy (75). Finally, for people 80 years and older, the data shows not too happy (2), very happy (33), and pretty happy (26).