library(tidyverse)
library(readxl) # package to open excel files
library(plotly) # a graphics package, and alternative to ggplot2
These are the packages that I used for this assignment.
# Print a compact column-by-column overview of the survey data.
GSS |>
  glimpse()
Rows: 6,309
Columns: 15
$ year <dbl> 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 20…
$ id_ <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
$ hrs2 <chr> ".i: Inapplicable", ".i: Inapplicable", ".i: Inapplicable",…
$ childs <chr> "0", "1", "1", "1", "2", "1", "2", "2", "2", "0", "2", "3", "3…
$ age <chr> "25", "43", "30", "55", "37", "47", "57", "71", "46", "19", "5…
$ sex <chr> "FEMALE", "MALE", "FEMALE", "FEMALE", "MALE", "MALE", "FEMALE"…
$ race <chr> "White", "White", "White", "White", "White", "White", "White",…
$ courts <chr> "About right", "Not harshly enough", ".i: Inapplicable", ".i:…
$ relig <chr> "Inter-nondenominational", "Protestant", "Protestant", "Protes…
$ attend <chr> "About once or twice a year", "About once a month", "Every wee…
$ hapmar <chr> ".i: Inapplicable", "PRETTY HAPPY", ".i: Inapplicable", ".i:…
$ class_ <chr> "Middle class", "Middle class", "Working class", "Upper class"…
$ premarsx <chr> "ALWAYS WRONG", ".i: Inapplicable", ".i: Inapplicable", ".i:…
$ xmarsex <chr> "ALWAYS WRONG", "ALWAYS WRONG", ".i: Inapplicable", ".i: Ina…
$ spanking <chr> "STRONGLY AGREE", ".i: Inapplicable", ".i: Inapplicable", ".…
Question 1.
# Question 1: count of respondents per race category.
GSS |>
  mutate(
    race = race |>
      as_factor() |>
      # Recode the "inapplicable" label to NA so it is left out of the plot.
      fct_recode(NULL = ".i: Inapplicable") |>
      # Order the bars from the most to the least common category.
      fct_infreq()
  ) |>
  plot_ly(x = ~race) |>
  add_histogram()
Warning: Ignoring 53 observationsWarning: Ignoring 53 observations
This is a bar chart of the number of respondents in each race category, ordered from most to least frequent. The majority of the people who took this survey are White.
Question 2.
# Question 2: count of respondents per answer to the spanking item.
GSS |>
  mutate(
    spanking = spanking |>
      as_factor() |>
      # Recode every non-substantive response label to NA.
      fct_recode(
        NULL = ".i: Inapplicable",
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      )
  ) |>
  plot_ly(x = ~spanking) |>
  add_histogram()
Warning: Ignoring 3066 observationsWarning: Ignoring 3066 observations
This is a histogram showing whether people favor spanking their children as a form of discipline. A good percentage of the respondents agree that spanking is okay when disciplining kids.
Question 3.
# Question 3: boxplot of a numeric spanking-agreement score for each race.
GSS |>
  mutate(
    spanking = spanking |>
      as_factor() |>
      # Recode every non-substantive response label to NA.
      fct_recode(
        NULL = ".i: Inapplicable",
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ) |>
      # as_factor() orders levels by first appearance in the data, so the
      # integer codes returned by as.numeric() would otherwise be arbitrary.
      # Put the levels in scale order first, so that the score runs from
      # 1 = strongly agree to 4 = strongly disagree.
      # NOTE(review): only "STRONGLY AGREE" is visible in glimpse(); the other
      # three labels are assumed - confirm with levels(). fct_relevel() warns
      # about any label that does not exist.
      fct_relevel(
        "STRONGLY AGREE",
        "AGREE",
        "DISAGREE",
        "STRONGLY DISAGREE"
      ) |>
      as.numeric(),
    race = race |>
      as_factor() |>
      fct_recode(NULL = ".i: Inapplicable")
  ) |>
  plot_ly(x = ~race, y = ~spanking) |>
  add_boxplot()
Warning: Ignoring 3097 observationsWarning: Ignoring 3097 observations
This is a boxplot comparing support for spanking as a form of discipline across the different races.
Question 4.
# Question 4: distribution of the number of children per respondent.
GSS |>
  mutate(
    # childs is stored as text. Converting it through a factor and then
    # as.numeric() would return the factor's internal level codes (e.g. "0"
    # becomes 1), not the number of children, so parse the digits directly.
    # The two missing-data labels are read as NA.
    # NOTE(review): a top-coded label such as "8 or more", if present, is
    # parsed as 8 - confirm against the distinct values of childs.
    childs = parse_number(
      childs,
      na = c(".i: Inapplicable", ".d: Do not Know/Cannot Choose")
    )
  ) |>
  plot_ly(x = ~childs) |>
  add_histogram()
Warning: Ignoring 16 observationsWarning: Ignoring 16 observations
This is a histogram showing how many children the people who took the survey have. Most respondents reported having either zero or three children.
Question 5.
# Question 5: boxplot of number of children for each answer to the spanking
# item.
GSS |>
  mutate(
    spanking = spanking |>
      as_factor() |>
      # Recode every non-substantive response label to NA.
      fct_recode(
        NULL = ".i: Inapplicable",
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ),
    # childs is stored as text. as.numeric() on a factor returns its internal
    # level codes rather than the number of children, so parse the digits
    # directly and read the two missing-data labels as NA.
    # NOTE(review): a top-coded label such as "8 or more", if present, is
    # parsed as 8 - confirm against the distinct values of childs.
    childs = parse_number(
      childs,
      na = c(".i: Inapplicable", ".d: Do not Know/Cannot Choose")
    )
  ) |>
  plot_ly(x = ~spanking, y = ~childs) |>
  add_boxplot()
Warning: Ignoring 3074 observationsWarning: Ignoring 3074 observations
This is a boxplot with support for spanking as the categorical variable, compared against the participants' number of children as the numerical variable. It appears that those who strongly disagreed with spanking as a form of discipline had fewer children.
Question 6.
# Question 6: count of respondents per marital-happiness answer.
GSS |>
  mutate(
    hapmar = hapmar |>
      as_factor() |>
      # Recode every non-substantive response label to NA.
      fct_recode(
        NULL = ".i: Inapplicable",
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ) |>
      # Order the bars from the most to the least common answer.
      fct_infreq()
  ) |>
  plot_ly(x = ~hapmar) |>
  add_histogram()
Warning: Ignoring 4251 observationsWarning: Ignoring 4251 observations
This is a histogram showing the survey participants' opinions of their marriages. Most of the participants reported being very happy with their marriage.
Question 7.
# Question 7: count of respondents per self-reported social class.
GSS |>
  mutate(
    class_ = class_ |>
      as_factor() |>
      # Recode every non-substantive response label to NA.
      fct_recode(
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ) |>
      fct_infreq() |>
      # Move the four classes to the front, in ascending order.
      fct_relevel(
        "Lower class",
        "Working class",
        "Middle class",
        "Upper class"
      )
  ) |>
  plot_ly(x = ~class_) |>
  add_histogram()
Warning: Ignoring 55 observationsWarning: Ignoring 55 observations
This is a histogram showing which socioeconomic class the participants of this survey placed themselves in. This graph shows that most participants rated themselves as either middle or working class.
Question 8.
# Question 8: count of respondents per social class, split by marital
# happiness.
GSS |>
  mutate(
    class_ = class_ |>
      as_factor() |>
      # Recode every non-substantive response label to NA.
      fct_recode(
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ) |>
      fct_infreq() |>
      # Move the four classes to the front, in ascending order.
      fct_relevel(
        "Lower class",
        "Working class",
        "Middle class",
        "Upper class"
      ),
    hapmar = hapmar |>
      as_factor() |>
      fct_recode(
        NULL = ".i: Inapplicable",
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ) |>
      # Order the happiness categories from most to least common.
      fct_infreq()
  ) |>
  plot_ly(x = ~class_, color = ~hapmar) |>
  add_histogram()
Warning: Ignoring 55 observationsWarning: Ignoring 55 observations
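This is a graph comparing the participants' happiness with their marriages and their socioeconomic status. The graph shows that participants that are middle and working class tend to be happier in their marriages. Note, however, that the bars show counts rather than proportions, so part of this pattern reflects that middle and working class are the largest groups in the survey.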
Question 9.
# Question 9: count of respondents per age group.
GSS |>
  mutate(
    age = age |>
      as_factor() |>
      # Recode the "no answer" label to NA.
      fct_recode(NULL = ".n: No answer") |>
      # Collapse single years of age into decade-wide groups. Ages are stored
      # as text, so each group is given as the character form of its range.
      fct_collapse(
        "Under 30" = as.character(18:29),
        "30s" = as.character(30:39),
        "40s" = as.character(40:49),
        "50s" = as.character(50:59),
        "60s" = as.character(60:69),
        "70s" = as.character(70:79),
        "80 and up" = c(as.character(80:88), "89 or older")
      ) |>
      # Order the groups from the most to the least common.
      fct_infreq()
  ) |>
  drop_na(age) |>
  plot_ly(x = ~age) |>
  add_histogram()
This is a graph showing the ages of the participants and how many participants are in each age group.
Question 10.
# Question 10: heatmap of respondent counts by marital happiness and age
# group.
GSS |>
  mutate(
    age = age |>
      as_factor() |>
      # Recode the "no answer" label to NA.
      fct_recode(NULL = ".n: No answer") |>
      # Collapse single years of age into decade-wide groups. Ages are stored
      # as text, so each group is given as the character form of its range.
      fct_collapse(
        "Under 30" = as.character(18:29),
        "30s" = as.character(30:39),
        "40s" = as.character(40:49),
        "50s" = as.character(50:59),
        "60s" = as.character(60:69),
        "70s" = as.character(70:79),
        "80 and up" = c(as.character(80:88), "89 or older")
      ) |>
      # Order the groups from the most to the least common.
      fct_infreq()
  ) |>
  drop_na(age) |>
  # hapmar is handled after the rows without an age are dropped, so its
  # frequency ordering is computed on the remaining rows only.
  mutate(
    hapmar = hapmar |>
      as_factor() |>
      fct_recode(
        NULL = ".i: Inapplicable",
        NULL = ".d: Do not Know/Cannot Choose",
        NULL = ".n: No answer",
        NULL = ".s: Skipped on Web"
      ) |>
      fct_infreq()
  ) |>
  plot_ly(x = ~hapmar, y = ~age) |>
  add_histogram2d()
This is a heatmap comparing the participants' happiness with their marriages and their different ages. The heatmap shows that there are more people who report being very happy in their marriages than pretty happy or not too happy.