Question 1

a. How many rows and columns of data are there in the data set?

# dim() gives c(number of rows, number of columns); pull each out by position
# and bundle them in a named list so the answer prints with labels.
rows <- dim(religiousdata)[1L]
columns <- dim(religiousdata)[2L]
a <- list(rows = rows, columns = columns)
a
## $rows
## [1] 138
## 
## $columns
## [1] 373

b. Are there any missing values in the participants’ ages? What is the average age of the participants? What is the most frequent age?

# One-row summary of the age column: number of missing values, mean age, and
# the most frequent age (mode).
religiousdata %>%
  summarise(
    missing_values = sum(is.na(age)),
    mean_age = mean(age, na.rm = TRUE),
    # table() counts each distinct age (NAs are excluded by default) and labels
    # every count with the age it belongs to. which.max() returns the POSITION
    # of the largest count, with the age itself as the element's name, so the
    # mode has to be read from names(); using the bare which.max() result would
    # report the table position instead of an age.
    # NOTE(review): any previously recorded output for this chunk predates
    # this fix and should be refreshed by re-running it.
    mode_age = as.numeric(names(which.max(table(age))))
  )
##   missing_values mean_age mode_age
## 1              0  23.5942        7

c. In the data set, the variables depress1, depress2, …, depress21 are measures of depression. Select these columns and save as a subset called relig_depress.

# Pull out every column whose name begins with "depress" (the depression
# items) into its own data set, then show its structure.
relig_depress <- select(religiousdata, starts_with("depress"))
glimpse(relig_depress)
## Rows: 138
## Columns: 21
## $ depress1  <int> 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, …
## $ depress2  <int> 2, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, …
## $ depress3  <int> 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ depress4  <int> 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, …
## $ depress5  <int> 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ depress6  <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, …
## $ depress7  <int> 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, …
## $ depress8  <int> 2, 3, 1, 0, 2, 0, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 1, 0, 0, …
## $ depress9  <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, …
## $ depress10 <int> 5, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ depress11 <int> 2, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 2, 0, 1, 0, 0, 1, 0, 0, …
## $ depress12 <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, …
## $ depress13 <int> 0, 3, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 3, 1, 1, 0, 0, 1, 0, 0, …
## $ depress14 <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, …
## $ depress15 <int> 2, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 0, 0, 1, 2, 1, 1, …
## $ depress16 <int> 2, 3, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, …
## $ depress17 <int> 0, 0, 2, 0, 0, 0, 0, 2, 1, 1, 1, 1, 2, 1, 0, 0, 0, 2, 0, 0, …
## $ depress18 <int> 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, …
## $ depress19 <int> 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 2, 0, 2, 0, 1, 2, 0, 2, …
## $ depress20 <int> 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 3, 0, 0, 0, 0, 1, 1, 1, …
## $ depress21 <int> 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …

d. Calculate the total score of depression by adding the 21 depression variables together for each participant (set na.rm = TRUE) and add this total score as a new variable to the relig_depress subset, and save the new subset as relig_depress_total.

# Total depression score per participant.
# rowwise() makes the following mutate() evaluate once per row, so sum() adds
# up that row's values across all columns selected by c_across(everything());
# na.rm = TRUE skips missing items. ungroup() then drops the row-wise
# structure so later operations act on whole columns again.
relig_depress_total <- ungroup(
  mutate(
    rowwise(relig_depress),
    depress_total = sum(c_across(everything()), na.rm = TRUE)
  )
)
glimpse(relig_depress_total)
## Rows: 138
## Columns: 22
## $ depress1      <int> 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,…
## $ depress2      <int> 2, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,…
## $ depress3      <int> 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,…
## $ depress4      <int> 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1,…
## $ depress5      <int> 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ depress6      <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0,…
## $ depress7      <int> 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0,…
## $ depress8      <int> 2, 3, 1, 0, 2, 0, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 1, 0,…
## $ depress9      <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,…
## $ depress10     <int> 5, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,…
## $ depress11     <int> 2, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 2, 0, 1, 0, 0, 1, 0,…
## $ depress12     <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 1, 0,…
## $ depress13     <int> 0, 3, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 3, 1, 1, 0, 0, 1, 0,…
## $ depress14     <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0,…
## $ depress15     <int> 2, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 0, 0, 1, 2, 1,…
## $ depress16     <int> 2, 3, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,…
## $ depress17     <int> 0, 0, 2, 0, 0, 0, 0, 2, 1, 1, 1, 1, 2, 1, 0, 0, 0, 2, 0,…
## $ depress18     <int> 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,…
## $ depress19     <int> 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 2, 0, 2, 0, 1, 2, 0,…
## $ depress20     <int> 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 3, 0, 0, 0, 0, 1, 1,…
## $ depress21     <int> 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,…
## $ depress_total <int> 31, 15, 9, 2, 4, 0, 0, 14, 10, 6, 8, 17, 32, 2, 14, 4, 3…

e. Save the relig_depress_total subset into a .csv data file. [Use the write.csv function here.]

# Export the scored subset as CSV in the working directory; row.names = FALSE
# keeps R's row numbers out of the file.
write.csv(relig_depress_total, "relig_depress_total.csv", row.names = FALSE)

Question 2

In this case study, you’re going to work with a famous dataset, Iris. Please use the Tidyverse to solve the following questions:
a. The iris dataset is pre-installed in R. Please convert it to a tibble and save the tibble as an object called iris_dat.

# Convert the built-in iris data frame to a tibble and store it under the
# name the task asks for (iris_dat).
iris_dat <- as_tibble(iris)
# Keep the earlier name as an alias so code that reads `irisdat` still runs.
irisdat <- iris_dat
iris_dat
## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows

b. Keep only three variables–Sepal.Length, Petal.Length, and Species–and arrange them in this order: Species, Sepal.Length, Petal.Length. Save this subset as an object called iris_subset.

# Keep only the three requested columns. select() returns columns in the
# order they are named, so naming Species first also moves it to the front.
iris_subset <- dplyr::select(irisdat, Species, Sepal.Length, Petal.Length)

iris_subset
## # A tibble: 150 × 3
##    Species Sepal.Length Petal.Length
##    <fct>          <dbl>        <dbl>
##  1 setosa           5.1          1.4
##  2 setosa           4.9          1.4
##  3 setosa           4.7          1.3
##  4 setosa           4.6          1.5
##  5 setosa           5            1.4
##  6 setosa           5.4          1.7
##  7 setosa           4.6          1.4
##  8 setosa           5            1.5
##  9 setosa           4.4          1.4
## 10 setosa           4.9          1.5
## # ℹ 140 more rows

c. Within iris_subset, filter out rows where Sepal.Length is greater than 6.

# Drop the rows whose Sepal.Length exceeds 6, i.e. keep everything that is
# NOT greater than 6. The result replaces iris_subset.
iris_subset <- filter(iris_subset, !(Sepal.Length > 6))

iris_subset
## # A tibble: 89 × 3
##    Species Sepal.Length Petal.Length
##    <fct>          <dbl>        <dbl>
##  1 setosa           5.1          1.4
##  2 setosa           4.9          1.4
##  3 setosa           4.7          1.3
##  4 setosa           4.6          1.5
##  5 setosa           5            1.4
##  6 setosa           5.4          1.7
##  7 setosa           4.6          1.4
##  8 setosa           5            1.5
##  9 setosa           4.4          1.4
## 10 setosa           4.9          1.5
## # ℹ 79 more rows

d. Within iris_subset, compute average Petal.Length.

# Average Petal.Length within each Species: group first, then collapse every
# group to a single row holding its mean.
iris_mean <- summarise(
  group_by(iris_subset, Species),
  mean_petal_length = mean(Petal.Length)
)

iris_mean
## # A tibble: 3 × 2
##   Species    mean_petal_length
##   <fct>                  <dbl>
## 1 setosa                  1.46
## 2 versicolor              4.04
## 3 virginica               4.96

e. Add the average Petal.Length as a new variable to iris_subset.

# Attach each species' average Petal.Length to every row of that species.
# Inside a grouped mutate(), mean() is computed per group and recycled to all
# rows of the group.
iris_subset <- iris_subset %>%
  group_by(Species) %>%
  mutate(mean_petal_length = mean(Petal.Length)) %>%
  # Drop the grouping once the per-species mean is attached; otherwise
  # iris_subset stays grouped and later verbs would silently run per species.
  ungroup()

iris_subset
## # A tibble: 89 × 4
##    Species Sepal.Length Petal.Length mean_petal_length
##    <fct>          <dbl>        <dbl>             <dbl>
##  1 setosa           5.1          1.4              1.46
##  2 setosa           4.9          1.4              1.46
##  3 setosa           4.7          1.3              1.46
##  4 setosa           4.6          1.5              1.46
##  5 setosa           5            1.4              1.46
##  6 setosa           5.4          1.7              1.46
##  7 setosa           4.6          1.4              1.46
##  8 setosa           5            1.5              1.46
##  9 setosa           4.4          1.4              1.46
## 10 setosa           4.9          1.5              1.46
## # ℹ 79 more rows