Chapter 6 Exercises

Exercise 2

# Four-cylinder cars only, showing just fuel economy and cylinder count.
mtcars %>%
  select(mpg, cyl) %>%
  filter(cyl == 4)
##                 mpg cyl
## Datsun 710     22.8   4
## Merc 240D      24.4   4
## Merc 230       22.8   4
## Fiat 128       32.4   4
## Honda Civic    30.4   4
## Toyota Corolla 33.9   4
## Toyota Corona  21.5   4
## Fiat X1-9      27.3   4
## Porsche 914-2  26.0   4
## Lotus Europa   30.4   4
## Volvo 142E     21.4   4

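We can instead collapse the remaining columns into a nested list-column with the code below. Note that this gives one row per distinct (mpg, cyl) combination rather than one row per car: cars that share an mpg value (22.8 and 30.4 each occur twice) end up in the same row.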

# Group the four-cylinder cars by (mpg, cyl) and pack every other column
# into a list-column called `data`, one row per group.
filter(mtcars, cyl == 4) %>%
  group_by(mpg, cyl) %>%
  nest()
## # A tibble: 9 × 3
## # Groups:   mpg, cyl [9]
##     mpg   cyl data            
##   <dbl> <dbl> <list>          
## 1  22.8     4 <tibble [2 × 9]>
## 2  24.4     4 <tibble [1 × 9]>
## 3  32.4     4 <tibble [1 × 9]>
## 4  30.4     4 <tibble [2 × 9]>
## 5  33.9     4 <tibble [1 × 9]>
## 6  21.5     4 <tibble [1 × 9]>
## 7  27.3     4 <tibble [1 × 9]>
## 8  26       4 <tibble [1 × 9]>
## 9  21.4     4 <tibble [1 × 9]>

Exercise 4

url <- "https://en.wikipedia.org/wiki/Asheville,_North_Carolina"
# Row labels (values of the table's first column) that are kept for plotting.
kept_columns <- c("Record high °F (°C)","Record low °F (°C)")
webpage <- read_html(url)
tbls <- html_nodes(webpage, "table")
# Find the table(s) whose markup mentions "Climate data" once and reuse the
# result, instead of repeating the same grep() for printing and for parsing.
climate_tbls <- tbls[grep("Climate data", tbls, ignore.case = TRUE)]
climate_tbls
## {xml_nodeset (1)}
## [1] <table class="wikitable mw-collapsible" style="width:auto; text-align:cen ...
# Fail with a clear message if the page layout changed and nothing matched;
# otherwise `[[1]]` below would stop with an opaque subscript error.
if (length(climate_tbls) == 0) {
  stop("No table containing 'Climate data' found at ", url, call. = FALSE)
}
df <- html_table(climate_tbls, fill = TRUE)[[1]]

# Promote the first row to column names. `df[1, ]` is a one-row data frame,
# so flatten it to a plain character vector first; assigning the data frame
# itself triggers tibble's "`value` must be a character vector" warning.
names(df) <- vapply(df[1, ], as.character, character(1), USE.NAMES = FALSE)
# The first column holds the row labels (e.g. "Record high ...").
names(df)[1] <- "Measurement"

# Go from wide to long (one row per Measurement/column pair, stored in the
# default `variable` and `value` columns), then keep only the rows of interest.
df <- melt(df, id.vars = "Measurement") %>%
    filter(Measurement %in% kept_columns)

# Strip the parenthesised part from the labels, e.g. "Record high °F (°C)".
df$Measurement <- trimws(gsub('(\\(.*\\))', '', df$Measurement))
# Clean the readings: drop the parenthesised part, swap the typographic
# minus sign for an ASCII hyphen so the text parses, then convert to numeric.
df$value <- df$value %>%
    gsub('(\\(.*\\))', '', .) %>%
    gsub('−', '-', .) %>%
    as.numeric()

# Build the whole plot in one expression: a line per x value joining its
# points, large points coloured by Measurement, then theme, axis and labels.
nc_climate <- ggplot(df, aes(x = variable, y = value)) +
    geom_line(aes(x = variable, y = value)) +
    geom_point(aes(colour = Measurement), size = 5) +
    theme_fivethirtyeight() +
    scale_y_continuous(breaks = seq(from = -60, to = 130, by = 10)) +
    labs(title = "Asheville Temperatures", subtitle = url)
nc_climate

This graph is nice to look at and is a much better visual than the climate table shown on the wiki. It is interesting to note that Asheville’s record lows go below zero, even though many people don’t think of it as a particularly cold place. Based in part on this graph, it is a mild area with four full seasons.

Exercise 6

Part (a)

# Risk measurements over time for the first three subjects.
HELPfull %>%
  select(ID, TIME, DRUGRISK, SEXRISK) %>%
  filter(ID <= 3)
##   ID TIME DRUGRISK SEXRISK
## 1  1    0        0       4
## 2  1    6        0       1
## 3  1   18        0       1
## 4  1   24        0       3
## 5  2    0        0       7
## 6  2    6        0       0
## 7  3    0       20       2
## 8  3    6       13       4
## 9  3   24       19       4

Part (b)

Measurements were only taken at time periods of (0), (6), and (24) for subject 3.

Part (c)

# Reshape to one row per subject: each TIME value becomes a suffix on the
# DRUGRISK / SEXRISK column names (e.g. DRUGRISK_0, SEXRISK_6).
# `id_cols` is named explicitly; passing it by position is deprecated in
# tidyr >= 1.3.0, where `...` comes directly after `data`.
HELPfull_wide <- HELPfull %>%
  pivot_wider(
    id_cols = ID,
    names_from = TIME,
    values_from = c(DRUGRISK, SEXRISK)
  ) %>%
  filter(ID <= 3) %>%
  # Grouping is kept so the printed tibble still reports "Groups: ID [3]".
  group_by(ID) %>%
  select(ID, DRUGRISK_0, DRUGRISK_6, SEXRISK_0, SEXRISK_6)
HELPfull_wide
## # A tibble: 3 × 5
## # Groups:   ID [3]
##      ID DRUGRISK_0 DRUGRISK_6 SEXRISK_0 SEXRISK_6
##   <int>      <int>      <int>     <int>     <int>
## 1     1          0          0         4         1
## 2     2          0          0         7         0
## 3     3         20         13         2         4

Part (d)

# Same wide reshape as in part (c), but for every subject, keeping only the
# baseline (TIME 0) and TIME 6 risk scores needed for the correlations.
# `id_cols` is named explicitly (positional use is deprecated in
# tidyr >= 1.3.0). The result is left ungrouped: grouping has no effect on
# cor() below and would otherwise linger on the object.
HELPfull_partd <- HELPfull %>%
  pivot_wider(
    id_cols = ID,
    names_from = TIME,
    values_from = c(DRUGRISK, SEXRISK)
  ) %>%
  select(ID, DRUGRISK_0, DRUGRISK_6, SEXRISK_0, SEXRISK_6)

 with(HELPfull_partd, cor(x = DRUGRISK_0, y = DRUGRISK_6, use = "complete.obs", method = "pearson"))
## [1] 0.5991146
 with(HELPfull_partd, cor(x = SEXRISK_0, y = SEXRISK_6, use = "complete.obs", method = "pearson"))
## [1] 0.5076615

Exercise 7

# Toy data: three subjects (id 1-3), each measured once under treatment ("T")
# and once under control ("C").
ds1 <- data.frame(
  id = rep(c(1, 2, 3), times = 2),
  group = rep(c("T", "C"), each = 3),
  vals = c(4, 6, 8, 5, 6, 10)
)
ds1
##   id group vals
## 1  1     T    4
## 2  2     T    6
## 3  3     T    8
## 4  1     C    5
## 5  2     C    6
## 6  3     C   10
# Split the data by group, then subtract control from treatment values.
# The subtraction pairs rows purely by position in the two subsets.
Treat <- ds1 %>% filter(group == "T")
Control <- ds1 %>% filter(group == "C")
all <- Treat %>% mutate(diff = Treat$vals - Control$vals)
all
##   id group vals diff
## 1  1     T    4   -1
## 2  2     T    6    0
## 3  3     T    8   -2

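This code works for this example and creates the values of -1, 0, -2. However, it pairs treatment and control rows purely by their position, so it gives wrong differences if the rows are not in the same id order within each group, and it fails to line up if a subject is missing an observation in one group. If there are NA’s in the dataset, the affected differences cannot be calculated without explicitly handling or removing those values. Reshaping to wide format matches the values by id instead: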

# One row per id with the group labels spread into columns, so each
# difference is computed within a subject's own row.
# Inside mutate(), `T` and `C` refer to the columns created by the pivot.
ds1_wider <- pivot_wider(ds1, names_from = group, values_from = vals) %>%
  mutate(diff = T - C)
ds1_wider
## # A tibble: 3 × 4
##      id     T     C  diff
##   <dbl> <dbl> <dbl> <dbl>
## 1     1     4     5    -1
## 2     2     6     6     0
## 3     3     8    10    -2

Exercise 8

# Count the rows of `Teams` whose `teamID` equals `team`.
#
# team: the team identifier to match against `Teams$teamID`.
# Returns the number of matching rows as a single integer.
count_seasons <- function(team) {
  matching_rows <- filter(Teams, teamID == team)
  nrow(matching_rows)
}

Exercise 10

# Plot home runs hit (HR) versus allowed (HRA) per year for teamID "CHN".
# Filter and select first so only the needed rows and columns are reshaped,
# and use pivot_longer() in place of the superseded gather().
Teams %>%
  filter(teamID == "CHN") %>%
  select(yearID, HR, HRA) %>%
  pivot_longer(
    cols = c(HR, HRA),
    names_to = "type",
    values_to = "home_runs"
  ) %>%
  # Friendlier legend labels than the raw column names.
  mutate(type = recode(type, "HR" = "hit", "HRA" = "allowed")) %>%
  ggplot(aes(x = yearID, y = home_runs)) +
  geom_line(aes(color = type)) +
  theme(legend.position = "top")