Beaches

  1. Which season has the greatest average rainfall in a given year?
  2. What month has the greatest rainfall on average?

Pseudo code Q1:

  1. Group_by season
  2. Filter by year
  3. Summarize mean rainfall

Pseudo code Q2:

  1. Group by month, month_name
  2. Filter by year
  3. Summarize mean rainfall

Beaches Q1:

# Load the sydneybeaches3.csv data set directly from GitHub.
beaches <- read.csv(
  file = "https://raw.githubusercontent.com/araastat/BIOF339/master/slides/lectures/data/sydneybeaches3.csv"
)

# Mean rainfall per season for 2013: keep that year's rows, group them by
# season, then average. mean() is called without na.rm, so a season that
# contains a missing rainfall reading summarises to NA.
beaches %>%
  filter(year == 2013) %>%
  group_by(season, season_name) %>%
  summarise(mean(rainfall))
## # A tibble: 4 x 3
## # Groups:   season [4]
##   season season_name `mean(rainfall)`
##    <int> <fct>                  <dbl>
## 1      1 Summer                  2.11
## 2      2 Autumn                  3.8 
## 3      3 Winter                  8.45
## 4      4 Spring                 NA

Function Q1

#' Mean rainfall per season for a single year
#'
#' Reads the global `beaches` data frame, keeps the rows whose `year` equals
#' `x`, and averages `rainfall` within each season.
#'
#' Missing rainfall readings are ignored. Without `na.rm = TRUE` a single `NA`
#' turns the whole season's mean into `NA`, which makes the seasons impossible
#' to compare. A season whose readings are all missing yields `NaN`.
#'
#' @param x A single year, compared against the `year` column with `==`.
#' @return A tibble with one row per season present in that year and the
#'   columns `season`, `season_name` and `mean(rainfall)`; it is still grouped
#'   by `season` because `summarize()` only drops the last grouping variable.
beaches_func <- function(x) {
  # The summary column is named explicitly so it keeps the name
  # `mean(rainfall)` that the original unnamed expression produced.
  beaches %>%
    group_by(season, season_name) %>%
    filter(year == x) %>%
    summarize(`mean(rainfall)` = mean(rainfall, na.rm = TRUE))
}

Beaches Q2

# Mean rainfall per calendar month for 2013: keep that year's rows, group
# them by month, then average. mean() is called without na.rm, so a month
# that contains a missing rainfall reading summarises to NA.
beaches %>%
  filter(year == 2013) %>%
  group_by(month, month_name) %>%
  summarize(mean(rainfall))
## # A tibble: 12 x 3
## # Groups:   month [12]
##    month month_name `mean(rainfall)`
##    <int> <fct>                 <dbl>
##  1     1 January                0.10
##  2     2 February               7.22
##  3     3 March                  5.72
##  4     4 April                  4.08
##  5     5 May                    1.97
##  6     6 June                  23.3 
##  7     7 July                   0   
##  8     8 August                 2.08
##  9     9 September              0.36
## 10    10 October                9.2 
## 11    11 November              NA   
## 12    12 December               0

Function Q2

#' Mean rainfall per calendar month for a single year
#'
#' Reads the global `beaches` data frame, keeps the rows whose `year` equals
#' `x`, and averages `rainfall` within each month.
#'
#' Missing rainfall readings are ignored. Without `na.rm = TRUE` a single `NA`
#' turns the whole month's mean into `NA`. A month whose readings are all
#' missing yields `NaN`.
#'
#' @param x A single year, compared against the `year` column with `==`.
#' @return A tibble with one row per month present in that year and the
#'   columns `month`, `month_name` and `mean(rainfall)`; it is still grouped
#'   by `month` because `summarise()` only drops the last grouping variable.
beaches_func_month <- function(x) {
  # The summary column is named explicitly so it keeps the name
  # `mean(rainfall)` that the original unnamed expression produced.
  beaches %>%
    group_by(month, month_name) %>%
    filter(year == x) %>%
    summarise(`mean(rainfall)` = mean(rainfall, na.rm = TRUE))
}
beaches_func_month(2017)
## # A tibble: 12 x 3
## # Groups:   month [12]
##    month month_name `mean(rainfall)`
##    <int> <fct>                 <dbl>
##  1     1 January               2.08 
##  2     2 February              6.42 
##  3     3 March                 7.62 
##  4     4 April                11.7  
##  5     5 May                   0.317
##  6     6 June                 11.5  
##  7     7 July                  0    
##  8     8 August                1.56 
##  9     9 September             0.3  
## 10    10 October               4.97 
## 11    11 November              0.12 
## 12    12 December              0

NFL

# Load the kick data from a CSV file in the current working directory.
nfl <- read.csv(file = "nfl2008_fga.csv")

Questions

  1. What was the average distance that Denver kicked in each game?
  2. What is the average distance a kicker kicks the football on a fourth down?

Pseudo code Q1

  1. Group by AwayTeam, HomeTeam
  2. Filter by Kickteam == Denver
  3. Summarize the average distance of the kick

Pseudo code Q2

  1. Group by kickteam
  2. Filter by 4th down.
  3. Mean Distance of kick

NFL DPLYR Q1

# Average kick distance for Denver, one row per away/home pairing:
# keep Denver's kicks first, then group by the two teams and average.
nfl %>%
  filter(kickteam == "DEN") %>%
  group_by(AwayTeam, HomeTeam) %>%
  summarise(mean(distance))
## # A tibble: 14 x 3
## # Groups:   AwayTeam [9]
##    AwayTeam HomeTeam `mean(distance)`
##    <fct>    <fct>               <dbl>
##  1 BUF      DEN                  37.5
##  2 DEN      ATL                  34.5
##  3 DEN      CAR                  45  
##  4 DEN      CLE                  34.3
##  5 DEN      KC                   38  
##  6 DEN      NYJ                  30  
##  7 DEN      OAK                  34.5
##  8 JAC      DEN                  39  
##  9 KC       DEN                  40.5
## 10 MIA      DEN                  49.5
## 11 NO       DEN                  30.5
## 12 OAK      DEN                  44.7
## 13 SD       DEN                  52  
## 14 TB       DEN                  40.7

Function NFL Q1

#' Average kick distance for one team, per away/home pairing
#'
#' Works on the global `nfl` data frame.
#'
#' @param x Team code compared against the `kickteam` column, e.g. "DEN".
#' @return A tibble with `AwayTeam`, `HomeTeam` and `mean(distance)`.
nfl_team_kick <- function(x) {
  by_matchup <- group_by(nfl, AwayTeam, HomeTeam)
  team_kicks <- filter(by_matchup, kickteam == x)
  summarise(team_kicks, mean(distance))
}

nfl_team_kick("STL")
## # A tibble: 15 x 3
## # Groups:   AwayTeam [8]
##    AwayTeam HomeTeam `mean(distance)`
##    <fct>    <fct>               <dbl>
##  1 BUF      STL                  51  
##  2 CHI      STL                  41.5
##  3 DAL      STL                  47  
##  4 MIA      STL                  36.2
##  5 NYG      STL                  54  
##  6 SEA      STL                  42  
##  7 SF       STL                  34.3
##  8 STL      ARI                  46.5
##  9 STL      ATL                  29  
## 10 STL      NE                   29.7
## 11 STL      NYJ                  37  
## 12 STL      PHI                  46  
## 13 STL      SEA                  36  
## 14 STL      SF                   41  
## 15 STL      WAS                  42.2

NFL Q2

# Average kick distance on fourth down, one row per team/kicker pair:
# keep the fourth-down kicks first, then group and average.
nfl %>%
  filter(down == 4) %>%
  group_by(kickteam, name) %>%
  summarise(mean(distance))
## # A tibble: 39 x 3
## # Groups:   kickteam [32]
##    kickteam name       `mean(distance)`
##    <fct>    <fct>                 <dbl>
##  1 ARI      N.Rackers              33.3
##  2 ATL      J.Elam                 34.7
##  3 BAL      M.Stover               33.7
##  4 BAL      S.Hauschka             53.5
##  5 BUF      R.Lindell              37.9
##  6 CAR      J.Kasay                36.5
##  7 CHI      R.Gould                37.5
##  8 CIN      D.Rayner               26  
##  9 CIN      S.Graham               35.5
## 10 CLE      P.Dawson               35.9
## # … with 29 more rows

NFL Q2 Function

#' Average kick distance per team and kicker on a given down
#'
#' Works on the global `nfl` data frame. The argument was previously ignored
#' and the down was hard-coded to 4; it now selects the down, and its default
#' of 4 keeps `kickteam()` and `kickteam(4)` returning the fourth-down summary
#' they always did.
#'
#' Note that this function shares its name with the `kickteam` column. Inside
#' `group_by()` the column is found first, so the grouping still refers to
#' the data.
#'
#' @param x A single down, compared against the `down` column with `==`.
#'   Defaults to 4.
#' @return A tibble with `kickteam`, `name` and `mean(distance)`, still
#'   grouped by `kickteam`.
kickteam <- function(x = 4) {
  nfl %>%
    group_by(kickteam, name) %>%
    filter(down == x) %>%
    summarize(mean(distance))
}

Wine Selection

# Load the red wine data from the working directory and preview the first
# six rows.
redwine <- read.csv(file = "winequality-red.csv")
head(redwine, n = 6)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5

Questions

  1. What is the mean amount of sulfates in this red wine list, given the quality?
  2. What is the average amount of sugar, given specific qualities?

Pseudo code Q1

  1. Group by quality
  2. Filter by quality 5
  3. Summarize the mean sulfate amount

Pseudo code Q2

  1. Group by quality
  2. Filter by quality == 5
  3. Summarize the mean of residual.sugar

Red Wine Q1

# Mean sulphates for wines of quality 5: keep those rows first, then group
# by quality so the result carries the quality column alongside the mean.
redwine %>%
  filter(quality == 5) %>%
  group_by(quality) %>%
  summarise(mean(sulphates))
## # A tibble: 1 x 2
##   quality `mean(sulphates)`
##     <int>             <dbl>
## 1       5             0.621

Red Wine Q1 Function

#' Mean sulphates for wines of one quality score
#'
#' Works on the global `redwine` data frame.
#'
#' @param x Quality score compared against the `quality` column with `==`.
#' @return A tibble with `quality` and `mean(sulphates)`.
sulphate_quality <- function(x) {
  by_quality <- group_by(redwine, quality)
  chosen_quality <- filter(by_quality, quality == x)
  summarise(chosen_quality, mean(sulphates))
}

sulphate_quality(3)
## # A tibble: 1 x 2
##   quality `mean(sulphates)`
##     <int>             <dbl>
## 1       3              0.57

Red Wine Q2

# Mean residual sugar for wines of quality 5: keep those rows first, then
# group by quality so the result carries the quality column alongside the mean.
redwine %>%
  filter(quality == 5) %>%
  group_by(quality) %>%
  summarise(mean(residual.sugar))
## # A tibble: 1 x 2
##   quality `mean(residual.sugar)`
##     <int>                  <dbl>
## 1       5                   2.53

Red Wine Q2 Function

#' Mean residual sugar for wines of one quality score
#'
#' Works on the global `redwine` data frame.
#'
#' @param x Quality score compared against the `quality` column with `==`.
#' @return A tibble with `quality` and `mean(residual.sugar)`.
redwine_residualsugar <- function(x) {
  by_quality <- group_by(redwine, quality)
  chosen_quality <- filter(by_quality, quality == x)
  summarise(chosen_quality, mean(residual.sugar))
}

redwine_residualsugar(4)
## # A tibble: 1 x 2
##   quality `mean(residual.sugar)`
##     <int>                  <dbl>
## 1       4                   2.69