1. Create a new dataset that contains only the city, year, month, and sales variables.

# Keep the contiguous run of columns from `city` through `sales`.
txhousing %>%
  select(city:sales)
## # A tibble: 8,602 x 4
##    city     year month sales
##    <chr>   <int> <int> <dbl>
##  1 Abilene  2000     1    72
##  2 Abilene  2000     2    98
##  3 Abilene  2000     3   130
##  4 Abilene  2000     4    98
##  5 Abilene  2000     5   141
##  6 Abilene  2000     6   156
##  7 Abilene  2000     7   152
##  8 Abilene  2000     8   131
##  9 Abilene  2000     9   104
## 10 Abilene  2000    10   101
## # … with 8,592 more rows
# Same four columns, this time listed by name instead of as a range.
txhousing %>%
  select(city, year, month, sales)
## # A tibble: 8,602 x 4
##    city     year month sales
##    <chr>   <int> <int> <dbl>
##  1 Abilene  2000     1    72
##  2 Abilene  2000     2    98
##  3 Abilene  2000     3   130
##  4 Abilene  2000     4    98
##  5 Abilene  2000     5   141
##  6 Abilene  2000     6   156
##  7 Abilene  2000     7   152
##  8 Abilene  2000     8   131
##  9 Abilene  2000     9   104
## 10 Abilene  2000    10   101
## # … with 8,592 more rows

2. Create a new dataset that contains all of the variables except volume and date.

# Drop `volume` and `date`; every other column is kept in its original order.
txhousing %>%
  select(-volume, -date)
## # A tibble: 8,602 x 7
##    city     year month sales median listings inventory
##    <chr>   <int> <int> <dbl>  <dbl>    <dbl>     <dbl>
##  1 Abilene  2000     1    72  71400      701       6.3
##  2 Abilene  2000     2    98  58700      746       6.6
##  3 Abilene  2000     3   130  58100      784       6.8
##  4 Abilene  2000     4    98  68600      785       6.9
##  5 Abilene  2000     5   141  67300      794       6.8
##  6 Abilene  2000     6   156  66900      780       6.6
##  7 Abilene  2000     7   152  73500      742       6.2
##  8 Abilene  2000     8   131  75000      765       6.4
##  9 Abilene  2000     9   104  64500      771       6.5
## 10 Abilene  2000    10   101  59300      764       6.6
## # … with 8,592 more rows
# Equivalent exclusion, naming the dropped columns as a character vector.
txhousing %>%
  select(-c("volume", "date"))
## # A tibble: 8,602 x 7
##    city     year month sales median listings inventory
##    <chr>   <int> <int> <dbl>  <dbl>    <dbl>     <dbl>
##  1 Abilene  2000     1    72  71400      701       6.3
##  2 Abilene  2000     2    98  58700      746       6.6
##  3 Abilene  2000     3   130  58100      784       6.8
##  4 Abilene  2000     4    98  68600      785       6.9
##  5 Abilene  2000     5   141  67300      794       6.8
##  6 Abilene  2000     6   156  66900      780       6.6
##  7 Abilene  2000     7   152  73500      742       6.2
##  8 Abilene  2000     8   131  75000      765       6.4
##  9 Abilene  2000     9   104  64500      771       6.5
## 10 Abilene  2000    10   101  59300      764       6.6
## # … with 8,592 more rows

3. Create a new dataset that contains only the data from Dallas.

# Keep only the rows whose city is exactly "Dallas".
txhousing %>%
  filter(city == "Dallas")
## # A tibble: 187 x 9
##    city    year month sales    volume median listings inventory  date
##    <chr>  <int> <int> <dbl>     <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 Dallas  2000     1  2286 375389865 124400    13316       3.7 2000 
##  2 Dallas  2000     2  3247 555124812 127700    13495       3.7 2000.
##  3 Dallas  2000     3  4244 702148377 128500    13752       3.7 2000.
##  4 Dallas  2000     4  3977 667331427 132000    13752       3.7 2000.
##  5 Dallas  2000     5  4545 783197806 137100    14018       3.7 2000.
##  6 Dallas  2000     6  4738 846254912 138800    14392       3.8 2000.
##  7 Dallas  2000     7  4276 763793916 139600    14705       3.9 2000.
##  8 Dallas  2000     8  4373 770735708 134500    14607       3.9 2001.
##  9 Dallas  2000     9  3654 643046932 136000    15109       4   2001.
## 10 Dallas  2000    10  3601 626302634 134600    15212       4   2001.
## # … with 177 more rows

4. Create a new dataset of only the months with sales of at least 100 units.

# Keep the city-months with 100 or more sales (the bound is inclusive).
txhousing %>%
  filter(sales >= 100)
## # A tibble: 5,582 x 9
##    city     year month sales   volume median listings inventory  date
##    <chr>   <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 Abilene  2000     3   130  9285000  58100      784       6.8 2000.
##  2 Abilene  2000     5   141 10590000  67300      794       6.8 2000.
##  3 Abilene  2000     6   156 13910000  66900      780       6.6 2000.
##  4 Abilene  2000     7   152 12635000  73500      742       6.2 2000.
##  5 Abilene  2000     8   131 10710000  75000      765       6.4 2001.
##  6 Abilene  2000     9   104  7615000  64500      771       6.5 2001.
##  7 Abilene  2000    10   101  7040000  59300      764       6.6 2001.
##  8 Abilene  2000    11   100  7890000  70900      721       6.2 2001.
##  9 Abilene  2001     2   112  8670000  68900      700       6   2001.
## 10 Abilene  2001     3   118  9550000  72300      738       6.4 2001.
## # … with 5,572 more rows

5. Create a new dataset of all the data beginning in June of 2006.

# June 2006 onward: June-December of 2006 itself, or any later year.
txhousing %>%
  filter((year == 2006 & month >= 6) | year > 2006)
## # A tibble: 5,060 x 9
##    city     year month sales   volume median listings inventory  date
##    <chr>   <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 Abilene  2006     6   225 26620000  99800      786       4.8 2006.
##  2 Abilene  2006     7   169 19160000  93200      804       4.9 2006.
##  3 Abilene  2006     8   220 25735000 104000      812       5   2007.
##  4 Abilene  2006     9   159 19850000 109500      817       5   2007.
##  5 Abilene  2006    10   156 17100000  97100      797       4.9 2007.
##  6 Abilene  2006    11   146 15930000  99400      792       4.8 2007.
##  7 Abilene  2006    12   157 17985000 103100      724       4.4 2007.
##  8 Abilene  2007     1   132 13335000  91700      746       4.4 2007 
##  9 Abilene  2007     2   140 16105000 100000      774       4.6 2007.
## 10 Abilene  2007     3   212 20410000  86500      816       4.7 2007.
## # … with 5,050 more rows
# Same selection expressed through the numeric `date` column; the printed
# result has the same 5,060 rows as the year/month version of this filter.
txhousing %>%
  filter(date > 2006.4)
## # A tibble: 5,060 x 9
##    city     year month sales   volume median listings inventory  date
##    <chr>   <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 Abilene  2006     6   225 26620000  99800      786       4.8 2006.
##  2 Abilene  2006     7   169 19160000  93200      804       4.9 2006.
##  3 Abilene  2006     8   220 25735000 104000      812       5   2007.
##  4 Abilene  2006     9   159 19850000 109500      817       5   2007.
##  5 Abilene  2006    10   156 17100000  97100      797       4.9 2007.
##  6 Abilene  2006    11   146 15930000  99400      792       4.8 2007.
##  7 Abilene  2006    12   157 17985000 103100      724       4.4 2007.
##  8 Abilene  2007     1   132 13335000  91700      746       4.4 2007 
##  9 Abilene  2007     2   140 16105000 100000      774       4.6 2007.
## 10 Abilene  2007     3   212 20410000  86500      816       4.7 2007.
## # … with 5,050 more rows

6. Create a new dataset that removes all of the months where either the median sale price or the total number of sales is missing.

# Remove every month where `median` OR `sales` is missing, i.e. keep a row
# only when BOTH values are present. Combining the two checks with `|` would
# wrongly keep rows in which just one of the two is missing.
# NOTE(review): the printed output below was generated with the earlier `|`
# condition; re-run to refresh it (the row count should shrink).
filter(txhousing, !is.na(median) & !is.na(sales))
## # A tibble: 8,035 x 9
##    city     year month sales   volume median listings inventory  date
##    <chr>   <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 Abilene  2000     1    72  5380000  71400      701       6.3 2000 
##  2 Abilene  2000     2    98  6505000  58700      746       6.6 2000.
##  3 Abilene  2000     3   130  9285000  58100      784       6.8 2000.
##  4 Abilene  2000     4    98  9730000  68600      785       6.9 2000.
##  5 Abilene  2000     5   141 10590000  67300      794       6.8 2000.
##  6 Abilene  2000     6   156 13910000  66900      780       6.6 2000.
##  7 Abilene  2000     7   152 12635000  73500      742       6.2 2000.
##  8 Abilene  2000     8   131 10710000  75000      765       6.4 2001.
##  9 Abilene  2000     9   104  7615000  64500      771       6.5 2001.
## 10 Abilene  2000    10   101  7040000  59300      764       6.6 2001.
## # … with 8,025 more rows

7. Sort the entire dataset so that the months with the fewest sales are at the top.

# Order rows by ascending `sales`, so the smallest counts come first.
txhousing %>%
  arrange(sales)
## # A tibble: 8,602 x 9
##    city                year month sales  volume median listings inventory  date
##    <chr>              <int> <int> <dbl>   <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 San Marcos          2011    10     6 1156999 180000      163       8.3 2012.
##  2 Harlingen           2000     7     9 1110000  87500      719      30.8 2000.
##  3 South Padre Island  2011     1     9 2088500 225000     1258      55.7 2011 
##  4 San Marcos          2011     1    10 1482310 140000      165       7.5 2011 
##  5 San Marcos          2011    12    10 1561250 140000      148       8   2012.
##  6 San Marcos          2014    11    10 1506878 146700       96       4   2015.
##  7 South Padre Island  2010     1    10 2543721 200000     1290      NA   2010 
##  8 South Padre Island  2010    11    10 2098500 160000     1280      55.9 2011.
##  9 San Marcos          2001     2    11 1445000 126000      260       8.7 2001.
## 10 San Marcos          2008    10    11 1675000 143300      160       7.4 2009.
## # … with 8,592 more rows

8. Sort the entire dataset, first by city alphabetically, and then so that the largest volume is at the top for each city.

# Primary key: `city` ascending (alphabetical). Within each city, order by
# `volume` descending so the largest volume is listed first.
txhousing %>%
  arrange(city, desc(volume))
## # A tibble: 8,602 x 9
##    city     year month sales   volume median listings inventory  date
##    <chr>   <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl> <dbl>
##  1 Abilene  2015     7   268 45845730 148700      986       5   2016.
##  2 Abilene  2015     6   260 41396230 141500      965       5   2015.
##  3 Abilene  2014     7   231 35861350 145800     1033       5.8 2014.
##  4 Abilene  2014     6   230 34398506 135200     1023       5.8 2014.
##  5 Abilene  2013     7   218 32547446 140000      969       5.4 2014.
##  6 Abilene  2013     5   225 31901380 130000      923       5.3 2013.
##  7 Abilene  2015     3   198 31869437 136800      821       4.4 2015.
##  8 Abilene  2012     8   220 31687638 130600     1132       7.1 2013.
##  9 Abilene  2015     5   199 31385757 144700      919       4.8 2015.
## 10 Abilene  2014     9   201 30904319 135900     1012       5.8 2015.
## # … with 8,592 more rows

9. Create a dataset that contains only the city, sales, and date variables for Dallas and Houston, and use it to create a line plot of sales versus date with lines colored for each city.

# Line plot of sales against date for Dallas and Houston, one colour per city.
txhousing %>%
  # Restrict to the two cities first, then keep only the plotted columns.
  filter(city %in% c("Dallas", "Houston")) %>%
#  filter(city == "Dallas" | city == "Houston") %>%  # equivalent filter
  select(city, sales, date) %>%
  ggplot() +
    geom_line(aes(x = date, y = sales, color = city))