1 Set working directory

First things first, always set the working directory before you start a new project. The working directory is the folder on your file system that R reads files from and writes files to.

Let’s set up the working directory to be the one where the current .Rmd is. Use install.packages("here") if you don’t have it in your library.

library(here)

In a more classic framework, you would specify the file path this way:

# setwd("/Users/XXXX/XXX.../PLBZOOL558/labs/01")

2 Load the packages

Don’t forget to load the packages you will need to complete this task. A good practice is to have them all at the beginning of your document.

Tidyverse:

# Read the rent price table from price2.csv and store it as rent.US.
rent.US <- read_csv(file = "price2.csv")

## Rows: 12918 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): City, Metro, County, State
## dbl (4): Population.Rank, Jan.16, May.16, Sep.16
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the class vector of the imported object.
rent.US %>% class()

## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"

Tidyverse:

# Compact overview: one line per column with its type and first values.
rent.US %>% glimpse()

## Rows: 12,918
## Columns: 8
## $ City            <chr> "New York", "Los Angeles", "Chicago", "Houston", "Phil…
## $ Metro           <chr> "New York", "Los Angeles", "Chicago", "Houston", "Phil…
## $ County          <chr> "Queens", "Los Angeles", "Cook", "Harris", "Philadelph…
## $ State           <chr> "NY", "CA", "IL", "TX", "PA", "AZ", "NV", "TX", "CA", …
## $ Population.Rank <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ Jan.16          <dbl> 2335, 2596, 1668, 1436, 1196, 1198, 1204, 1230, 2360, …
## $ May.16          <dbl> 2339, 2662, 1686, 1446, 1211, 1236, 1225, 1245, 2428, …
## $ Sep.16          <dbl> 2324, 2723, 1675, 1438, 1220, 1238, 1228, 1234, 2442, …

Exercise 1

Pick any 3 columns one at a time from the rent.US data.

# Keep only the City column.
select(rent.US, City)

## # A tibble: 12,918 × 1
##    City        
##    <chr>       
##  1 New York    
##  2 Los Angeles 
##  3 Chicago     
##  4 Houston     
##  5 Philadelphia
##  6 Phoenix     
##  7 Las Vegas   
##  8 San Antonio 
##  9 San Diego   
## 10 Dallas      
## # ℹ 12,908 more rows

# Keep only the Metro column.
select(rent.US, Metro)

## # A tibble: 12,918 × 1
##    Metro            
##    <chr>            
##  1 New York         
##  2 Los Angeles      
##  3 Chicago          
##  4 Houston          
##  5 Philadelphia     
##  6 Phoenix          
##  7 Las Vegas        
##  8 San Antonio      
##  9 San Diego        
## 10 Dallas-Fort Worth
## # ℹ 12,908 more rows

# Keep only the County column.
select(rent.US, County)

## # A tibble: 12,918 × 1
##    County      
##    <chr>       
##  1 Queens      
##  2 Los Angeles 
##  3 Cook        
##  4 Harris      
##  5 Philadelphia
##  6 Maricopa    
##  7 Clark       
##  8 Bexar       
##  9 San Diego   
## 10 Dallas      
## # ℹ 12,908 more rows

Pick any 3 rows one at a time from the rent.US data.

# Pull out the first row by position.
slice(rent.US, 1)

## # A tibble: 1 × 8
##   City     Metro    County State Population.Rank Jan.16 May.16 Sep.16
##   <chr>    <chr>    <chr>  <chr>           <dbl>  <dbl>  <dbl>  <dbl>
## 1 New York New York Queens NY                  1   2335   2339   2324

# Pull out the second row by position.
slice(rent.US, 2)

## # A tibble: 1 × 8
##   City        Metro       County      State Population.Rank Jan.16 May.16 Sep.16
##   <chr>       <chr>       <chr>       <chr>           <dbl>  <dbl>  <dbl>  <dbl>
## 1 Los Angeles Los Angeles Los Angeles CA                  2   2596   2662   2723

# Pull out the third row by position.
slice(rent.US, 3)

## # A tibble: 1 × 8
##   City    Metro   County State Population.Rank Jan.16 May.16 Sep.16
##   <chr>   <chr>   <chr>  <chr>           <dbl>  <dbl>  <dbl>  <dbl>
## 1 Chicago Chicago Cook   IL                  3   1668   1686   1675

Select any 3 columns all at once.

# City, Metro and County are the first three adjacent columns, so a column
# range selects the same three columns in the same order.
rent.US %>% select(City:County)

## # A tibble: 12,918 × 3
##    City         Metro             County      
##    <chr>        <chr>             <chr>       
##  1 New York     New York          Queens      
##  2 Los Angeles  Los Angeles       Los Angeles 
##  3 Chicago      Chicago           Cook        
##  4 Houston      Houston           Harris      
##  5 Philadelphia Philadelphia      Philadelphia
##  6 Phoenix      Phoenix           Maricopa    
##  7 Las Vegas    Las Vegas         Clark       
##  8 San Antonio  San Antonio       Bexar       
##  9 San Diego    San Diego         San Diego   
## 10 Dallas       Dallas-Fort Worth Dallas      
## # ℹ 12,908 more rows

Select any 3 rows all at once.

# Rows 1 through 3 in a single call, written as an integer range.
rent.US %>% slice(1:3)

## # A tibble: 3 × 8
##   City        Metro       County      State Population.Rank Jan.16 May.16 Sep.16
##   <chr>       <chr>       <chr>       <chr>           <dbl>  <dbl>  <dbl>  <dbl>
## 1 New York    New York    Queens      NY                  1   2335   2339   2324
## 2 Los Angeles Los Angeles Los Angeles CA                  2   2596   2662   2723
## 3 Chicago     Chicago     Cook        IL                  3   1668   1686   1675

Select an individual row and an individual column at the same time.

# One row AND one column in a single pipeline: take row 1, then keep only
# its City column. (Running select() and slice() as two separate statements
# returns a whole column and a whole row, not their intersection.)
rent.US %>% 
  slice(1) %>% 
  select(City)

## # A tibble: 1 × 1
##   City    
##   <chr>   
## 1 New York

Now try selecting 3 rows and 2 columns at once.

# Three rows and two columns at once: restrict to rows 1-3 first, then keep
# only the City and Metro columns of those rows.
rent.US %>% 
  slice(1:3) %>% 
  select(City, Metro)

## # A tibble: 3 × 2
##   City        Metro      
##   <chr>       <chr>      
## 1 New York    New York   
## 2 Los Angeles Los Angeles
## 3 Chicago     Chicago    

Optional Challenge Create a copy of the rent.US data frame (assign it to a new variable name) and try changing the column names using the rename() function. You can use ?rename() to get help on how to use it if needed.

Exercise 2

Sort rent.US from least to greatest rent from May 2016. Save this sorted data frame as a new object called rent_May_ord.

# Sort by May 2016 rent, lowest first (arrange() is ascending by default),
# and save the result under the name the exercise asks for.
# Assignments print nothing; type rent_May_ord to display the table.
rent_May_ord <- rent.US %>% 
  arrange(May.16)

Now sort rent_May_ord from greatest to least Population.Rank and save this as a new object called rent_pop_rev.

# Start from rent_May_ord (not rent.US) and order by Population.Rank,
# largest value first, via desc().
rent_pop_rev <- rent_May_ord %>% 
  arrange(desc(Population.Rank))

Exercise 3

Identify the top 3 most expensive cities in Illinois (using September 2016 rent prices).

# Restrict the table to Illinois cities.
rent.IL <- rent.US %>% 
  filter(State == "IL")
# Answer to the exercise: order the Illinois cities by September 2016 rent,
# highest first, and keep the first three rows. Print top3.IL to see them.
top3.IL <- rent.IL %>% 
  arrange(desc(Sep.16)) %>% 
  slice(1:3)
glimpse(rent.IL)

## Rows: 559
## Columns: 8
## $ City            <chr> "Chicago", "Rockford", "Aurora", "Naperville", "Joliet…
## $ Metro           <chr> "Chicago", "Rockford", "Chicago", "Chicago", "Chicago"…
## $ County          <chr> "Cook", "Winnebago", "Kane", "Du Page", "Will", "Peori…
## $ State           <chr> "IL", "IL", "IL", "IL", "IL", "IL", "IL", "IL", "IL", …
## $ Population.Rank <dbl> 3, 136, 153, 177, 245, 282, 381, 385, 403, 425, 490, 5…
## $ Jan.16          <dbl> 1668, 898, 1547, 2337, 1385, 869, 1102, 1578, 834, 139…
## $ May.16          <dbl> 1686, 926, 1545, 2318, 1391, 909, 1172, 1597, 868, 141…
## $ Sep.16          <dbl> 1675, 894, 1526, 2313, 1381, 826, 1124, 1554, 825, 140…

Subset the data to include cities in CA, OR and WA that have rent less than 1500 on January 2016.

# Cities in CA, OR or WA whose January 2016 rent is below 1500.
# Both conditions must hold, so they are combined with `&`; `|` would also
# keep every cheap city in any other state and every West Coast city
# regardless of rent.
rent.IMT.1500 <- subset(rent.US,
                        State %in% c("CA", "OR", "WA") & Jan.16 < 1500)
# NOTE: re-knit the document to regenerate the printed output of these calls.
head(rent.IMT.1500)
dim(rent.IMT.1500)

Subset the data to include cities in PA or cities that have rent less than 1000 on September 2016.

# Cities that are in PA, OR that have a September 2016 rent below 1000
# (either condition is enough, hence `|`). Only the City, State and Sep.16
# columns are kept.
rent.IMT.1000 <- subset(rent.US,
                        State == "PA" | Sep.16 < 1000,
                        select = c(City, State, Sep.16))
# NOTE: re-knit the document to regenerate the printed output of these calls.
head(rent.IMT.1000)
dim(rent.IMT.1000)

Exercise 4

Create a new vector that has cat, dog, cow, bird and name it animals. Next create a new vector that has meow, woof, moo, chirp and name it sounds.

# Two character vectors, named as the exercise asks.
animals <- c("cat", "dog", "cow", "bird")
sounds <- c("meow", "woof", "moo", "chirp")

Join these two vectors by stacking them on top of one another.

# rbind() makes each vector one row of a character matrix.
rbind(animals, sounds)

##         [,1]   [,2]   [,3]  [,4]   
## animals "cat"  "dog"  "cow" "bird" 
## sounds  "meow" "woof" "moo" "chirp"

Now join them by placing them next to each other as columns.

# cbind() makes each vector one column of a character matrix.
cbind(animals, sounds)

##      animals sounds 
## [1,] "cat"   "meow" 
## [2,] "dog"   "woof" 
## [3,] "cow"   "moo"  
## [4,] "bird"  "chirp"

Create a third vector that has the numbers 1, 2, 3, 4 and name this numbers. Join numbers with animals.

# Build the data frame directly from the two vectors: going through cbind()
# first would coerce the numbers to character strings.
numbers <- c(1, 2, 3, 4)
toydata.df <- data.frame(numbers, animals)
toydata.df

##   numbers animals
## 1       1     cat
## 2       2     dog
## 3       3     cow
## 4       4    bird

PLB/ZOOL 558 Advanced Biostatistics

Original version written by Meha Jain, Oscar Feng-Hsun Chang & Arthur Endsley. Now modified by Madison Schmidt.

Week 1

1 Set working directory

2 Load the packages