# ref: Express Intro to dplyr
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
url <- "http://steviep42.bitbucket.org/YOUTUBE.DIR/weather.csv"
download.file(url,"weather.csv")
system("head -5 weather.csv")
## Warning: running command 'head -5 weather.csv' had status 127
weather <- read_csv("weather.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   date = col_character(),
##   precipitation_inches = col_character(),
##   events = col_character(),
##   zip_code = col_integer()
## )
## See spec(...) for full column specifications.
weather
## # A tibble: 3,665 × 24
##         date max_temperature_f mean_temperature_f min_temperature_f
##        <chr>             <dbl>              <dbl>             <dbl>
## 1  8/29/2013                74                 68                61
## 2  8/30/2013                78                 69                60
## 3  8/31/2013                71                 64                57
## 4   9/1/2013                74                 66                58
## 5   9/2/2013                75                 69                62
## 6   9/3/2013                73                 67                60
## 7   9/4/2013                74                 68                61
## 8   9/5/2013                72                 66                60
## 9   9/6/2013                85                 71                56
## 10  9/7/2013                88                 73                58
## # ... with 3,655 more rows, and 20 more variables: max_dew_point_f <dbl>,
## #   mean_dew_point_f <dbl>, min_dew_point_f <dbl>, max_humidity <dbl>,
## #   mean_humidity <dbl>, min_humidity <dbl>,
## #   max_sea_level_pressure_inches <dbl>,
## #   mean_sea_level_pressure_inches <dbl>,
## #   min_sea_level_pressure_inches <dbl>, max_visibility_miles <dbl>,
## #   mean_visibility_miles <dbl>, min_visibility_miles <dbl>,
## #   max_wind_Speed_mph <dbl>, mean_wind_speed_mph <dbl>,
## #   max_gust_speed_mph <dbl>, precipitation_inches <chr>,
## #   cloud_cover <dbl>, events <chr>, wind_dir_degrees <dbl>,
## #   zip_code <int>
select(weather,precipitation_inches, max_temperature_f,zip_code)
## # A tibble: 3,665 × 3
##    precipitation_inches max_temperature_f zip_code
##                   <chr>             <dbl>    <int>
## 1                     0                74    94107
## 2                     0                78    94107
## 3                     0                71    94107
## 4                     0                74    94107
## 5                     0                75    94107
## 6                     0                73    94107
## 7                     0                74    94107
## 8                     0                72    94107
## 9                     0                85    94107
## 10                    0                88    94107
## # ... with 3,655 more rows
dp_mtcars <- tbl_df(mtcars)
dp_mtcars
## # A tibble: 32 × 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
## *  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   21.0     6 160.0   110  3.90 2.620 16.46     0     1     4     4
## 2   21.0     6 160.0   110  3.90 2.875 17.02     0     1     4     4
## 3   22.8     4 108.0    93  3.85 2.320 18.61     1     1     4     1
## 4   21.4     6 258.0   110  3.08 3.215 19.44     1     0     3     1
## 5   18.7     8 360.0   175  3.15 3.440 17.02     0     0     3     2
## 6   18.1     6 225.0   105  2.76 3.460 20.22     1     0     3     1
## 7   14.3     8 360.0   245  3.21 3.570 15.84     0     0     3     4
## 8   24.4     4 146.7    62  3.69 3.190 20.00     1     0     4     2
## 9   22.8     4 140.8    95  3.92 3.150 22.90     1     0     4     2
## 10  19.2     6 167.6   123  3.92 3.440 18.30     1     0     4     4
## # ... with 22 more rows
dp_mtcars.m<-mutate(dp_mtcars,wt=wt*1000, good_mpg=ifelse(mpg>"25","good","bad"))
tail(dp_mtcars.m)
## # A tibble: 6 × 12
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  26.0     4 120.3    91  4.43  2140  16.7     0     1     5     2
## 2  30.4     4  95.1   113  3.77  1513  16.9     1     1     5     2
## 3  15.8     8 351.0   264  4.22  3170  14.5     0     1     5     4
## 4  19.7     6 145.0   175  3.62  2770  15.5     0     1     5     6
## 5  15.0     8 301.0   335  3.54  3570  14.6     0     1     5     8
## 6  21.4     4 121.0   109  4.11  2780  18.6     1     1     4     2
## # ... with 1 more variables: good_mpg <chr>
table(dp_mtcars.m$good_mpg)
## 
##  bad good 
##   26    6
filter(dp_mtcars,mpg >=30 | wt>1500)
## # A tibble: 4 × 11
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  32.4     4  78.7    66  4.08 2.200 19.47     1     1     4     1
## 2  30.4     4  75.7    52  4.93 1.615 18.52     1     1     4     2
## 3  33.9     4  71.1    65  4.22 1.835 19.90     1     1     4     1
## 4  30.4     4  95.1   113  3.77 1.513 16.90     1     1     5     2
filter(dp_mtcars.m,mpg >= 30 & wt>1.500)
## # A tibble: 4 × 12
##     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  32.4     4  78.7    66  4.08  2200 19.47     1     1     4     1
## 2  30.4     4  75.7    52  4.93  1615 18.52     1     1     4     2
## 3  33.9     4  71.1    65  4.22  1835 19.90     1     1     4     1
## 4  30.4     4  95.1   113  3.77  1513 16.90     1     1     5     2
## # ... with 1 more variables: good_mpg <chr>
arrange(dp_mtcars.m,desc(wt))
## # A tibble: 32 × 12
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   10.4     8 460.0   215  3.00  5424 17.82     0     0     3     4
## 2   14.7     8 440.0   230  3.23  5345 17.42     0     0     3     4
## 3   10.4     8 472.0   205  2.93  5250 17.98     0     0     3     4
## 4   16.4     8 275.8   180  3.07  4070 17.40     0     0     3     3
## 5   19.2     8 400.0   175  3.08  3845 17.05     0     0     3     2
## 6   13.3     8 350.0   245  3.73  3840 15.41     0     0     3     4
## 7   15.2     8 275.8   180  3.07  3780 18.00     0     0     3     3
## 8   17.3     8 275.8   180  3.07  3730 17.60     0     0     3     3
## 9   14.3     8 360.0   245  3.21  3570 15.84     0     0     3     4
## 10  15.0     8 301.0   335  3.54  3570 14.60     0     1     5     8
## # ... with 22 more rows, and 1 more variables: good_mpg <chr>
dp_mtcars%>%
  mutate(cyl=factor(cyl,levels=c(4,6,8)),
         am=factor(am,labels=c("Auto","Manual")))
## # A tibble: 32 × 11
##      mpg    cyl  disp    hp  drat    wt  qsec    vs     am  gear  carb
##    <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fctr> <dbl> <dbl>
## 1   21.0      6 160.0   110  3.90 2.620 16.46     0 Manual     4     4
## 2   21.0      6 160.0   110  3.90 2.875 17.02     0 Manual     4     4
## 3   22.8      4 108.0    93  3.85 2.320 18.61     1 Manual     4     1
## 4   21.4      6 258.0   110  3.08 3.215 19.44     1   Auto     3     1
## 5   18.7      8 360.0   175  3.15 3.440 17.02     0   Auto     3     2
## 6   18.1      6 225.0   105  2.76 3.460 20.22     1   Auto     3     1
## 7   14.3      8 360.0   245  3.21 3.570 15.84     0   Auto     3     4
## 8   24.4      4 146.7    62  3.69 3.190 20.00     1   Auto     4     2
## 9   22.8      4 140.8    95  3.92 3.150 22.90     1   Auto     4     2
## 10  19.2      6 167.6   123  3.92 3.440 18.30     1   Auto     4     4
## # ... with 22 more rows
dp_mtcars%>%
  mutate(cyl=factor(cyl,levels=c(4,6,8)),
         am=factor(am,labels=c("Auto","Manual")))%>%
  ggplot(aes(x=wt,y=mpg,color=cyl))+geom_point()+facet_wrap(~am)+
  xlab("Weigth of the car (1000lbs)")+
  ylab("Miles per Gallon")+
  ggtitle("Fuel Economy as a function of Weigh,Transmision type
           and Cylinders")