4) Saving to an R Markdown file, posting to RPubs, posting to GitHub.
#install.packages("rio")
#install.packages("RCurl")
#install.packages("bitops")
#install.packages("ggplot2")
#install.packages("tidyr")
#install.packages("dplyr")
library(rio)
library(RCurl)
## Loading required package: bitops
library(bitops)
library(ggplot2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
##
## complete
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the data: fetch the raw CSV text from GitHub, then parse it from memory.
x <- getURL("https://raw.githubusercontent.com/excelsiordata/DATA607/master/FlightDelays.csv")
# The first line of the file is consumed as a header row, but the names used
# for the columns are the ones supplied in col.names below.
FlightDelays <- read.csv(
  text = x,
  header = TRUE, # spelled out in full; `head` only worked via partial matching
  sep = ",",
  stringsAsFactors = FALSE,
  col.names = c(
    "Airline", "Status", "Los.Angeles", "Phoenix", "San.Diego",
    "San.Francisco", "Seattle"
  )
)
# Take a look at the data and make sure everything loaded in properly
head(FlightDelays)
## Airline Status Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 AM WEST delayed 117 415 65 129 61
## 5 Allegiant on time 658 425 352 645 648
## 6 Allegiant delayed 100 150 167 20 40
#Looks good!
# Reshape wide to long: fold the five city columns (positions 3 through 7)
# into a City / n pair, giving one row per Airline, Status and City.
TidyFlightDelays1 <- FlightDelays %>%
  gather(key = "City", value = "n", 3:7) %>%
  data.frame()
TidyFlightDelays1
## Airline Status City n
## 1 ALASKA on time Los.Angeles 497
## 2 ALASKA delayed Los.Angeles 62
## 3 AM WEST on time Los.Angeles 694
## 4 AM WEST delayed Los.Angeles 117
## 5 Allegiant on time Los.Angeles 658
## 6 Allegiant delayed Los.Angeles 100
## 7 Southwest on time Los.Angeles 700
## 8 Southwest delayed Los.Angeles 86
## 9 ALASKA on time Phoenix 221
## 10 ALASKA delayed Phoenix 12
## 11 AM WEST on time Phoenix 4840
## 12 AM WEST delayed Phoenix 415
## 13 Allegiant on time Phoenix 425
## 14 Allegiant delayed Phoenix 150
## 15 Southwest on time Phoenix 640
## 16 Southwest delayed Phoenix 77
## 17 ALASKA on time San.Diego 212
## 18 ALASKA delayed San.Diego 20
## 19 AM WEST on time San.Diego 383
## 20 AM WEST delayed San.Diego 65
## 21 Allegiant on time San.Diego 352
## 22 Allegiant delayed San.Diego 167
## 23 Southwest on time San.Diego 395
## 24 Southwest delayed San.Diego 115
## 25 ALASKA on time San.Francisco 503
## 26 ALASKA delayed San.Francisco 102
## 27 AM WEST on time San.Francisco 320
## 28 AM WEST delayed San.Francisco 129
## 29 Allegiant on time San.Francisco 645
## 30 Allegiant delayed San.Francisco 20
## 31 Southwest on time San.Francisco 452
## 32 Southwest delayed San.Francisco 103
## 33 ALASKA on time Seattle 1841
## 34 ALASKA delayed Seattle 305
## 35 AM WEST on time Seattle 201
## 36 AM WEST delayed Seattle 61
## 37 Allegiant on time Seattle 648
## 38 Allegiant delayed Seattle 40
## 39 Southwest on time Seattle 521
## 40 Southwest delayed Seattle 89
# Inspect the long-format table: first as a tibble (compact print), then one
# line per column with glimpse().
# as_tibble() is used because tbl_df() has been deprecated since dplyr 1.0.0.
as_tibble(TidyFlightDelays1)
## # A tibble: 40 × 4
## Airline Status City n
## <chr> <chr> <chr> <int>
## 1 ALASKA on time Los.Angeles 497
## 2 ALASKA delayed Los.Angeles 62
## 3 AM WEST on time Los.Angeles 694
## 4 AM WEST delayed Los.Angeles 117
## 5 Allegiant on time Los.Angeles 658
## 6 Allegiant delayed Los.Angeles 100
## 7 Southwest on time Los.Angeles 700
## 8 Southwest delayed Los.Angeles 86
## 9 ALASKA on time Phoenix 221
## 10 ALASKA delayed Phoenix 12
## # ... with 30 more rows
glimpse(TidyFlightDelays1)
## Observations: 40
## Variables: 4
## $ Airline <chr> "ALASKA", "ALASKA", "AM WEST", "AM WEST", "Allegiant",...
## $ Status <chr> "on time", "delayed", "on time", "delayed", "on time",...
## $ City <chr> "Los.Angeles", "Los.Angeles", "Los.Angeles", "Los.Ange...
## $ n <int> 497, 62, 694, 117, 658, 100, 700, 86, 221, 12, 4840, 4...
# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run in batch mode or knitted.
if (interactive()) {
  View(TidyFlightDelays1)
}
# Let's split on time/delayed out into their own columns.
# The original call passed `2:2` as a fourth positional argument. In spread()
# that slot is `fill`, not a column selection, so any missing Airline/City
# combination would have been filled with 2. It is dropped so gaps stay NA.
# data.frame() is kept on purpose: its name checking is what turns the
# "on time" column into on.time.
TidyFlightDelays2 <- data.frame(
  spread(TidyFlightDelays1, key = "Status", value = "n")
)
TidyFlightDelays2
## Airline City delayed on.time
## 1 ALASKA Los.Angeles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 Allegiant Los.Angeles 100 658
## 7 Allegiant Phoenix 150 425
## 8 Allegiant San.Diego 167 352
## 9 Allegiant San.Francisco 20 645
## 10 Allegiant Seattle 40 648
## 11 AM WEST Los.Angeles 117 694
## 12 AM WEST Phoenix 415 4840
## 13 AM WEST San.Diego 65 383
## 14 AM WEST San.Francisco 129 320
## 15 AM WEST Seattle 61 201
## 16 Southwest Los.Angeles 86 700
## 17 Southwest Phoenix 77 640
## 18 Southwest San.Diego 115 395
## 19 Southwest San.Francisco 103 452
## 20 Southwest Seattle 89 521
# Inspect the wide-format table: first as a tibble, then one line per column
# with glimpse().
# as_tibble() is used because tbl_df() has been deprecated since dplyr 1.0.0.
as_tibble(TidyFlightDelays2)
## # A tibble: 20 × 4
## Airline City delayed on.time
## * <chr> <chr> <int> <int>
## 1 ALASKA Los.Angeles 62 497
## 2 ALASKA Phoenix 12 221
## 3 ALASKA San.Diego 20 212
## 4 ALASKA San.Francisco 102 503
## 5 ALASKA Seattle 305 1841
## 6 Allegiant Los.Angeles 100 658
## 7 Allegiant Phoenix 150 425
## 8 Allegiant San.Diego 167 352
## 9 Allegiant San.Francisco 20 645
## 10 Allegiant Seattle 40 648
## 11 AM WEST Los.Angeles 117 694
## 12 AM WEST Phoenix 415 4840
## 13 AM WEST San.Diego 65 383
## 14 AM WEST San.Francisco 129 320
## 15 AM WEST Seattle 61 201
## 16 Southwest Los.Angeles 86 700
## 17 Southwest Phoenix 77 640
## 18 Southwest San.Diego 115 395
## 19 Southwest San.Francisco 103 452
## 20 Southwest Seattle 89 521
glimpse(TidyFlightDelays2)
## Observations: 20
## Variables: 4
## $ Airline <chr> "ALASKA", "ALASKA", "ALASKA", "ALASKA", "ALASKA", "All...
## $ City <chr> "Los.Angeles", "Phoenix", "San.Diego", "San.Francisco"...
## $ delayed <int> 62, 12, 20, 102, 305, 100, 150, 167, 20, 40, 117, 415,...
## $ on.time <int> 497, 221, 212, 503, 1841, 658, 425, 352, 645, 648, 694...
# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run in batch mode or knitted.
if (interactive()) {
  View(TidyFlightDelays2)
}