library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(hflights)
library(knitr)

Problem Set # 2

Due Date: October 14, 2022 Total Points: 32

1 The following ten observations, taken during the years 1970-1979, are on October snow cover for Eurasia in units of millions of square kilometers. Follow the instructions and answer the questions by typing the appropriate commands.

| Year | Snow |
|------|------|
| 1970 | 6.5  |
| 1971 | 12.0 |
| 1972 | 14.9 |
| 1973 | 10.0 |
| 1974 | 10.7 |
| 1975 | 7.9  |
| 1976 | 21.9 |
| 1977 | 12.5 |
| 1978 | 14.5 |
| 1979 | 9.2  |

  1. Create a data frame from these data. (2)
# October snow cover over Eurasia, 1970-1979 (millions of square km).
# One row per year; the two vectors are kept because later chunks use them.
Snow <- c(
  6.5, 12.0, 14.9, 10.0, 10.7,
  7.9, 21.9, 12.5, 14.5, 9.2
)
Year <- 1970:1979
Oct <- data.frame(Year = Year, Snow = Snow)
head(Oct, n = 6L)
##   Year Snow
## 1 1970  6.5
## 2 1971 12.0
## 3 1972 14.9
## 4 1973 10.0
## 5 1974 10.7
## 6 1975  7.9
  1. What are the mean and median snow cover over this decade? (2)
# Mean and median of the ten October snow-cover values.
Oct_med <- median(x = Snow)
Oct_mean <- mean(x = Snow)
print(Oct_mean)
## [1] 12.01
print(Oct_med)
## [1] 11.35
  1. What is the standard deviation of the snow cover over this decade? (2)
# Sample standard deviation of snow cover, written as the square root of
# the sample variance.
Snow_sd <- sqrt(var(Snow))
print(Snow_sd)
## [1] 4.390761
  1. How many Octobers had snow cover greater than 10 million km\(^2\)? (2)
# Octobers whose snow cover was strictly greater than 10 million km^2.
# The comparison must be `>` rather than `>=`: 1973 recorded exactly 10.0
# and does not count as "greater than 10".
ten_milly <- filter(Oct, Snow > 10)
count(ten_milly)
##   n
## 1 6

2 The data vector rivers contains the lengths (miles) of 141 major rivers in North America.

  1. What proportion of the rivers are shorter than 500 miles long? (2)
# Share of rivers under 500 miles: count the TRUE flags and divide by the
# total number of rivers.
sum(rivers < 500) / length(rivers)
## [1] 0.5815603
  1. What proportion of the rivers are shorter than the mean length? (2)
# Flag each river that is shorter than the mean length, then take the
# fraction of flagged rivers.
riv_mean <- mean(rivers)
small_riv <- rivers < riv_mean
sm_riv_prop <- sum(small_riv) / length(rivers)
print(sm_riv_prop)
## [1] 0.6666667
## The small_riv variable is a logical vector marking which entries of the original data vector 'rivers' are shorter than the mean river length.
  1. What is the 75th percentile river length? (2)
# Upper quartile (75th percentile) of river length.
quantile(x = rivers, probs = 3 / 4)
## 75% 
## 680
  1. What is the interquartile range in river length? (2)
# Interquartile range: distance between the 25th and 75th percentiles.
diff(quantile(rivers, probs = c(0.25, 0.75), names = FALSE))
## [1] 370

3 The dataset hflights from the hflights package contains all 227,496 flights that departed Houston in 2011. Use the functions in the dplyr package to complete the following tasks.

# Preview the first six rows of the flight table.
head(hflights, n = 6L)
##      Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
## 5424 2011     1          1         6    1400    1500            AA       428
## 5425 2011     1          2         7    1401    1501            AA       428
## 5426 2011     1          3         1    1352    1502            AA       428
## 5427 2011     1          4         2    1403    1513            AA       428
## 5428 2011     1          5         3    1405    1507            AA       428
## 5429 2011     1          6         4    1359    1503            AA       428
##      TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance
## 5424  N576AA                60      40      -10        0    IAH  DFW      224
## 5425  N557AA                60      45       -9        1    IAH  DFW      224
## 5426  N541AA                70      48       -8       -8    IAH  DFW      224
## 5427  N403AA                70      39        3        3    IAH  DFW      224
## 5428  N492AA                62      44       -3        5    IAH  DFW      224
## 5429  N262AA                64      45       -7       -1    IAH  DFW      224
##      TaxiIn TaxiOut Cancelled CancellationCode Diverted
## 5424      7      13         0                         0
## 5425      6       9         0                         0
## 5426      5      17         0                         0
## 5427      9      22         0                         0
## 5428      9       9         0                         0
## 5429      6      13         0                         0
  1. Create a data frame from hflights containing only those flights that departed on September 11th of that year. (4)
# Keep only the flights dated September 11 (Month 9, DayofMonth 11).
sflights <- filter(hflights, Month == 9 & DayofMonth == 11)

# Store the subset as a plain data frame and preview it.
sflights_df <- data.frame(sflights)
head(sflights_df, n = 6L)
##   Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
## 1 2011     9         11         7    1546    1651            AA       458
## 2 2011     9         11         7     551     904            AA       466
## 3 2011     9         11         7    1936    2036            AA       657
## 4 2011     9         11         7    1438    1544            AA       742
## 5 2011     9         11         7    1720    2030            AA      1294
## 6 2011     9         11         7    1142    1258            AA      1848
##   TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance
## 1  N559AA                65      40      -14       -4    IAH  DFW      224
## 2  N3EGAA               133     115      -16       -9    IAH  MIA      964
## 3  N498AA                60      40      -19       -4    IAH  DFW      224
## 4  N470AA                66      43        9       18    IAH  DFW      224
## 5  N3BVAA               130     118      -20       -5    IAH  MIA      964
## 6  N598AA                76      40       -2       -3    IAH  DFW      224
##   TaxiIn TaxiOut Cancelled CancellationCode Diverted
## 1     12      13         0                         0
## 2      5      13         0                         0
## 3      8      12         0                         0
## 4      6      17         0                         0
## 5      5       7         0                         0
## 6     22      14         0                         0
  1. How many flights departed on that day? (2)
# Each row is one departure, so the row count is the number of flights.
nrow(sflights_df)
## [1] 602
# Rows and columns together, as a cross-check.
dim(sflights_df)
## [1] 602  21
  1. Create a data frame with the first column being the tail number and the second being the number of departures from Houston the plane made that year sorted by most to least number of flights. (4)
# Departures per aircraft (tail number), sorted from most to fewest.
# The tally is taken from the full hflights table rather than the
# September 11 subset (sflights_df), because the question asks for the
# number of departures each plane made "that year".
# NOTE: the tibble printed below in this rendered document came from the
# one-day subset; re-knit to refresh it.
hs_flights <- hflights %>%
  group_by(TailNum) %>%
  summarize(flightsn = n()) %>%
  arrange(desc(flightsn))
hs_flights
## # A tibble: 417 × 2
##    TailNum flightsn
##    <chr>      <int>
##  1 N13935         4
##  2 N15941         4
##  3 N11109         3
##  4 N11181         3
##  5 N12934         3
##  6 N13929         3
##  7 N13936         3
##  8 N13970         3
##  9 N13992         3
## 10 N14174         3
## # … with 407 more rows

4 Using the tornado data set (Canvas - Tornadoes.txt) create a data frame with the year in the first column and the total number of tornadoes in Kansas by year in the second column. (6)

# Load the tornado records. The first line of the file supplies the column
# names, and the literal text "-9.900" is read as NA.
# `na.strings` is written out in full rather than relying on partial
# matching of the abbreviated `na.string`.
# NOTE(review): the path is an absolute Windows location, so this only
# runs on a machine where that file exists.
torn <- read.table(
  "C:/GIS517/Tornadoes.txt",
  header = TRUE,
  na.strings = "-9.900"
)
head(torn)
##   OM YEAR MONTH DAY       DATE TIME TIMEZONE STATE FIPS STATENUMBE FSCALE
## 1  1 1950     1   3 1950-01-03 1100        3    MO   29          1      3
## 2  2 1950     1   3 1950-01-03 1155        3    IL   17          2      3
## 3  3 1950     1   3 1950-01-03 1600        3    OH   39          1      1
## 4  4 1950     1  13 1950-01-13  525        3    AR    5          1      3
## 5  5 1950     1  25 1950-01-25 1930        3    MO   29          2      2
## 6  6 1950     1  25 1950-01-25 2100        3    IL   17          3      2
##   INJURIES FATALITIES LOSS CROPLOSS  SLAT   SLON  ELAT   ELON LENGTH WIDTH NS
## 1        3          0    6        0 38.77 -90.22 38.83 -90.03    9.5   150  2
## 2        3          0    5        0 39.10 -89.30 39.12 -89.23    3.6   130  1
## 3        1          0    4        0 40.88 -84.58  0.00   0.00    0.1    10  1
## 4        1          1    3        0 34.40 -94.37  0.00   0.00    0.6    17  1
## 5        5          0    5        0 37.60 -90.68 37.63 -90.65    2.3   300  1
## 6        0          0    5        0 41.17 -87.33  0.00   0.00    0.1   100  1
##   SN SG  F1 F2 F3 F4
## 1  0  1   0  0  0  0
## 2  1  1 135  0  0  0
## 3  1  1 161  0  0  0
## 4  1  1 113  0  0  0
## 5  1  1  93  0  0  0
## 6  1  1  91  0  0  0
# Yearly tornado totals for Kansas: keep rows with STATE "KS", group them
# by YEAR, and count the rows in each group.
torn_ks <- summarize(
  group_by(filter(torn, STATE == "KS"), YEAR),
  torn = n()
)

print(torn_ks)
## # A tibble: 62 × 2
##     YEAR  torn
##    <int> <int>
##  1  1950    30
##  2  1951    77
##  3  1952    19
##  4  1953    29
##  5  1954    68
##  6  1955    96
##  7  1956    57
##  8  1957    63
##  9  1958    49
## 10  1959    65
## # … with 52 more rows