library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(hflights)
library(knitr)
Due Date: October 14, 2022 Total Points: 32
1 The following ten observations, taken during the years 1970-1979, are on October snow cover for Eurasia in units of millions of square kilometers. Follow the instructions and answer the questions by typing the appropriate commands.
Year  Snow
1970   6.5
1971  12.0
1972  14.9
1973  10.0
1974  10.7
1975   7.9
1976  21.9
1977  12.5
1978  14.5
1979   9.2
# October snow cover for Eurasia, 1970-1979 (millions of square kilometers).
Year <- 1970:1979
Snow <- c(
  6.5, 12.0, 14.9, 10.0, 10.7,
  7.9, 21.9, 12.5, 14.5, 9.2
)

# One row per year; column names are spelled out rather than inferred.
Oct <- data.frame(Year = Year, Snow = Snow)
head(Oct)
## Year Snow
## 1 1970 6.5
## 2 1971 12.0
## 3 1972 14.9
## 4 1973 10.0
## 5 1974 10.7
## 6 1975 7.9
# Mean and median of the ten October snow cover values.
Oct_med <- median(Snow)
Oct_mean <- mean(Snow)
print(Oct_mean)
## [1] 12.01
print(Oct_med)
## [1] 11.35
# Sample standard deviation of the October snow cover values.
Snow_sd <- sd(Snow)
print(Snow_sd)
## [1] 4.390761
# Keep the years whose snow cover was at least 10 (millions of square
# kilometers), then count how many such years there are.
ten_milly <- Oct %>%
  filter(Snow >= 10)
count(ten_milly)
## n
## 1 7
2 The data vector rivers contains the lengths (miles) of 141 major rivers in North America.
# Proportion of rivers shorter than 500 miles.
sum(rivers < 500) / length(rivers)
## [1] 0.5815603
# Logical flag per river: TRUE when its length is below the mean length.
small_riv <- rivers < mean(rivers)
# Proportion of rivers flagged TRUE.
sm_riv_prop <- sum(small_riv) / length(rivers)
sm_riv_prop
## [1] 0.6666667
## small_riv is a logical vector marking which entries of 'rivers' are shorter than the mean river length; sm_riv_prop is the proportion of rivers flagged TRUE.
# 75th percentile (third quartile) of the river lengths.
quantile(rivers, 0.75)
## 75%
## 680
# Interquartile range of the river lengths.
IQR(rivers)
## [1] 370
3 The dataset hflights from the hflights package contains all 227,496 flights that departed Houston in 2011. Using the functions in the dplyr package
# Preview the first rows of the Houston flights data.
hflights %>%
  head()
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
## 5424 2011 1 1 6 1400 1500 AA 428
## 5425 2011 1 2 7 1401 1501 AA 428
## 5426 2011 1 3 1 1352 1502 AA 428
## 5427 2011 1 4 2 1403 1513 AA 428
## 5428 2011 1 5 3 1405 1507 AA 428
## 5429 2011 1 6 4 1359 1503 AA 428
## TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance
## 5424 N576AA 60 40 -10 0 IAH DFW 224
## 5425 N557AA 60 45 -9 1 IAH DFW 224
## 5426 N541AA 70 48 -8 -8 IAH DFW 224
## 5427 N403AA 70 39 3 3 IAH DFW 224
## 5428 N492AA 62 44 -3 5 IAH DFW 224
## 5429 N262AA 64 45 -7 -1 IAH DFW 224
## TaxiIn TaxiOut Cancelled CancellationCode Diverted
## 5424 7 13 0 0
## 5425 6 9 0 0
## 5426 5 17 0 0
## 5427 9 22 0 0
## 5428 9 9 0 0
## 5429 6 13 0 0
# Flights whose Month is 9 and DayofMonth is 11 (i.e. September 11).
sflights <- filter(hflights, Month == 9, DayofMonth == 11)

# Copy into a plain data frame and preview the first rows.
sflights_df <- data.frame(sflights)
head(sflights_df)
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
## 1 2011 9 11 7 1546 1651 AA 458
## 2 2011 9 11 7 551 904 AA 466
## 3 2011 9 11 7 1936 2036 AA 657
## 4 2011 9 11 7 1438 1544 AA 742
## 5 2011 9 11 7 1720 2030 AA 1294
## 6 2011 9 11 7 1142 1258 AA 1848
## TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance
## 1 N559AA 65 40 -14 -4 IAH DFW 224
## 2 N3EGAA 133 115 -16 -9 IAH MIA 964
## 3 N498AA 60 40 -19 -4 IAH DFW 224
## 4 N470AA 66 43 9 18 IAH DFW 224
## 5 N3BVAA 130 118 -20 -5 IAH MIA 964
## 6 N598AA 76 40 -2 -3 IAH DFW 224
## TaxiIn TaxiOut Cancelled CancellationCode Diverted
## 1 12 13 0 0
## 2 5 13 0 0
## 3 8 12 0 0
## 4 6 17 0 0
## 5 5 7 0 0
## 6 22 14 0 0
# Number of flights (rows) in sflights_df.
nrow(sflights_df)
## [1] 602
# Full dimensions: rows, then columns.
dim(sflights_df)
## [1] 602 21
# Count flights per aircraft (TailNum) and list the busiest aircraft first.
hs_flights <- sflights_df %>%
  group_by(TailNum) %>%
  summarise(flightsn = n(), .groups = "drop") %>%
  arrange(desc(flightsn))
print(hs_flights)
## # A tibble: 417 × 2
## TailNum flightsn
## <chr> <int>
## 1 N13935 4
## 2 N15941 4
## 3 N11109 3
## 4 N11181 3
## 5 N12934 3
## 6 N13929 3
## 7 N13936 3
## 8 N13970 3
## 9 N13992 3
## 10 N14174 3
## # … with 407 more rows
4 Using the tornado data set (Canvas - Tornadoes.txt) create a data frame with the year in the first column and the total number of tornadoes in Kansas by year in the second column. (6)
# Read the tornado records; the first line of the file holds the column names.
# Fields equal to "-9.900" are treated as missing (NA).
# The argument is spelled out as `na.strings` (its real name) instead of
# relying on partial matching of `na.string`.
# NOTE(review): the absolute path is machine-specific — adjust to where
# Tornadoes.txt lives before running elsewhere.
torn <- read.table(
  "C:/GIS517/Tornadoes.txt",
  header = TRUE,
  na.strings = "-9.900"
)
head(torn)
## OM YEAR MONTH DAY DATE TIME TIMEZONE STATE FIPS STATENUMBE FSCALE
## 1 1 1950 1 3 1950-01-03 1100 3 MO 29 1 3
## 2 2 1950 1 3 1950-01-03 1155 3 IL 17 2 3
## 3 3 1950 1 3 1950-01-03 1600 3 OH 39 1 1
## 4 4 1950 1 13 1950-01-13 525 3 AR 5 1 3
## 5 5 1950 1 25 1950-01-25 1930 3 MO 29 2 2
## 6 6 1950 1 25 1950-01-25 2100 3 IL 17 3 2
## INJURIES FATALITIES LOSS CROPLOSS SLAT SLON ELAT ELON LENGTH WIDTH NS
## 1 3 0 6 0 38.77 -90.22 38.83 -90.03 9.5 150 2
## 2 3 0 5 0 39.10 -89.30 39.12 -89.23 3.6 130 1
## 3 1 0 4 0 40.88 -84.58 0.00 0.00 0.1 10 1
## 4 1 1 3 0 34.40 -94.37 0.00 0.00 0.6 17 1
## 5 5 0 5 0 37.60 -90.68 37.63 -90.65 2.3 300 1
## 6 0 0 5 0 41.17 -87.33 0.00 0.00 0.1 100 1
## SN SG F1 F2 F3 F4
## 1 0 1 0 0 0 0
## 2 1 1 135 0 0 0
## 3 1 1 161 0 0 0
## 4 1 1 113 0 0 0
## 5 1 1 93 0 0 0
## 6 1 1 91 0 0 0
# Yearly tornado counts for Kansas: first column YEAR, second column the
# number of records with STATE == "KS" in that year.
torn_ks <- torn %>%
  filter(STATE == "KS") %>%
  group_by(YEAR) %>%
  summarise(torn = n(), .groups = "drop")
print(torn_ks)
## # A tibble: 62 × 2
## YEAR torn
## <int> <int>
## 1 1950 30
## 2 1951 77
## 3 1952 19
## 4 1953 29
## 5 1954 68
## 6 1955 96
## 7 1956 57
## 8 1957 63
## 9 1958 49
## 10 1959 65
## # … with 52 more rows