Due Date: September 17, 2015 Total Points: 46
1 The following ten observations, taken during the years 1970-1979, are on October snow cover for Eurasia in units of millions of square kilometers. Follow the instructions and answer questions by typing the appropriate R commands.
Year Snow 1970 6.5 1971 12.0 1972 14.9 1973 10.0 1974 10.7 1975 7.9 1976 21.9 1977 12.5 1978 14.5 1979 9.2
# October snow cover for Eurasia, 1970-1979 (millions of square kilometers).
year <- 1970:1979
snow <- c(6.5, 12.0, 14.9, 10.0, 10.7, 7.9, 21.9, 12.5, 14.5, 9.2)
oct.snow <- data.frame(year, snow)
# Mean snow cover.
with(oct.snow, mean(snow))
## [1] 12.01
# Median snow cover.
with(oct.snow, median(snow))
## [1] 11.35
# Standard deviation of snow cover.
with(oct.snow, sd(snow))
## [1] 4.390761
# Number of years with snow cover above 10; summing the logical mask counts
# the TRUE entries.
sum(oct.snow$snow > 10)
## [1] 6
2 The data set rivers contains the lengths (miles) of 141 major rivers in North America.
# Proportion of rivers shorter than 500 miles; the mean of a logical mask is
# the fraction of TRUE entries.
mean(rivers < 500)
## [1] 0.5815603
# Proportion of rivers shorter than the mean river length.
mean(rivers < mean(rivers))
## [1] 0.6666667
# 75th percentile of river length. The argument must be spelled `probs`; a
# misspelled name is silently absorbed by `...` and quantile() then falls back
# to its default of all five quartile points. Sorting first is unnecessary.
rivers_q75 <- quantile(rivers, probs = 0.75)
rivers_q75
## 75%
## 680
# Interquartile range of river length.
IQR(rivers)
## [1] 370
3 The dataset hflights from the hflights package contains all 227,496 flights that departed Houston in 2011. Using the functions in the dplyr package
library (dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library (hflights)
# Wrap the hflights data with tbl_df() so it prints compactly.
hflights.df <- tbl_df(hflights)
# Keep only the flights with Month == 9 and DayofMonth == 11 (September 11).
hflights_filtered <- hflights.df %>%
  filter(Month == 9, DayofMonth == 11)
print(hflights_filtered)
## Source: local data frame [602 x 21]
##
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## (int) (int) (int) (int) (int) (int) (chr)
## 1 2011 9 11 7 1546 1651 AA
## 2 2011 9 11 7 551 904 AA
## 3 2011 9 11 7 1936 2036 AA
## 4 2011 9 11 7 1438 1544 AA
## 5 2011 9 11 7 1720 2030 AA
## 6 2011 9 11 7 1142 1258 AA
## 7 2011 9 11 7 1006 1333 AA
## 8 2011 9 11 7 712 815 AA
## 9 2011 9 11 7 858 1003 AA
## 10 2011 9 11 7 1833 2051 AS
## .. ... ... ... ... ... ... ...
## Variables not shown: FlightNum (int), TailNum (chr), ActualElapsedTime
## (int), AirTime (int), ArrDelay (int), DepDelay (int), Origin (chr), Dest
## (chr), Distance (int), TaxiIn (int), TaxiOut (int), Cancelled (int),
## CancellationCode (chr), Diverted (int)
# Rows and columns of the filtered table.
dim(hflights_filtered)
## [1] 602 21
4 Consider the SSN.txt file from http://myweb.fsu.edu/jelsner/data/SSN.txt. The file contains monthly sunspot numbers since 1851.
# Read the monthly sunspot table; fields equal to "-9.900" are read as NA.
# The argument is spelled out in full (`na.strings`) rather than relying on
# partial argument matching.
con <- "http://myweb.fsu.edu/jelsner/data/SSN.txt"
SSN <- read.table(con, na.strings = "-9.900", header = TRUE)
library(ggplot2)
# Histogram of the Sep column.
ggplot(SSN, aes(Sep)) +
  geom_histogram(fill = "green") +
  xlab("Sun Spot") +
  theme_gray()
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Box plot of the Jun column, annotated with Tukey's five-number summary.
boxplot(SSN$Jun, ylab = "Sun Spot")
f <- fivenum(SSN$Jun)
# fivenum() returns minimum, lower hinge, median, upper hinge, maximum, in that
# order. One label is given per value: text() recycles a shorter label vector,
# which would attach "Minimum"/"Maximum" to the wrong statistics.
text(
  rep(1.3, 5), f,
  labels = c("Minimum", "Lower hinge", "Median", "Upper hinge", "Maximum")
)
# Scatter plot of the Sep column against the Jun column.
ggplot(SSN, aes(x = Jun, y = Sep)) +
  geom_point() +
  xlab("June") +
  ylab("Sun Spots")