library(LearnEDAfunctions)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggplot2
# Show the complete Boston Marathon winning-times data set (year, minutes).
print(LearnEDAfunctions::boston.marathon.wtimes)
##    year minutes
## 1  1897     175
## 2  1898     162
## 3  1899     174
## 4  1900     159
## 5  1901     149
## 6  1902     163
## 7  1903     161
## 8  1904     158
## 9  1905     158
## 10 1906     165
## 11 1907     144
## 12 1908     145
## 13 1909     173
## 14 1910     148
## 15 1911     141
## 16 1912     141
## 17 1913     145
## 18 1914     145
## 19 1915     151
## 20 1916     147
## 21 1917     148
## 22 1919     149
## 23 1920     149
## 24 1921     138
## 25 1922     138
## 26 1923     143
## 27 1924     149
## 28 1925     153
## 29 1926     145
## 30 1927     160
## 31 1928     157
## 32 1929     153
## 33 1930     154
## 34 1931     166
## 35 1932     153
## 36 1933     151
## 37 1934     152
## 38 1935     152
## 39 1936     153
## 40 1937     153
## 41 1938     155
## 42 1939     148
## 43 1940     148
## 44 1941     150
## 45 1942     146
## 46 1943     148
## 47 1944     151
## 48 1945     150
## 49 1946     149
## 50 1947     145
## 51 1948     151
## 52 1949     151
## 53 1950     152
## 54 1951     147
## 55 1952     151
## 56 1953     138
## 57 1954     140
## 58 1955     138
## 59 1956     134
## 60 1957     140
## 61 1958     145
## 62 1959     142
## 63 1960     140
## 64 1961     143
## 65 1962     143
## 66 1963     138
## 67 1964     139
## 68 1965     136
## 69 1966     137
## 70 1967     135
## 71 1968     142
## 72 1969     133
## 73 1970     130
## 74 1971     138
## 75 1972     135
## 76 1973     136
## 77 1974     133
## 78 1975     129
## 79 1976     140
## 80 1977     134
## 81 1978     130
## 82 1979     129
## 83 1980     132
## 84 1981     129
## 85 1982     128
## 86 1983     129
## 87 1984     130
## 88 1985     134
## 89 1986     127
## 90 1987     131
## 91 1988     128
## 92 1989     129
## 93 1990     128
## 94 1991     131
## 95 1992     128
## 96 1993     129
## 97 1994     127
## 98 1995     129
## 99 1996     129
# Preview the first six rows of the data.
head(boston.marathon.wtimes, n = 6)
##   year minutes
## 1 1897     175
## 2 1898     162
## 3 1899     174
## 4 1900     159
## 5 1901     149
## 6 1902     163
# Scatterplot of winning time (minutes) against race year.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = minutes)) +
  geom_point()

# Display the first ten rows of the data.
boston.marathon.wtimes %>%
  slice(1:10)
##    year minutes
## 1  1897     175
## 2  1898     162
## 3  1899     174
## 4  1900     159
## 5  1901     149
## 6  1902     163
## 7  1903     161
## 8  1904     158
## 9  1905     158
## 10 1906     165
# Running medians of 3 for the first 12 winning times.
# embed(x, 3) puts every consecutive triple of x in a row, so the row-wise
# median is the "3" smooth for the middle year of that triple.  The first and
# last positions have no complete triple and are therefore left as NA.
# (The previous hand-typed vector was just the raw series shifted down one
# row, not a median smooth.)
Smooth3 <- c(
  NA,
  apply(embed(boston.marathon.wtimes$minutes[1:12], 3), 1, median),
  NA
)
cbind(boston.marathon.wtimes[1:12, ], Smooth3)
##    year minutes Smooth3
## 1  1897     175      NA
## 2  1898     162     174
## 3  1899     174     162
## 4  1900     159     159
## 5  1901     149     159
## 6  1902     163     161
## 7  1903     161     161
## 8  1904     158     158
## 9  1905     158     158
## 10 1906     165     158
## 11 1907     144     145
## 12 1908     145      NA
# Add the smooth produced by smooth() with kind = "3R" as a plain numeric
# column (as.vector() drops the attributes of the smooth() result).
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(smooth.3R = as.vector(smooth(minutes, kind = "3R")))

# Raw winning times as points, with the 3R smooth drawn over them in red.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(mapping = aes(x = year, y = smooth.3R), color = "red")

Generally, the winning times for the Boston Marathon decrease over the years. After about 1980, times begin to level off.

# Add the smooth produced by smooth() with kind = "3RSS" as a numeric column.
boston.marathon.wtimes <- mutate(
  boston.marathon.wtimes,
  smooth.3RSS = as.vector(smooth(minutes, kind = "3RSS"))
)

# Compare the two smooths on the raw data: 3RSS in blue, then 3R in red
# (drawn last, so it sits on top where the two lines coincide).
ggplot(data = boston.marathon.wtimes, mapping = aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(mapping = aes(y = smooth.3RSS), color = "blue") +
  geom_line(mapping = aes(y = smooth.3R), color = "red")

# Add the 3RSSH smooth: the 3RSS smooth of the winning times passed through
# han() from LearnEDAfunctions.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(
    smooth.3RSSH = minutes %>%
      smooth(kind = "3RSS") %>%
      as.vector() %>%
      han()
  )

# Raw winning times as points, with the 3RSSH smooth drawn in green.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(mapping = aes(x = year, y = smooth.3RSSH), color = "green")

# Rough (residual) = observed winning time minus the 3RSS smooth.
# NOTE(review): the rough is taken from smooth.3RSS, not from the hanned
# smooth.3RSSH column computed just before -- confirm this is intended.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(Rough = minutes - smooth.3RSS)

# Show the first ten rows, now including the smooths and the rough.
boston.marathon.wtimes %>%
  slice(1:10)
##    year minutes smooth.3R smooth.3RSS smooth.3RSSH Rough
## 1  1897     175       175         175       175.00     0
## 2  1898     162       174         174       171.25   -12
## 3  1899     174       162         162       164.25    12
## 4  1900     159       159         159       160.25     0
## 5  1901     149       159         161       160.00   -12
## 6  1902     163       161         159       159.25     4
## 7  1903     161       161         158       158.25     3
## 8  1904     158       158         158       158.00     0
## 9  1905     158       158         158       158.00     0
## 10 1906     165       158         158       158.00     7
# Narrow the console width so the vector of residuals wraps compactly,
# then print every Rough value.
options(width = 60)
print(boston.marathon.wtimes[["Rough"]])
##  [1]   0 -12  12   0 -12   4   3   0   0   7 -14  -3  28   7
## [15]  -7  -4   0   0   4  -1   0   1  11 -11   0   0   0   4
## [29]  -8   3   3  -1   0  12   0  -1   0   0   0   0   2   0
## [43]   0   2  -2   0   3   1  -1  -6   0   0   1  -4   4  -2
## [57]   2   0  -4   0   3   0  -2   1   2  -1   1  -1   0  -1
## [71]   7   0  -5   3   0   1  -2  -5   7   4   0  -1   3   0
## [85]  -1   0   0   4  -3   2  -1   0  -1   2  -1   0  -2   0
## [99]   0
# Add the "3RS3R, twice" smooth of the winning times.
# twiceit = TRUE asks smooth() to apply twicing; without it the column would
# hold a plain 3RS3R smooth despite its ".twice" name.
# Results derived from this column (FinalRough, its plot and the stem display
# of its absolute values) need to be regenerated after this change.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(
    smooth.3RS3R.twice = as.vector(
      smooth(minutes, kind = "3RS3R", twiceit = TRUE)
    )
  )

# Raw winning times as points, with the twiced 3RS3R smooth in black.
ggplot(boston.marathon.wtimes, aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(aes(y = smooth.3RS3R.twice), color = "black")

# Final rough = observed winning time minus the smooth.3RS3R.twice column.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(FinalRough = minutes - smooth.3RS3R.twice)

# Residual plot: final rough against year, with a blue reference line at 0.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = FinalRough)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "blue")

# Size of each residual = absolute value of the final rough.
boston.marathon.wtimes <- mutate(
  boston.marathon.wtimes,
  size = abs(FinalRough)
)

# Stem-and-leaf display of the residual sizes.
stem(boston.marathon.wtimes[["size"]], scale = 2)
## 
##   The decimal point is at the |
## 
##    0 | 000000000000000000000000000000000000000000000000000
##    2 | 0000000000000000000000
##    4 | 00000000000000
##    6 | 00000
##    8 | 0
##   10 | 0
##   12 | 000
##   14 | 0
##   16 | 
##   18 | 
##   20 | 
##   22 | 
##   24 | 
##   26 | 
##   28 | 0

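This stemplot shows that most residuals are small: about half are under 2 minutes in size, and nearly all are under 8 minutes. Only a few years, those with residuals of 12 minutes or more and especially the single residual of 28 minutes, may be considered outliers.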