library(LearnEDAfunctions)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
# Display every row of the Boston Marathon winning-times data set.
print(LearnEDAfunctions::boston.marathon.wtimes)
## year minutes
## 1 1897 175
## 2 1898 162
## 3 1899 174
## 4 1900 159
## 5 1901 149
## 6 1902 163
## 7 1903 161
## 8 1904 158
## 9 1905 158
## 10 1906 165
## 11 1907 144
## 12 1908 145
## 13 1909 173
## 14 1910 148
## 15 1911 141
## 16 1912 141
## 17 1913 145
## 18 1914 145
## 19 1915 151
## 20 1916 147
## 21 1917 148
## 22 1919 149
## 23 1920 149
## 24 1921 138
## 25 1922 138
## 26 1923 143
## 27 1924 149
## 28 1925 153
## 29 1926 145
## 30 1927 160
## 31 1928 157
## 32 1929 153
## 33 1930 154
## 34 1931 166
## 35 1932 153
## 36 1933 151
## 37 1934 152
## 38 1935 152
## 39 1936 153
## 40 1937 153
## 41 1938 155
## 42 1939 148
## 43 1940 148
## 44 1941 150
## 45 1942 146
## 46 1943 148
## 47 1944 151
## 48 1945 150
## 49 1946 149
## 50 1947 145
## 51 1948 151
## 52 1949 151
## 53 1950 152
## 54 1951 147
## 55 1952 151
## 56 1953 138
## 57 1954 140
## 58 1955 138
## 59 1956 134
## 60 1957 140
## 61 1958 145
## 62 1959 142
## 63 1960 140
## 64 1961 143
## 65 1962 143
## 66 1963 138
## 67 1964 139
## 68 1965 136
## 69 1966 137
## 70 1967 135
## 71 1968 142
## 72 1969 133
## 73 1970 130
## 74 1971 138
## 75 1972 135
## 76 1973 136
## 77 1974 133
## 78 1975 129
## 79 1976 140
## 80 1977 134
## 81 1978 130
## 82 1979 129
## 83 1980 132
## 84 1981 129
## 85 1982 128
## 86 1983 129
## 87 1984 130
## 88 1985 134
## 89 1986 127
## 90 1987 131
## 91 1988 128
## 92 1989 129
## 93 1990 128
## 94 1991 131
## 95 1992 128
## 96 1993 129
## 97 1994 127
## 98 1995 129
## 99 1996 129
# Preview the first six rows.
boston.marathon.wtimes %>%
  head()
## year minutes
## 1 1897 175
## 2 1898 162
## 3 1899 174
## 4 1900 159
## 5 1901 149
## 6 1902 163
# Scatterplot of the winning time (minutes) against the race year.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = minutes)) +
  geom_point()
# Show the first ten rows.
boston.marathon.wtimes %>%
  slice(1:10)
## year minutes
## 1 1897 175
## 2 1898 162
## 3 1899 174
## 4 1900 159
## 5 1901 149
## 6 1902 163
## 7 1903 161
## 8 1904 158
## 9 1905 158
## 10 1906 165
# Running medians of 3 for the first 12 winning times.  Each interior value is
# the median of an observation and its two neighbours.  The first and last
# positions have no complete window of three, so they are set to NA.
# (The previous hand-typed vector was the raw data shifted down by one row,
# not the medians.)
Smooth3 <- c(
  NA,
  as.vector(runmed(boston.marathon.wtimes$minutes[1:12], k = 3))[2:11],
  NA
)
cbind(boston.marathon.wtimes[1:12, ], Smooth3)
## year minutes Smooth3
## 1 1897 175 NA
## 2 1898 162 174
## 3 1899 174 162
## 4 1900 159 159
## 5 1901 149 159
## 6 1902 163 161
## 7 1903 161 161
## 8 1904 158 158
## 9 1905 158 158
## 10 1906 165 158
## 11 1907 144 145
## 12 1908 145 NA
# Add the 3R smooth (running medians of 3, repeated) as a new column.
# as.vector() drops the class/attributes that smooth() attaches to its result.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(smooth.3R = as.vector(smooth(minutes, kind = "3R")))

# Raw winning times as points, with the 3R smooth drawn in red.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(mapping = aes(x = year, y = smooth.3R), color = "red")
Generally, the winning times for the Boston Marathon decrease over the
years. After about 1980, times begin to level off.
# Add the 3RSS smooth as a new column.
boston.marathon.wtimes <- mutate(
  boston.marathon.wtimes,
  smooth.3RSS = as.vector(smooth(minutes, kind = "3RSS"))
)

# Overlay both smooths on the raw data: 3RSS in blue, then 3R in red on top.
ggplot(data = boston.marathon.wtimes, mapping = aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(mapping = aes(y = smooth.3RSS), color = "blue") +
  geom_line(mapping = aes(y = smooth.3R), color = "red")
# Apply han() to the 3RSS smooth and store the result as smooth.3RSSH.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(smooth.3RSSH = han(as.vector(smooth(minutes, kind = "3RSS"))))

# Raw winning times as points, with the 3RSSH smooth drawn in green.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(mapping = aes(x = year, y = smooth.3RSSH), color = "green")
# Rough = observed winning time minus the 3RSS smooth (the smooth.3RSS column,
# not the hanned smooth.3RSSH column).
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(Rough = minutes - smooth.3RSS)

# Show the first ten rows with all smooth columns and the rough.
boston.marathon.wtimes %>%
  slice(1:10)
## year minutes smooth.3R smooth.3RSS smooth.3RSSH Rough
## 1 1897 175 175 175 175.00 0
## 2 1898 162 174 174 171.25 -12
## 3 1899 174 162 162 164.25 12
## 4 1900 159 159 159 160.25 0
## 5 1901 149 159 161 160.00 -12
## 6 1902 163 161 159 159.25 4
## 7 1903 161 161 158 158.25 3
## 8 1904 158 158 158 158.00 0
## 9 1905 158 158 158 158.00 0
## 10 1906 165 158 158 158.00 7
# Narrow the console width to 60 characters, then print the whole Rough column.
options(width = 60)
boston.marathon.wtimes[["Rough"]]
## [1] 0 -12 12 0 -12 4 3 0 0 7 -14 -3 28 7
## [15] -7 -4 0 0 4 -1 0 1 11 -11 0 0 0 4
## [29] -8 3 3 -1 0 12 0 -1 0 0 0 0 2 0
## [43] 0 2 -2 0 3 1 -1 -6 0 0 1 -4 4 -2
## [57] 2 0 -4 0 3 0 -2 1 2 -1 1 -1 0 -1
## [71] 7 0 -5 3 0 1 -2 -5 7 4 0 -1 3 0
## [85] -1 0 0 4 -3 2 -1 0 -1 2 -1 0 -2 0
## [99] 0
# Resistant smooth "3RS3R, twice".  Twicing smooths the residuals of the first
# 3RS3R pass with the same smoother and adds them back to the smooth.
# stats::smooth() only performs that second pass when twiceit = TRUE; without
# it this column held a plain 3RS3R smooth despite its name.
# NOTE(review): recorded output further down that derives from this column
# (e.g. the stem display of residual sizes) predates this fix and should be
# regenerated.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(
    smooth.3RS3R.twice = as.vector(
      smooth(minutes, kind = "3RS3R", twiceit = TRUE)
    )
  )

# Raw winning times as points, with the twiced 3RS3R smooth drawn in black.
ggplot(boston.marathon.wtimes, aes(x = year, y = minutes)) +
  geom_point() +
  geom_line(aes(y = smooth.3RS3R.twice), color = "black")
# FinalRough = observed winning time minus the smooth.3RS3R.twice column.
boston.marathon.wtimes <- boston.marathon.wtimes %>%
  mutate(FinalRough = minutes - smooth.3RS3R.twice)

# Plot the rough against year, with a blue reference line at zero.
boston.marathon.wtimes %>%
  ggplot(mapping = aes(x = year, y = FinalRough)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "blue")
# size = absolute value of the final rough.
boston.marathon.wtimes <- mutate(
  boston.marathon.wtimes,
  size = abs(FinalRough)
)

# Stem-and-leaf display of the residual sizes, stretched with scale = 2.
stem(boston.marathon.wtimes[["size"]], scale = 2)
##
## The decimal point is at the |
##
## 0 | 000000000000000000000000000000000000000000000000000
## 2 | 0000000000000000000000
## 4 | 00000000000000
## 6 | 00000
## 8 | 0
## 10 | 0
## 12 | 000
## 14 | 0
## 16 |
## 18 |
## 20 |
## 22 |
## 24 |
## 26 |
## 28 | 0
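This stemplot shows that most residual sizes are small: about half are 0 or 1 minute, and nearly 90% are under 6 minutes. Only a couple of years, most notably the one with a residual size of 28 minutes, may be considered outliers.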