suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(UsingR))
head(reaction.time)
## age gender control time
## 1 16-24 F T 1.360075
## 2 16-24 M T 1.467939
## 3 16-24 M T 1.512036
## 4 16-24 F T 1.390647
## 5 16-24 M T 1.384208
## 6 16-24 M C 1.393875
reaction.time %>%
group_by(age) %>%
summarise(mean = mean(time), sd = sd(time))
## # A tibble: 2 × 3
## age mean sd
## <fct> <dbl> <dbl>
## 1 16-24 1.38 0.0665
## 2 25+ 1.45 0.0888
ggplot(reaction.time, aes(x = age, y = time)) +
geom_boxplot()
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
ggplot(mtcars, aes(mpg)) +
geom_histogram(binwidth = 3) +
xlab("Miles per gallon") +
ylab("Number of Cars")
mtcars$am <- as.factor(mtcars$am)
levels(mtcars$am) <-c("Automatic", "Manual")
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 Manual 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 Manual 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 Manual 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 Automatic 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 Automatic 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 Automatic 3 1
wilcox.test(mpg ~ am, data=mtcars)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: mpg by am
## W = 42, p-value = 0.001871
## alternative hypothesis: true location shift is not equal to 0
head(trees)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
str(trees)
## 'data.frame': 31 obs. of 3 variables:
## $ Girth : num 8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
## $ Height: num 70 65 63 72 81 83 66 75 80 75 ...
## $ Volume: num 10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
summary(trees)
## Girth Height Volume
## Min. : 8.30 Min. :63 Min. :10.20
## 1st Qu.:11.05 1st Qu.:72 1st Qu.:19.40
## Median :12.90 Median :76 Median :24.20
## Mean :13.25 Mean :76 Mean :30.17
## 3rd Qu.:15.25 3rd Qu.:80 3rd Qu.:37.30
## Max. :20.60 Max. :87 Max. :77.00
sp1 = ggplot(trees, aes(x = Girth, y = Volume)) +
geom_point(size = 3) +
xlab("Girth (inches)") +
ylab("Volume (Cubic ft.)")
sp1
sp2 = sp1 + geom_smooth(method=lm, color="red",se=FALSE)
sp2
## `geom_smooth()` using formula 'y ~ x'
SSR1 = sum(residuals(lm(Volume~Girth, data=trees))^2)
SSR1
## [1] 524.3025
Squared_Girth = (trees$Girth)^2
sp3 = ggplot(trees, aes(x = Squared_Girth, y = Volume)) +
geom_point(size = 3) +
xlab("Squared Girth (inches^2)") +
ylab("Volume (Cubic ft.)")
sp4 = sp3 + geom_smooth(method=lm, color="red", se=FALSE)
sp4
## `geom_smooth()` using formula 'y ~ x'
SSR2 = sum(residuals(lm(Volume~Squared_Girth, data=trees))^2)
SSR2
## [1] 329.3191
suppressPackageStartupMessages(library(HSAUR2))
head(USmelanoma)
## mortality latitude longitude ocean
## Alabama 219 33.0 87.0 yes
## Arizona 160 34.5 112.0 no
## Arkansas 170 35.0 92.5 no
## California 182 37.5 119.5 yes
## Colorado 149 39.0 105.5 no
## Connecticut 159 41.8 72.8 yes
str(USmelanoma)
## 'data.frame': 49 obs. of 4 variables:
## $ mortality: int 219 160 170 182 149 159 200 177 197 214 ...
## $ latitude : num 33 34.5 35 37.5 39 41.8 39 39 28 33 ...
## $ longitude: num 87 112 92.5 119.5 105.5 ...
## $ ocean : Factor w/ 2 levels "no","yes": 2 1 1 2 1 2 2 1 2 2 ...
sp5 = ggplot(USmelanoma, aes(x = latitude, y = mortality)) +
geom_point(size = 2) +
xlab("Latitude (Degree)") +
ylab("Mortality (per million)")
sp5
sp6 = sp5 + geom_smooth(method = lm, color = "red", se = FALSE)
sp6
## `geom_smooth()` using formula 'y ~ x'
model = lm(mortality ~ latitude, data = USmelanoma)
model
##
## Call:
## lm(formula = mortality ~ latitude, data = USmelanoma)
##
## Coefficients:
## (Intercept) latitude
## 389.189 -5.978
SSR3 = sum(residuals(lm(mortality ~latitude, data = USmelanoma))^2)
SSR3
## [1] 17173.07
r = cor(USmelanoma$mortality,USmelanoma$latitude)
r
## [1] -0.8245178
summary(model)
##
## Call:
## lm(formula = mortality ~ latitude, data = USmelanoma)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.972 -13.185 0.972 12.006 43.938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 389.1894 23.8123 16.34 < 2e-16 ***
## latitude -5.9776 0.5984 -9.99 3.31e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.12 on 47 degrees of freedom
## Multiple R-squared: 0.6798, Adjusted R-squared: 0.673
## F-statistic: 99.8 on 1 and 47 DF, p-value: 3.309e-13
suppressPackageStartupMessages(library(sm))
## Warning: package 'sm' was built under R version 4.2.2
sm.density(res, xlab = "Model Residuals", model = "Normal")
### We can see that the density of the residuals clearly falls within
the confidence envelope of a normal distribution, providing strong
evidence for thinking that the model’s assumption of normality is
correct.