# Convert the `faithful` data frame to a tibble and print it.
# `tbl_df()` is deprecated in dplyr; `tibble::as_tibble()` is the supported
# replacement and produces the same tibble.
hw1 <- tibble::as_tibble(faithful)
hw1
# A tibble: 272 x 2
eruptions waiting
* <dbl> <dbl>
1 3.600 79
2 1.800 54
3 3.333 74
4 2.283 62
5 4.533 85
6 2.883 55
7 4.700 88
8 3.600 85
9 1.950 51
10 4.350 85
# ... with 262 more rows
# Histogram of the waiting times: 30 bins, blue bars outlined in yellow.
ggplot(data = hw1, mapping = aes(x = waiting)) +
  geom_histogram(bins = 30, fill = "blue", col = "yellow") +
  labs(
    x = "Waiting Times in Minutes",
    title = "Waiting Times Between Eruptions for Old Faithful Geyser"
  )
# Descriptive statistics for the waiting column via psych::describe().
hw1 %>%
  select(waiting) %>%
  psych::describe()
vars n mean sd median trimmed mad min max range skew
waiting 1 272 70.9 13.59 76 71.5 11.86 43 96 53 -0.41
kurtosis se
waiting -1.16 0.82
# IQR plus the first and third quartiles of the waiting times.
# The expressions are deliberately left unnamed so the result columns keep
# their auto-generated names.
summarise(
  hw1,
  IQR(waiting),
  quantile(waiting, 0.25),
  quantile(waiting, 0.75)
)
# A tibble: 1 x 3
`IQR(waiting)` `quantile(waiting, 0.25)` `quantile(waiting, 0.75)`
<dbl> <dbl> <dbl>
1 24 58 82
# Standard deviation of the waiting times, returned as a one-column tibble.
summarise_all(select(hw1, waiting), sd)
# A tibble: 1 x 1
waiting
<dbl>
1 13.59497
# Boxplot of the waiting times, drawn against a constant x and then flipped
# with coord_flip(). The y-axis text and ticks are blanked, presumably to
# hide the meaningless constant axis.
ggplot(data = hw1, mapping = aes(x = 1, y = waiting)) +
  geom_boxplot(fill = "yellow") +
  coord_flip() +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(
    x = "",
    y = "Waiting Times",
    title = "Boxplot of Waiting Times for Old Faithful"
  )
# Q-Q plot of the waiting times (points only, no reference line).
ggplot(data = hw1, mapping = aes(sample = waiting)) +
  stat_qq() +
  theme_bw() # eliminate the gray background
# Scatterplot of waiting time (y) against eruption duration (x).
ggplot(data = hw1, mapping = aes(x = eruptions, y = waiting)) +
  geom_point() +
  labs(
    title = "Waiting Times vs. Eruption Duration in the Old Faithful Data"
  )
##Question 9 ##==========
# Correlation between waiting time and eruption duration.
with(hw1, cor(waiting, eruptions))
[1] 0.9008112
# Scatterplot of waiting time against eruption duration with a fitted
# linear-model smooth. The formula is stated explicitly: it is the same one
# geom_smooth() uses by default for method = "lm", but spelling it out
# documents the model and avoids the "using formula" message that current
# ggplot2 prints on every render.
ggplot(hw1, aes(x = eruptions, y = waiting)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    title = "Waiting Times by Eruption Duration",
    x = "Eruption Duration",
    y = "Waiting Times"
  )
# Linear regression of waiting time on eruption duration; print the
# coefficient table and fit statistics.
summary(
  lm(formula = waiting ~ eruptions, data = hw1)
)
Call:
lm(formula = waiting ~ eruptions, data = hw1)
Residuals:
Min 1Q Median 3Q Max
-12.0796 -4.4831 0.2122 3.9246 15.9719
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.4744 1.1549 28.98 <2e-16 ***
eruptions 10.7296 0.3148 34.09 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.914 on 270 degrees of freedom
Multiple R-squared: 0.8115, Adjusted R-squared: 0.8108
F-statistic: 1162 on 1 and 270 DF, p-value: < 2.2e-16
# Convert the `geyser` data frame from MASS to a tibble and print it.
# `tbl_df()` is deprecated in dplyr; `tibble::as_tibble()` is the supported
# replacement and produces the same tibble.
hw1extra <- tibble::as_tibble(MASS::geyser)
hw1extra
# A tibble: 299 x 2
waiting duration
* <dbl> <dbl>
1 80 4.016667
2 71 2.150000
3 57 4.000000
4 80 4.000000
5 75 4.000000
6 77 2.000000
7 60 4.383333
8 86 4.283333
9 77 2.033333
10 56 4.833333
# ... with 289 more rows
# Scatterplot of waiting time (y) against eruption duration (x).
ggplot(data = hw1extra, mapping = aes(x = duration, y = waiting)) +
  geom_point() +
  labs(
    title = "Waiting Times vs. Eruption Duration in the Geyser Data"
  )
# Correlation between eruption duration and waiting time.
with(hw1extra, cor(duration, waiting))
[1] -0.644623
# Scatterplot of eruption duration against waiting time with a fitted
# linear-model smooth. The formula is stated explicitly: it is the same one
# geom_smooth() uses by default for method = "lm", but spelling it out
# documents the model and avoids the "using formula" message that current
# ggplot2 prints on every render.
ggplot(hw1extra, aes(x = waiting, y = duration)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    title = "Eruption Duration by Waiting Times",
    x = "Waiting Times",
    y = "Eruption Duration"
  )
# Linear regression of eruption duration on waiting time; print the
# coefficient table and fit statistics.
summary(
  lm(formula = duration ~ waiting, data = hw1extra)
)
Call:
lm(formula = duration ~ waiting, data = hw1extra)
Residuals:
Min 1Q Median 3Q Max
-2.21805 -0.72357 -0.01979 0.75071 2.11109
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.313144 0.269935 27.09 <2e-16 ***
waiting -0.053272 0.003666 -14.53 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.879 on 297 degrees of freedom
Multiple R-squared: 0.4155, Adjusted R-squared: 0.4136
F-statistic: 211.2 on 1 and 297 DF, p-value: < 2.2e-16
```
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print a per-column summary of the `cars` data set.
summary(cars)
speed dist
Min. : 4.0 Min. : 2.00
1st Qu.:12.0 1st Qu.: 26.00
Median :15.0 Median : 36.00
Mean :15.4 Mean : 42.98
3rd Qu.:19.0 3rd Qu.: 56.00
Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.