Instructor: Dr. Bob Batzinger
Academic year: 2021/2022
Semester: 1
Begins: June 2021
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## lastname = col_character(),
## fname = col_character(),
## age = col_double(),
## status = col_character(),
## section = col_character()
## )
## # A tibble: 5 x 5
## lastname fname age status section
## <chr> <chr> <dbl> <chr> <chr>
## 1 Abbing Mr. Anthony 40 pass thirdClass
## 2 Abbott Mr. Ernest Owen 21 staff Victualling
## 3 Abbott Mr. Eugene Joseph 14 pass thirdClass
## 4 Abbott Mr. Rossmore Edward 16 pass thirdClass
## 5 Abbott* Mrs. Rhoda Mary 39 pass thirdClass
# Add a dot-plot layer of age by status to the existing plot `b` (built
# outside this excerpt). Dots are binned along the y axis and stacked
# centred on each status position; the result is stored as `c` and printed.
c <- b +
  geom_dotplot(
    aes(x = status, y = age, color = status),
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.12
  )
c
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
# Attach a title and a figure caption to the plot `e` (built outside this
# excerpt), store the result as `f`, and print it. The caption text starts
# with a line break ("\n").
f <- e +
  labs(
    title = "Age distribution",
    caption = "\nFig. 1. Ages of staff and passengers of the RMS Titanic"
  )
f
# Violin plot of age by status from `titpass`, written as one expression.
# coord_flip() swaps the axes so that age runs horizontally, and annotate()
# writes the text labels "S" (staff, at age 31) and "P" (pass, at age 23).
ggplot(titpass) +
  geom_violin(aes(x = status, y = age, fill = status)) +
  coord_flip() +
  labs(
    title = "Age distribution",
    caption = "\nFig. 1. Ages of staff and passengers of the RMS Titanic"
  ) +
  annotate(
    "text",
    x = c("staff", "pass"),
    y = c(31, 23),
    label = c("S", "P")
  )
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
# Lay out the four plots `a`, `b`, `c` and `d`, each titled with its step
# number, in a 2 x 2 grid with a heading above and a figure caption below.
grid.arrange(
  a + labs(title = "Step 1"),
  b + labs(title = "Step 2"),
  c + labs(title = "Step 3"),
  d + labs(title = "Step 4"),
  nrow = 2,
  ncol = 2,
  top = "Building graphics incrementally",
  bottom = "Fig. 2: Development of ggplot graphic"
)
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
# Loess-smoothed price against carat, with one coloured curve per clarity
# level and a separate panel row for each cut.
diamonds %>%
  ggplot(aes(x = carat, y = price, color = clarity)) +
  geom_smooth(method = "loess") +
  facet_grid(rows = vars(cut))
## `geom_smooth()` using formula 'y ~ x'
##
## Call:
## lm(formula = price ~ carat + color + clarity + cut - 1, data = diamonds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16813.5 -680.4 -197.6 466.4 10394.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## carat 8886.129 12.034 738.437 <2e-16 ***
## colorD -2886.973 18.211 -158.526 <2e-16 ***
## colorE -3098.655 16.328 -189.776 <2e-16 ***
## colorF -3190.283 16.691 -191.138 <2e-16 ***
## colorG -3393.172 16.448 -206.299 <2e-16 ***
## colorH -3865.670 18.370 -210.438 <2e-16 ***
## colorI -4327.275 21.457 -201.670 <2e-16 ***
## colorJ -5212.195 26.973 -193.239 <2e-16 ***
## clarity.L 4217.535 30.831 136.794 <2e-16 ***
## clarity.Q -1832.406 28.827 -63.565 <2e-16 ***
## clarity.C 923.273 24.679 37.411 <2e-16 ***
## clarity^4 -361.995 19.739 -18.339 <2e-16 ***
## clarity^5 216.616 16.109 13.447 <2e-16 ***
## clarity^6 2.105 14.037 0.150 0.881
## clarity^7 110.340 12.383 8.910 <2e-16 ***
## cut.L 698.907 20.335 34.369 <2e-16 ***
## cut.Q -327.686 17.911 -18.295 <2e-16 ***
## cut.C 180.565 15.557 11.607 <2e-16 ***
## cut^4 -1.207 12.458 -0.097 0.923
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1157 on 53921 degrees of freedom
## Multiple R-squared: 0.9574, Adjusted R-squared: 0.9574
## F-statistic: 6.373e+04 on 19 and 53921 DF, p-value: < 2.2e-16
# Jittered scatter of carat against `table`, coloured by price on a
# red-to-green gradient, with one panel row per cut.
# The aesthetics are declared once in ggplot() and inherited by geom_jitter().
# The x range is restricted only through xlim(45, 75): ggplot() itself has no
# `xlim` parameter, so passing one there has no effect on the plot.
diamonds %>%
  ggplot(aes(x = table, y = carat, color = price)) +
  scale_colour_gradient(
    low = "red",
    high = "green",
    space = "Lab",
    na.value = "grey50",
    guide = "colourbar",
    aesthetics = "colour"
  ) +
  geom_jitter(size = 0.5, alpha = 0.5) +
  xlim(45, 75) +
  facet_grid(rows = vars(cut))
## Warning: Removed 5 rows containing missing values (geom_point).
```