Using data from the \(\color{red}{\text{nasaweather}}\) package,
* Create a scatterplot between wind and pressure, with color being used to distinguish the type of storm.
# packageDescription("nasaweather")
# glimpse(storms)
# Scatterplot of wind against pressure for every storm observation;
# point color identifies the storm type.
g1 <- ggplot(data = storms, mapping = aes(x = wind, y = pressure)) +
  geom_point(mapping = aes(color = type))
g1
# Storm tracks: one path per named storm, one panel per year.
# Longitude goes on the x-axis and latitude on the y-axis so each track is
# drawn in its geographic orientation (east-west horizontal, north-south
# vertical) instead of being transposed.
g2 <- ggplot(storms, aes(x = long, y = lat)) +
  geom_path(aes(color = name)) +
  facet_wrap(~year)
g2
Using data from the \(\color{red}{\text{mlb_teams.csv}}\) file,
* Create an informative data graphic & summary that illustrates an interesting relationship within the MLB data.
# Preview the column names, types and first values of the MLB data.
glimpse(MLB_Data)
## Observations: 210
## Variables: 11
## $ yearID <int> 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008…
## $ teamID <fct> ARI, ATL, BAL, BOS, CHA, CHN, CIN, CLE, COL, DET, FLO, HOU…
## $ lgID <fct> NL, NL, AL, AL, AL, NL, NL, AL, NL, AL, NL, NL, AL, AL, NL…
## $ W <int> 82, 72, 68, 95, 89, 97, 74, 81, 74, 74, 84, 86, 75, 100, 8…
## $ L <int> 80, 90, 93, 67, 74, 64, 88, 81, 88, 88, 77, 75, 87, 62, 78…
## $ WPct <dbl> 0.5061728, 0.4444444, 0.4223602, 0.5864198, 0.5460123, 0.6…
## $ attendance <int> 2509924, 2532834, 1950075, 3048250, 2500648, 3300200, 2058…
## $ normAttend <dbl> 0.5838859, 0.5892155, 0.4536477, 0.7091172, 0.5817280, 0.7…
## $ payroll <int> 66202712, 102365683, 67196246, 133390035, 121189332, 11834…
## $ metroPop <int> 4489109, 5614323, 2785874, 4732161, 9554598, 9554598, 2149…
## $ name <fct> Arizona Diamondbacks, Atlanta Braves, Baltimore Orioles, B…
# Normalized attendance against winning percentage, one panel per team.
# Points are colored by league; each panel gets its own linear fit.
g3 <- ggplot(data = MLB_Data, mapping = aes(x = WPct, y = normAttend)) +
  geom_point(mapping = aes(color = lgID)) +
  geom_smooth(method = lm) +
  facet_grid(. ~ teamID)
g3
# Fit a simple linear regression of normalized attendance on winning
# percentage across all team-seasons, then print the model summary.
lm_fit <- lm(formula = normAttend ~ WPct, data = MLB_Data)
summary(lm_fit)
##
## Call:
## lm(formula = normAttend ~ WPct, data = MLB_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28881 -0.09939 -0.01399 0.10299 0.37451
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08979 0.07135 1.258 0.21
## WPct 0.97510 0.14138 6.897 6.27e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.14 on 208 degrees of freedom
## Multiple R-squared: 0.1861, Adjusted R-squared: 0.1822
## F-statistic: 47.57 on 1 and 208 DF, p-value: 6.267e-11
Graph the functions and find the area between \(y = x^2\) and \(y = \sqrt{x}\) using the integrate function.
# Upper curve on [0, 1]: y = sqrt(x).
fun1 <- function(x) {
  sqrt(x)
}

# Lower curve on [0, 1]: y = x^2.
fun2 <- function(x) {
  x^2
}

# Vertical gap between the two curves; integrating it gives the enclosed area.
fun <- function(x) {
  sqrt(x) - x^2
}
# Draw both curves over the interval [0, 1]; the two-row data frame only
# supplies the x-range that stat_function() evaluates the functions on.
g4 <- ggplot(data = data.frame(x = c(0, 1)), mapping = aes(x = x)) +
  stat_function(fun = fun1) +
  stat_function(fun = fun2)
g4
# Numerically integrate the gap between the curves over [0, 1].
integrate(fun, lower = 0, upper = 1)
## 0.3333334 with absolute error < 7.8e-05
Find the area between \(y = x^2\) and \(y = \sqrt{x}\) with a simulation - Think Dartboard!
# Dartboard (Monte Carlo) estimate of the area between y = x^2 and
# y = sqrt(x): throw uniformly distributed darts at the unit square and
# count the share that lands between the two curves.
darts <- 100000
x <- runif(n = darts)
y <- runif(n = darts)
# 1 when the dart lies strictly between the curves, 0 otherwise.
z <- as.numeric(x^2 < y & y < sqrt(x))
t <- tibble(x = x, y = y, z = z)
g5 <- ggplot(data = t, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(color = z))
# The sampling square has area 1, so the hit fraction is the area estimate.
area <- sum(z) / darts
g5
area
## [1] 0.33264
# Dartboard (Monte Carlo) estimate of the area under the standard normal
# density on [-3, 3].
# Darts are thrown uniformly at the box [-3, 3] x [0, 0.4]; the box is tall
# enough because the density peaks at dnorm(0), which is about 0.399.
darts <- 100000
box_width <- 6
box_height <- 0.4
x <- runif(darts) * box_width - 3
y <- runif(darts) * box_height
# 1 when the dart lands under the curve, 0 otherwise. Coding hits and misses
# as -3 / 3 made sum(z) / darts a meaningless number rather than a proportion.
z <- ifelse(y < dnorm(x, 0, 1), 1, 0)
t <- tibble(x, y, z)
g6 <- ggplot(t, aes(x, y)) + geom_point(aes(color = z))
# Area = hit fraction * area of the sampling box. The result should be close
# to pnorm(3) - pnorm(-3), about 0.9973; re-knit to refresh the printed value.
area <- sum(z) / darts * (box_width * box_height)
g6
area
## [1] 0.513
Compare & Contrast the ages of the Academy Award Winning Actors & Actresses.
# glimpse(Oscars)
# Histogram of winners' ages in 2-year bins, filled by gender, with black
# bar outlines.
g6 <- ggplot(data = Oscars, mapping = aes(x = Age, fill = Gender)) +
  geom_histogram(binwidth = 2, color = "Black")
g6
# Per-gender summary statistics of the winners' ages.
# Column names and their order are relied on by later code that indexes `s`.
s <- Oscars %>%
  group_by(Gender) %>%
  summarise(
    mean = mean(Age),
    median = median(Age),
    sd = sd(Age),
    min = min(Age),
    max = max(Age)
  )
s
## # A tibble: 2 x 6
## Gender mean median sd min max
## <fct> <dbl> <dbl> <dbl> <int> <int>
## 1 F 36.2 33 11.8 21 81
## 2 M 43.7 42 8.86 29 76
# Mean ages taken from the summary table: row 1 is F and row 2 is M.
f <- s[[1, "mean"]]
m <- s[[2, "mean"]]
# Age of each winner by award year, colored by gender, with a dashed
# horizontal reference line at each gender's mean age.
g7 <- ggplot(data = Oscars, mapping = aes(x = Year, y = Age, color = Gender)) +
  geom_point() +
  geom_hline(yintercept = f, linetype = "dashed", color = "red") +
  geom_hline(yintercept = m, linetype = "dashed", color = "lightblue")
g7