Exercise #1

Using data from the \(\color{red}{\text{nasaweather}}\) package,
* Create a scatterplot of wind versus pressure, using color to distinguish the type of storm.

# Optional look at the package and the data before plotting:
# packageDescription("nasaweather")
# glimpse(storms)

# Scatterplot of wind (x) against pressure (y), one point per row of
# `storms`, coloured by the `type` column.
g1 <- ggplot(data = storms, mapping = aes(x = wind, y = pressure)) +
  geom_point(mapping = aes(color = type))
g1

# One path per `name`, one panel per `year`.
# Longitude goes on the x-axis and latitude on the y-axis so the paths are
# drawn in the usual map orientation (east-west horizontal, north-south
# vertical) instead of transposed.
g2 <- ggplot(storms, aes(x = long, y = lat)) +
  geom_path(aes(color = name)) +
  facet_wrap(~year)
g2

Exercise #2

Using data from the \(\color{red}{\text{mlb_teams.csv}}\) file,
* Create an informative data graphic & summary that illustrates an interesting relationship within the MLB data.

# Column-by-column overview of the MLB data (types and first values).
MLB_Data %>% glimpse()
## Observations: 210
## Variables: 11
## $ yearID     <int> 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008…
## $ teamID     <fct> ARI, ATL, BAL, BOS, CHA, CHN, CIN, CLE, COL, DET, FLO, HOU…
## $ lgID       <fct> NL, NL, AL, AL, AL, NL, NL, AL, NL, AL, NL, NL, AL, AL, NL…
## $ W          <int> 82, 72, 68, 95, 89, 97, 74, 81, 74, 74, 84, 86, 75, 100, 8…
## $ L          <int> 80, 90, 93, 67, 74, 64, 88, 81, 88, 88, 77, 75, 87, 62, 78…
## $ WPct       <dbl> 0.5061728, 0.4444444, 0.4223602, 0.5864198, 0.5460123, 0.6…
## $ attendance <int> 2509924, 2532834, 1950075, 3048250, 2500648, 3300200, 2058…
## $ normAttend <dbl> 0.5838859, 0.5892155, 0.4536477, 0.7091172, 0.5817280, 0.7…
## $ payroll    <int> 66202712, 102365683, 67196246, 133390035, 121189332, 11834…
## $ metroPop   <int> 4489109, 5614323, 2785874, 4732161, 9554598, 9554598, 2149…
## $ name       <fct> Arizona Diamondbacks, Atlanta Braves, Baltimore Orioles, B…
# Normalised attendance against winning percentage, coloured by league,
# with a per-panel linear fit and one panel per team.
# facet_wrap() lays the team panels out in a grid; facet_grid(cols = ...)
# would put every team in a single row of panels too narrow to read.
g3 <- ggplot(MLB_Data, aes(x = WPct, y = normAttend)) +
  geom_point(aes(color = lgID)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_wrap(vars(teamID))
g3

# Simple linear regression of normalised attendance on winning percentage,
# fitted across all rows of MLB_Data, followed by the usual model summary.
lm_fit <- lm(
  formula = normAttend ~ WPct,
  data = MLB_Data
)
summary(lm_fit)
## 
## Call:
## lm(formula = normAttend ~ WPct, data = MLB_Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28881 -0.09939 -0.01399  0.10299  0.37451 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.08979    0.07135   1.258     0.21    
## WPct         0.97510    0.14138   6.897 6.27e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.14 on 208 degrees of freedom
## Multiple R-squared:  0.1861, Adjusted R-squared:  0.1822 
## F-statistic: 47.57 on 1 and 208 DF,  p-value: 6.267e-11

Exercise #3


Graph the functions \(y = x^2\) and \(y = \sqrt{x}\), then find the area between them using the integrate() function.

# Upper curve: y = sqrt(x).
fun1 <- function(x) {
  sqrt(x)
}

# Lower curve on [0, 1]: y = x^2.
fun2 <- function(x) {
  x^2
}

# Vertical gap between the two curves; integrating it gives the area
# enclosed between them.
fun <- function(x) {
  fun1(x) - fun2(x)
}
# Draw both curves over the interval [0, 1]; the data frame only supplies
# the x-range that stat_function() evaluates the functions on.
g4 <- ggplot(data = data.frame(x = c(0, 1)), mapping = aes(x = x)) +
  stat_function(fun = fun1) +
  stat_function(fun = fun2)
g4

# Numerical integral of the gap between the curves from 0 to 1.
integrate(f = fun, lower = 0, upper = 1)
## 0.3333334 with absolute error < 7.8e-05

Exercise #4

Find the area between \(y = x^2\) and \(y = \sqrt{x}\) with a simulation. Think dartboard!

# Dartboard simulation: throw `darts` uniform random points at the unit
# square and record which ones land between y = x^2 and y = sqrt(x).
darts <- 100000
x <- runif(darts)
y <- runif(darts)

# z is 1 for a dart between the two curves and 0 otherwise.
z <- as.numeric(x^2 < y & y < sqrt(x))
t <- tibble(x, y, z)

g5 <- ggplot(t, aes(x, y)) +
  geom_point(aes(color = z))

# The unit square has area 1, so the hit fraction is the area estimate.
area <- sum(z) / darts
g5

area
## [1] 0.33264
# Dartboard simulation for the area under the standard normal density on
# [-3, 3]: throw uniform random points at the bounding box
# [-3, 3] x [0, 0.4] and scale the hit fraction by the area of the box.
darts <- 100000
box_width <- 6     # x runs from -3 to 3
box_height <- 0.4  # just above the peak of the density, dnorm(0) ~ 0.3989
x <- runif(darts) * box_width - 3
y <- runif(darts) * box_height

# z is 1 for a dart under the density curve and 0 otherwise. Coding the hits
# as -3/3 and summing them does not give an area.
z <- ifelse(y > 0 & y < dnorm(x, 0, 1), 1, 0)
t <- tibble(x, y, z)
g6 <- ggplot(t, aes(x, y)) + geom_point(aes(color = z))

# Area estimate = (fraction of darts that hit) * (area of the bounding box).
area <- sum(z) / darts * box_width * box_height
g6

area
# Expected: close to pnorm(3) - pnorm(-3), about 0.9973; the exact value
# varies from run to run because no random seed is set.

Exercise #5

Compare & Contrast the ages of the Academy Award Winning Actors & Actresses.

# glimpse(Oscars)

# Stacked histogram of winner ages in 2-year bins, filled by Gender.
g6 <- ggplot(data = Oscars, mapping = aes(x = Age, fill = Gender)) +
  geom_histogram(binwidth = 2, color = "Black")
g6

# Per-Gender summary statistics of Age.
s <- Oscars %>%
  group_by(Gender) %>%
  summarise(
    mean = mean(Age),
    median = median(Age),
    sd = sd(Age),
    min = min(Age),
    max = max(Age)
  )
s
## # A tibble: 2 x 6
##   Gender  mean median    sd   min   max
##   <fct>  <dbl>  <dbl> <dbl> <int> <int>
## 1 F       36.2     33 11.8     21    81
## 2 M       43.7     42  8.86    29    76
# Mean ages taken from the summary table by row position: row 1 and row 2
# of `s` (printed above as Gender F and M respectively).
f <- s$mean[[1]]
m <- s$mean[[2]]

# Age of each winner by year, with dashed horizontal reference lines at the
# two group means (red for row 1, light blue for row 2).
g7 <- ggplot(Oscars, aes(x = Year, y = Age, color = Gender)) +
  geom_point() +
  geom_hline(yintercept = f, linetype = "dashed", color = "red") +
  geom_hline(yintercept = m, linetype = "dashed", color = "lightblue")
g7