library(tidyverse)
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library("ggplot2")

I did a lot of the homework but not all of the homework.

Question 1

SimpleR 4.1

# Two equal-length numeric vectors, one entry per respondent.
q1 <- c(3, 3, 3, 4, 3, 4, 3, 4, 3, 4)
q2 <- c(5, 2, 5, 5, 2, 2, 5, 5, 4, 2)

# Contingency table: q1 values label the rows, q2 values label the columns.
table(q1, q2)
##    q2
## q1  2 4 5
##   3 2 1 3
##   4 2 0 2

Question 2

SimpleR 4.2

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the UScereal data set shipped with MASS into the workspace.
# Naming the package makes the source of the data explicit.
data("UScereal", package = "MASS")

# attach() is deliberately not used: it would copy every column name onto the
# search path, where it can silently mask (or be masked by) other objects.
# Refer to columns as UScereal$calories, or use with(UScereal, ...), instead.

# List the variables available in the data set.
names(UScereal)
##  [1] "mfr"       "calories"  "protein"   "fat"       "sodium"   
##  [6] "fibre"     "carbo"     "sugars"    "shelf"     "potassium"
## [11] "vitamins"

Question 3

SimpleR 4.9

# Load the built-in mtcars data set and list its column names.
data("mtcars", package = "datasets")
colnames(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"

The variable names are mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, and carb

Question 4

R for Data Science 3.3.1 Exercise 2

# Show the first rows and the column types of the mpg tibble.
print(ggplot2::mpg)
## # A tibble: 234 x 11
##    manufacturer model displ  year   cyl trans drv     cty   hwy fl    cla~
##    <chr>        <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <ch>
##  1 audi         a4      1.8  1999     4 auto~ f        18    29 p     com~
##  2 audi         a4      1.8  1999     4 manu~ f        21    29 p     com~
##  3 audi         a4      2    2008     4 manu~ f        20    31 p     com~
##  4 audi         a4      2    2008     4 auto~ f        21    30 p     com~
##  5 audi         a4      2.8  1999     6 auto~ f        16    26 p     com~
##  6 audi         a4      2.8  1999     6 manu~ f        18    26 p     com~
##  7 audi         a4      3.1  2008     6 auto~ f        18    27 p     com~
##  8 audi         a4 q~   1.8  1999     4 manu~ 4        18    26 p     com~
##  9 audi         a4 q~   1.8  1999     4 auto~ 4        16    25 p     com~
## 10 audi         a4 q~   2    2008     4 manu~ 4        20    28 p     com~
## # ... with 224 more rows
# Compact overview of every column in mpg: its type and first few values.
utils::str(ggplot2::mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...

Categorical: manufacturer, model, year, cyl, trans, drv, fl, class

Continuous: displ, cty, hwy

Question 5

R for Data Science 3.5.1 Exercise 3

# hwy against displ, split into one panel per drv value.
# `drv ~ .` puts drv on the rows and nothing on the columns.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

# hwy against displ, split into one panel per cyl value.
# `. ~ cyl` puts nothing on the rows and cyl on the columns.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

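The . is a placeholder meaning "no faceting variable on this dimension". With drv ~ . the third variable (drv) defines the rows, so the panels are stacked vertically; with . ~ cyl the third variable (cyl) defines the columns, so the panels sit side by side.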

Question 6

R for Data Science 3.6.1 Exercise 2

# Because colour is mapped to drv at the plot level, both layers inherit it:
# the points are coloured by drv and one smooth line is fitted per drv group.
ggplot(mpg, aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(se = FALSE) # se = FALSE drops the confidence ribbon
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

This is about what I expected, but now I better understand color. I was thinking more about specifying which color the graph would be ("blue") as opposed to picking a variable as color and having the program select the colors.

Question 7

R for Data Science 3.7.1 Exercise 1

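The default geom associated with stat_summary() is geom_pointrange (not geom_linerange). I could not graph it at first. I tried a few different ways and kept getting an error about 1d atomic vectors. That error appears when fun.ymin, fun.ymax and fun.y are placed inside aes(): they are arguments of the summary stat, not aesthetics, so they have to be passed outside aes(). The call below is one attempt.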

# Re-create the stat_summary() plot of depth by cut using the stat's default
# geom, geom_pointrange(), with stat = "summary".
#
# The summary functions are arguments of the stat, not aesthetics. Putting
# them inside aes() makes ggplot2 treat the functions as data columns, which
# fails with the "must be a 1d atomic vector" error.
#
# The fun.ymin / fun.ymax / fun.y names match the ggplot2 3.0.0 session
# recorded at the top of this document. ggplot2 >= 3.3.0 renames them to
# fun.min / fun.max / fun and warns about the old names.
ggplot(data = diamonds) +
  geom_pointrange(
    mapping = aes(x = cut, y = depth),
    stat = "summary",
    fun.ymin = min,   # lower end of each range: minimum depth per cut
    fun.ymax = max,   # upper end of each range: maximum depth per cut
    fun.y = median    # point: median depth per cut
  )

Question 8

R for Data Science 3.8.1 Exercise 1

# Plain scatter of hwy against cty; identical (cty, hwy) pairs are drawn on
# top of each other.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point()

# Same scatter, but each point is shifted by a small random amount so that
# overlapping observations become visible.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(position = position_jitter())

By adding jitter, we can see a lot more datapoints giving us a better understanding of the data.

Question 9

R for Data Science 3.9.1 Exercise 4

# hwy against cty with a reference line and a fixed aspect ratio.
# The arguments below are the values geom_abline() and coord_fixed() use when
# called with no arguments, written out for readability.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) + # reference line y = x
  coord_fixed(ratio = 1)                  # one x unit is as long as one y unit

There’s a direct relationship between cars’ city mpg and hwy mpg. The higher the city mpg, the higher the highway mpg probably is.

coord_fixed() makes sure that one unit on the x axis is drawn the same length as one unit on the y axis, so the two variables can be properly compared.

geom_abline() adds in the straight line in the above graph.

Question 10

SQL Functions

# Build two small data frames that share an `id` key and join them.
id <- c(1, 2, 3, 4, 5)
age <- c(31, 42, 51, 55, 70)
gender <- c(0, 0, 1, 1, 1)

# Name the columns when the data frames are created. The original code
# renamed mydata1's columns twice, the second time to c("id", "gender"),
# which relabelled `age` as `gender`. merge() then matched on both `id` and
# `gender`, found no matching rows, and returned 10 rows with no age column.
mydata1 <- data.frame(id = id, age = age)
mydata2 <- data.frame(id = id, gender = gender)

# Full outer join on the shared key only; one row per id.
mydata3 <- merge(mydata1, mydata2, by = "id", all = TRUE)
mydata3
##   id age gender
## 1  1  31      0
## 2  2  42      0
## 3  3  51      1
## 4  4  55      1
## 5  5  70      1

I could not figure out how to load sqlr or sqldf and I am struggling with the merge line.