library(tidyverse)
library("ggplot2")
library(psych)
library(XML)
library(data.table)
library(rvest)
library(xml2)

Note: I struggled with question 1 but I completed the rest of the assignment.

Question 1

SimpleR 4.1

# Answers to q1-q3 for students 1 through 10, one vector per question
student <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
q1 <- c(3, 3, 3, 4, 3, 4, 3, 4, 3, 4)
q2 <- c(5, 2, 5, 5, 2, 2, 5, 5, 4, 2)
q3 <- c(1, 3, 1, 1, 1, 3, 1, 1, 1, 1)

# Four-way cross-tabulation: one student-by-q1 slice per (q2, q3) pair
table(student, q1, q2, q3)
## , , q2 = 2, q3 = 1
## 
##        q1
## student 3 4
##      1  0 0
##      2  0 0
##      3  0 0
##      4  0 0
##      5  1 0
##      6  0 0
##      7  0 0
##      8  0 0
##      9  0 0
##      10 0 1
## 
## , , q2 = 4, q3 = 1
## 
##        q1
## student 3 4
##      1  0 0
##      2  0 0
##      3  0 0
##      4  0 0
##      5  0 0
##      6  0 0
##      7  0 0
##      8  0 0
##      9  1 0
##      10 0 0
## 
## , , q2 = 5, q3 = 1
## 
##        q1
## student 3 4
##      1  1 0
##      2  0 0
##      3  1 0
##      4  0 1
##      5  0 0
##      6  0 0
##      7  1 0
##      8  0 1
##      9  0 0
##      10 0 0
## 
## , , q2 = 2, q3 = 3
## 
##        q1
## student 3 4
##      1  0 0
##      2  1 0
##      3  0 0
##      4  0 0
##      5  0 0
##      6  0 1
##      7  0 0
##      8  0 0
##      9  0 0
##      10 0 0
## 
## , , q2 = 4, q3 = 3
## 
##        q1
## student 3 4
##      1  0 0
##      2  0 0
##      3  0 0
##      4  0 0
##      5  0 0
##      6  0 0
##      7  0 0
##      8  0 0
##      9  0 0
##      10 0 0
## 
## , , q2 = 5, q3 = 3
## 
##        q1
## student 3 4
##      1  0 0
##      2  0 0
##      3  0 0
##      4  0 0
##      5  0 0
##      6  0 0
##      7  0 0
##      8  0 0
##      9  0 0
##      10 0 0

Question 2

SimpleR 4.2

# MASS supplies the UScereal data set; attaching it masks dplyr::select().
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
data(UScereal)
# attach() is intentionally avoided here: nothing in this chunk needs bare
# column names, and attached columns can shadow same-named objects.
names(UScereal)
##  [1] "mfr"       "calories"  "protein"   "fat"       "sodium"   
##  [6] "fibre"     "carbo"     "sugars"    "shelf"     "potassium"
## [11] "vitamins"
# Print every row; n follows the data instead of a hard-coded row count.
head(UScereal, n = nrow(UScereal))
##                                       mfr  calories    protein       fat
## 100% Bran                               N 212.12121 12.1212121 3.0303030
## All-Bran                                K 212.12121 12.1212121 3.0303030
## All-Bran with Extra Fiber               K 100.00000  8.0000000 0.0000000
## Apple Cinnamon Cheerios                 G 146.66667  2.6666667 2.6666667
## Apple Jacks                             K 110.00000  2.0000000 0.0000000
## Basic 4                                 G 173.33333  4.0000000 2.6666667
## Bran Chex                               R 134.32836  2.9850746 1.4925373
## Bran Flakes                             P 134.32836  4.4776119 0.0000000
## Cap'n'Crunch                            Q 160.00000  1.3333333 2.6666667
## Cheerios                                G  88.00000  4.8000000 1.6000000
## Cinnamon Toast Crunch                   G 160.00000  1.3333333 4.0000000
## Clusters                                G 220.00000  6.0000000 4.0000000
## Cocoa Puffs                             G 110.00000  1.0000000 1.0000000
## Corn Chex                               R 110.00000  2.0000000 0.0000000
## Corn Flakes                             K 100.00000  2.0000000 0.0000000
## Corn Pops                               K 110.00000  1.0000000 0.0000000
## Count Chocula                           G 110.00000  1.0000000 1.0000000
## Cracklin' Oat Bran                      K 220.00000  6.0000000 6.0000000
## Crispix                                 K 110.00000  2.0000000 0.0000000
## Crispy Wheat & Raisins                  G 133.33333  2.6666667 1.3333333
## Double Chex                             R 133.33333  2.6666667 0.0000000
## Froot Loops                             K 110.00000  2.0000000 1.0000000
## Frosted Flakes                          K 146.66667  1.3333333 0.0000000
## Frosted Mini-Wheats                     K 125.00000  3.7500000 0.0000000
## Fruit & Fibre: Dates Walnuts and Oats   P 179.10448  4.4776119 2.9850746
## Fruitful Bran                           K 179.10448  4.4776119 0.0000000
## Fruity Pebbles                          P 146.66667  1.3333333 1.3333333
## Golden Crisp                            P 113.63636  2.2727273 0.0000000
## Golden Grahams                          G 146.66667  1.3333333 1.3333333
## Grape Nuts Flakes                       P 113.63636  3.4090909 1.1363636
## Grape-Nuts                              P 440.00000 12.0000000 0.0000000
## Great Grains Pecan                      P 363.63636  9.0909091 9.0909091
## Honey Graham Ohs                        Q 120.00000  1.0000000 2.0000000
## Honey Nut Cheerios                      G 146.66667  4.0000000 1.3333333
## Honey-comb                              P  82.70677  0.7518797 0.0000000
## Just Right Fruit & Nut                  K 186.66667  4.0000000 1.3333333
## Kix                                     G  73.33333  1.3333333 0.6666667
## Life                                    Q 149.25373  5.9701493 2.9850746
## Lucky Charms                            G 110.00000  2.0000000 1.0000000
## Mueslix Crispy Blend                    K 238.80597  4.4776119 2.9850746
## Multi-Grain Cheerios                    G 100.00000  2.0000000 1.0000000
## Nut&Honey Crunch                        K 179.10448  2.9850746 1.4925373
## Nutri-Grain Almond-Raisin               K 208.95522  4.4776119 2.9850746
## Oatmeal Raisin Crisp                    G 260.00000  6.0000000 4.0000000
## Post Nat. Raisin Bran                   P 179.10448  4.4776119 1.4925373
## Product 19                              K 100.00000  3.0000000 0.0000000
## Puffed Rice                             Q  50.00000  1.0000000 0.0000000
## Quaker Oat Squares                      Q 200.00000  8.0000000 2.0000000
## Raisin Bran                             K 160.00000  4.0000000 1.3333333
## Raisin Nut Bran                         G 200.00000  6.0000000 4.0000000
## Raisin Squares                          K 180.00000  4.0000000 0.0000000
## Rice Chex                               R  97.34513  0.8849558 0.0000000
## Rice Krispies                           K 110.00000  2.0000000 0.0000000
## Shredded Wheat 'n'Bran                  N 134.32836  4.4776119 0.0000000
## Shredded Wheat spoon size               N 134.32836  4.4776119 0.0000000
## Smacks                                  K 146.66667  2.6666667 1.3333333
## Special K                               K 110.00000  6.0000000 0.0000000
## Total Corn Flakes                       G 110.00000  2.0000000 1.0000000
## Total Raisin Bran                       G 140.00000  3.0000000 1.0000000
## Total Whole Grain                       G 100.00000  3.0000000 1.0000000
## Triples                                 G 146.66667  2.6666667 1.3333333
## Trix                                    G 110.00000  1.0000000 1.0000000
## Wheat Chex                              R 149.25373  4.4776119 1.4925373
## Wheaties                                G 100.00000  3.0000000 1.0000000
## Wheaties Honey Gold                     G 146.66667  2.6666667 1.3333333
##                                          sodium     fibre    carbo
## 100% Bran                             393.93939 30.303030 15.15152
## All-Bran                              787.87879 27.272727 21.21212
## All-Bran with Extra Fiber             280.00000 28.000000 16.00000
## Apple Cinnamon Cheerios               240.00000  2.000000 14.00000
## Apple Jacks                           125.00000  1.000000 11.00000
## Basic 4                               280.00000  2.666667 24.00000
## Bran Chex                             298.50746  5.970149 22.38806
## Bran Flakes                           313.43284  7.462687 19.40299
## Cap'n'Crunch                          293.33333  0.000000 16.00000
## Cheerios                              232.00000  1.600000 13.60000
## Cinnamon Toast Crunch                 280.00000  0.000000 17.33333
## Clusters                              280.00000  4.000000 26.00000
## Cocoa Puffs                           180.00000  0.000000 12.00000
## Corn Chex                             280.00000  0.000000 22.00000
## Corn Flakes                           290.00000  1.000000 21.00000
## Corn Pops                              90.00000  1.000000 13.00000
## Count Chocula                         180.00000  0.000000 12.00000
## Cracklin' Oat Bran                    280.00000  8.000000 20.00000
## Crispix                               220.00000  1.000000 21.00000
## Crispy Wheat & Raisins                186.66667  2.666667 14.66667
## Double Chex                           253.33333  1.333333 24.00000
## Froot Loops                           125.00000  1.000000 11.00000
## Frosted Flakes                        266.66667  1.333333 18.66667
## Frosted Mini-Wheats                     0.00000  3.750000 17.50000
## Fruit & Fibre: Dates Walnuts and Oats 238.80597  7.462687 17.91045
## Fruitful Bran                         358.20896  7.462687 20.89552
## Fruity Pebbles                        180.00000  0.000000 17.33333
## Golden Crisp                           51.13636  0.000000 12.50000
## Golden Grahams                        373.33333  0.000000 20.00000
## Grape Nuts Flakes                     159.09091  3.409091 17.04545
## Grape-Nuts                            680.00000 12.000000 68.00000
## Great Grains Pecan                    227.27273  9.090909 39.39394
## Honey Graham Ohs                      220.00000  1.000000 12.00000
## Honey Nut Cheerios                    333.33333  2.000000 15.33333
## Honey-comb                            135.33835  0.000000 10.52632
## Just Right Fruit & Nut                226.66667  2.666667 26.66667
## Kix                                   173.33333  0.000000 14.00000
## Life                                  223.88060  2.985075 17.91045
## Lucky Charms                          180.00000  0.000000 12.00000
## Mueslix Crispy Blend                  223.88060  4.477612 25.37313
## Multi-Grain Cheerios                  220.00000  2.000000 15.00000
## Nut&Honey Crunch                      283.58209  0.000000 22.38806
## Nutri-Grain Almond-Raisin             328.35821  4.477612 31.34328
## Oatmeal Raisin Crisp                  340.00000  3.000000 27.00000
## Post Nat. Raisin Bran                 298.50746  8.955224 16.41791
## Product 19                            320.00000  1.000000 20.00000
## Puffed Rice                             0.00000  0.000000 13.00000
## Quaker Oat Squares                    270.00000  4.000000 28.00000
## Raisin Bran                           280.00000  6.666667 18.66667
## Raisin Nut Bran                       280.00000  5.000000 21.00000
## Raisin Squares                          0.00000  4.000000 30.00000
## Rice Chex                             212.38938  0.000000 20.35398
## Rice Krispies                         290.00000  0.000000 22.00000
## Shredded Wheat 'n'Bran                  0.00000  5.970149 28.35821
## Shredded Wheat spoon size               0.00000  4.477612 29.85075
## Smacks                                 93.33333  1.333333 12.00000
## Special K                             230.00000  1.000000 16.00000
## Total Corn Flakes                     200.00000  0.000000 21.00000
## Total Raisin Bran                     190.00000  4.000000 15.00000
## Total Whole Grain                     200.00000  3.000000 16.00000
## Triples                               333.33333  0.000000 28.00000
## Trix                                  140.00000  0.000000 13.00000
## Wheat Chex                            343.28358  4.477612 25.37313
## Wheaties                              200.00000  3.000000 17.00000
## Wheaties Honey Gold                   266.66667  1.333333 21.33333
##                                          sugars shelf potassium vitamins
## 100% Bran                             18.181818     3 848.48485 enriched
## All-Bran                              15.151515     3 969.69697 enriched
## All-Bran with Extra Fiber              0.000000     3 660.00000 enriched
## Apple Cinnamon Cheerios               13.333333     1  93.33333 enriched
## Apple Jacks                           14.000000     2  30.00000 enriched
## Basic 4                               10.666667     3 133.33333 enriched
## Bran Chex                              8.955224     1 186.56716 enriched
## Bran Flakes                            7.462687     3 283.58209 enriched
## Cap'n'Crunch                          16.000000     2  46.66667 enriched
## Cheerios                               0.800000     1  84.00000 enriched
## Cinnamon Toast Crunch                 12.000000     2  60.00000 enriched
## Clusters                              14.000000     3 210.00000 enriched
## Cocoa Puffs                           13.000000     2  55.00000 enriched
## Corn Chex                              3.000000     1  25.00000 enriched
## Corn Flakes                            2.000000     1  35.00000 enriched
## Corn Pops                             12.000000     2  20.00000 enriched
## Count Chocula                         13.000000     2  65.00000 enriched
## Cracklin' Oat Bran                    14.000000     3 320.00000 enriched
## Crispix                                3.000000     3  30.00000 enriched
## Crispy Wheat & Raisins                13.333333     3 160.00000 enriched
## Double Chex                            6.666667     3 106.66667 enriched
## Froot Loops                           13.000000     2  30.00000 enriched
## Frosted Flakes                        14.666667     1  33.33333 enriched
## Frosted Mini-Wheats                    8.750000     2 125.00000 enriched
## Fruit & Fibre: Dates Walnuts and Oats 14.925373     3 298.50746 enriched
## Fruitful Bran                         17.910448     3 283.58209 enriched
## Fruity Pebbles                        16.000000     2  33.33333 enriched
## Golden Crisp                          17.045455     1  45.45455 enriched
## Golden Grahams                        12.000000     2  60.00000 enriched
## Grape Nuts Flakes                      5.681818     3  96.59091 enriched
## Grape-Nuts                            12.000000     3 360.00000 enriched
## Great Grains Pecan                    12.121212     3 303.03030 enriched
## Honey Graham Ohs                      11.000000     2  45.00000 enriched
## Honey Nut Cheerios                    13.333333     1 120.00000 enriched
## Honey-comb                             8.270677     1  26.31579 enriched
## Just Right Fruit & Nut                12.000000     3 126.66667     100%
## Kix                                    2.000000     2  26.66667 enriched
## Life                                   8.955224     2 141.79104 enriched
## Lucky Charms                          12.000000     2  55.00000 enriched
## Mueslix Crispy Blend                  19.402985     3 238.80597 enriched
## Multi-Grain Cheerios                   6.000000     1  90.00000 enriched
## Nut&Honey Crunch                      13.432836     2  59.70149 enriched
## Nutri-Grain Almond-Raisin             10.447761     3 194.02985 enriched
## Oatmeal Raisin Crisp                  20.000000     3 240.00000 enriched
## Post Nat. Raisin Bran                 20.895522     3 388.05970 enriched
## Product 19                             3.000000     3  45.00000     100%
## Puffed Rice                            0.000000     3  15.00000     none
## Quaker Oat Squares                    12.000000     3 220.00000 enriched
## Raisin Bran                           16.000000     2 320.00000 enriched
## Raisin Nut Bran                       16.000000     3 280.00000 enriched
## Raisin Squares                        12.000000     3 220.00000 enriched
## Rice Chex                              1.769912     1  26.54867 enriched
## Rice Krispies                          3.000000     1  35.00000 enriched
## Shredded Wheat 'n'Bran                 0.000000     1 208.95522     none
## Shredded Wheat spoon size              0.000000     1 179.10448     none
## Smacks                                20.000000     2  53.33333 enriched
## Special K                              3.000000     1  55.00000 enriched
## Total Corn Flakes                      3.000000     3  35.00000     100%
## Total Raisin Bran                     14.000000     3 230.00000     100%
## Total Whole Grain                      3.000000     3 110.00000     100%
## Triples                                4.000000     3  80.00000 enriched
## Trix                                  12.000000     2  25.00000 enriched
## Wheat Chex                             4.477612     1 171.64179 enriched
## Wheaties                               3.000000     1 110.00000 enriched
## Wheaties Honey Gold                   10.666667     1  80.00000 enriched
# Per-column summary: quartiles and mean for numerics, counts for factors
summary(UScereal)
##  mfr       calories        protein             fat            sodium     
##  G:22   Min.   : 50.0   Min.   : 0.7519   Min.   :0.000   Min.   :  0.0  
##  K:21   1st Qu.:110.0   1st Qu.: 2.0000   1st Qu.:0.000   1st Qu.:180.0  
##  N: 3   Median :134.3   Median : 3.0000   Median :1.000   Median :232.0  
##  P: 9   Mean   :149.4   Mean   : 3.6837   Mean   :1.423   Mean   :237.8  
##  Q: 5   3rd Qu.:179.1   3rd Qu.: 4.4776   3rd Qu.:2.000   3rd Qu.:290.0  
##  R: 5   Max.   :440.0   Max.   :12.1212   Max.   :9.091   Max.   :787.9  
##      fibre            carbo           sugars          shelf      
##  Min.   : 0.000   Min.   :10.53   Min.   : 0.00   Min.   :1.000  
##  1st Qu.: 0.000   1st Qu.:15.00   1st Qu.: 4.00   1st Qu.:1.000  
##  Median : 2.000   Median :18.67   Median :12.00   Median :2.000  
##  Mean   : 3.871   Mean   :19.97   Mean   :10.05   Mean   :2.169  
##  3rd Qu.: 4.478   3rd Qu.:22.39   3rd Qu.:14.00   3rd Qu.:3.000  
##  Max.   :30.303   Max.   :68.00   Max.   :20.90   Max.   :3.000  
##    potassium          vitamins 
##  Min.   : 15.00   100%    : 5  
##  1st Qu.: 45.00   enriched:57  
##  Median : 96.59   none    : 3  
##  Mean   :159.12                
##  3rd Qu.:220.00                
##  Max.   :969.70
# Manufacturer against shelf, with random jitter applied to the points
ggplot(UScereal, aes(x = mfr, y = shelf)) +
  geom_jitter()

# Scatterplot of sugars (y) against carbohydrates (x) with a least-squares
# line. Columns are read through UScereal instead of attach(), and the model
# is fitted as y ~ x (sugars ~ carbo) so the line matches the plotted axes;
# lm(carbo ~ sugars) regressed x on y and drew the wrong line.
plot(UScereal$carbo, UScereal$sugars, main = "Carbo and Sugars Scatterplot",
     xlab = "Carbohydrates", ylab = "Sugars ", pch = 19)
abline(lm(sugars ~ carbo, data = UScereal))

# Fat against shelf, jittered
ggplot(UScereal, aes(x = fat, y = shelf)) +
  geom_jitter()

# Fibre against manufacturer, jittered
ggplot(UScereal, aes(x = fibre, y = mfr)) +
  geom_jitter()

# Scatterplot of sugars (y) against sodium (x) with a least-squares line.
# Columns are read through UScereal instead of a further attach(), and the
# model is fitted as y ~ x (sugars ~ sodium) so the line matches the plotted
# axes; lm(sodium ~ sugars) regressed x on y and drew the wrong line.
plot(UScereal$sodium, UScereal$sugars, main = "Sodium and Sugars Scatterplot",
     xlab = "Sodium", ylab = "Sugars ", pch = 19)
abline(lm(sugars ~ sodium, data = UScereal))

  1. You cannot tell much but P tends to be on higher shelves and R on lower.
  2. 57 of the 65 cereals are enriched. The 5 cereals at 100% are all below the mean fat content, and only one of them is above the median fat content. The 3 with no vitamins all have zero fat.
  3. The items higher in fat tend to be on higher shelves but the relationship is not strong.
  4. It does not look like there is much of a relationship between carbohydrates and sugars.
  5. R, Q, P, and G tend to have lower fibre in their products.
  6. It does not look like there is much of a relationship between sodium and sugars.

Question 3

SimpleR 4.9

# Load the built-in mtcars data, list its columns, then summarise each one
data("mtcars")
colnames(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
summary(object = mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Show all 32 rows; the row names are the car models
head(mtcars, 32)
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
# Structure check: 32 observations of 11 variables, all stored as numeric
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Descriptive statistics per column (n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis, se)
describe(mtcars)
##      vars  n   mean     sd median trimmed    mad   min    max  range  skew
## mpg     1 32  20.09   6.03  19.20   19.70   5.41 10.40  33.90  23.50  0.61
## cyl     2 32   6.19   1.79   6.00    6.23   2.97  4.00   8.00   4.00 -0.17
## disp    3 32 230.72 123.94 196.30  222.52 140.48 71.10 472.00 400.90  0.38
## hp      4 32 146.69  68.56 123.00  141.19  77.10 52.00 335.00 283.00  0.73
## drat    5 32   3.60   0.53   3.70    3.58   0.70  2.76   4.93   2.17  0.27
## wt      6 32   3.22   0.98   3.33    3.15   0.77  1.51   5.42   3.91  0.42
## qsec    7 32  17.85   1.79  17.71   17.83   1.42 14.50  22.90   8.40  0.37
## vs      8 32   0.44   0.50   0.00    0.42   0.00  0.00   1.00   1.00  0.24
## am      9 32   0.41   0.50   0.00    0.38   0.00  0.00   1.00   1.00  0.36
## gear   10 32   3.69   0.74   4.00    3.62   1.48  3.00   5.00   2.00  0.53
## carb   11 32   2.81   1.62   2.00    2.65   1.48  1.00   8.00   7.00  1.05
##      kurtosis    se
## mpg     -0.37  1.07
## cyl     -1.76  0.32
## disp    -1.21 21.91
## hp      -0.14 12.12
## drat    -0.71  0.09
## wt      -0.02  0.17
## qsec     0.34  0.32
## vs      -2.00  0.09
## am      -1.92  0.09
## gear    -1.07  0.13
## carb     1.26  0.29
# mpg values of the cars that exceed 30 mpg
mtcars$mpg[mtcars$mpg > 30]
## [1] 32.4 30.4 33.9 30.4
# Frequency of each distinct mpg value
table(mtcars[["mpg"]])
## 
## 10.4 13.3 14.3 14.7   15 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.7 
##    2    1    1    1    1    2    1    1    1    1    1    1    1    2    1 
##   21 21.4 21.5 22.8 24.4   26 27.3 30.4 32.4 33.9 
##    2    2    1    2    1    1    1    2    1    1
  1. The variable names are mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, and carb.
  2. The maximum mpg is 33.90.
  3. Toyota Corolla gets 33.90 mpg.
  4. The first 5 listed cars are Mazda RX4, Mazda RX4 Wag, Datsun 710, Hornet 4 Drive, and Hornet Sportabout.
  5. The Valiant has a horsepower of 105.
  6. The Mercedes 450slc gets 15.2 mpg, 8 cyl, 275.8 disp, 180 hp, 3.07 drat, 3.780 wt, 18.00 qsec, 0 vs, 0 am, 3 gear, and 3 carb.
  7. This is not a good candidate for linear regression.
# MPG (y) against cylinder count (x) with a least-squares line.
# mtcars is deliberately not attached: attach(mtcars) puts a numeric `mpg` on
# the search path ahead of ggplot2's `mpg` data set. The model is fitted as
# y ~ x (mpg ~ cyl) so the line matches the plotted axes; lm(cyl ~ mpg)
# regressed x on y and drew the wrong line.
plot(mtcars$cyl, mtcars$mpg, main = "Cyl and MPG Scatterplot",
     xlab = "Cylinders", ylab = "Miles Per Gallon ", pch = 19)
abline(lm(mpg ~ cyl, data = mtcars))

Question 4

R for Data Science 3.3.1 Exercise 2

# Print ggplot2's mpg tibble; the ggplot2:: prefix selects that data set even
# when another object named `mpg` sits earlier on the search path.
ggplot2::mpg
## # A tibble: 234 x 11
##    manufacturer model displ  year   cyl trans drv     cty   hwy fl    cla~
##    <chr>        <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <ch>
##  1 audi         a4      1.8  1999     4 auto~ f        18    29 p     com~
##  2 audi         a4      1.8  1999     4 manu~ f        21    29 p     com~
##  3 audi         a4      2    2008     4 manu~ f        20    31 p     com~
##  4 audi         a4      2    2008     4 auto~ f        21    30 p     com~
##  5 audi         a4      2.8  1999     6 auto~ f        16    26 p     com~
##  6 audi         a4      2.8  1999     6 manu~ f        18    26 p     com~
##  7 audi         a4      3.1  2008     6 auto~ f        18    27 p     com~
##  8 audi         a4 q~   1.8  1999     4 manu~ 4        18    26 p     com~
##  9 audi         a4 q~   1.8  1999     4 auto~ 4        16    25 p     com~
## 10 audi         a4 q~   2    2008     4 manu~ 4        20    28 p     com~
## # ... with 224 more rows
# Qualify with ggplot2:: so str() inspects the 234-row tibble. A bare `mpg`
# can resolve to a same-named object earlier on the search path (for example
# the mpg column of an attached mtcars), which is what produced the
# 32-element numeric vector recorded here previously.
str(ggplot2::mpg)

Categorical: manufacturer, model, year, cyl, trans, drv, fl, class

Continuous: displ, cty, hwy

Question 5

R for Data Science 3.5.1 Exercise 3

# Bring ggplot2's mpg data set into the workspace
data("mpg")

# One panel per drv level, arranged as rows
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

# One panel per cyl level, arranged as columns
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

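The `.` is a placeholder meaning "no faceting variable on this dimension". In `drv ~ .` the panels are split by drv into rows (stacked vertically); in `. ~ cyl` they are split by cyl into columns (placed side by side).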

Question 6

R for Data Science 3.6.1 Exercise 2

# displ against hwy coloured by drv; because colour is set in ggplot(), both
# layers inherit it and the smoother is fitted per drv level, without bands
ggplot(mpg, aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

This is about what I expected, but now I better understand color. I was thinking more about specifying which color the graph would be ("blue") as opposed to picking a variable as color and having the program select the colors.

Question 7

R for Data Science 3.7.1 Exercise 1

The default geom associated with stat_summary() is geom_pointrange().

# stat_summary() draws with geom_pointrange by default, so the geom-first
# equivalent is geom_pointrange() with stat = "summary": the median as the
# point and min-to-max as the range. The earlier geom_point() + linerange
# version drew every raw observation and no median marker.
# The summary is supplied through fun.data, which returns y/ymin/ymax in one
# data frame; the separate fun.y / fun.ymin / fun.ymax arguments are
# deprecated in ggplot2 >= 3.3.0.
ggplot(diamonds, aes(x = cut, y = depth)) +
  geom_pointrange(
    stat = "summary",
    fun.data = function(v) {
      data.frame(y = median(v), ymin = min(v), ymax = max(v))
    }
  )

Question 8

R for Data Science 3.8.1 Exercise 1

# City against highway mileage, points drawn at their exact values
ggplot(mpg, aes(cty, hwy)) +
  geom_point()

# The same data with random jitter applied to the points
ggplot(mpg, aes(cty, hwy)) +
  geom_jitter()

By adding jitter, we can see a lot more datapoints giving us a better understanding of the data.

Question 9

R for Data Science 3.9.1 Exercise 4

# City against highway mileage with the y = x reference line, drawn on a
# fixed 1:1 aspect ratio (the defaults of geom_abline() and coord_fixed(),
# written out explicitly)
ggplot(mpg, aes(cty, hwy)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  coord_fixed(ratio = 1)

There’s a direct relationship between cars’ city mpg and hwy mpg. The higher the city mpg, the higher the highway mpg probably is.

coord_fixed() fixes the aspect ratio so that one unit on the x-axis is drawn the same length as one unit on the y-axis, which lets city and highway mpg be compared directly.

geom_abline() adds in the straight line in the above graph.

Question 10

SQL Functions

# Two small tables that share the key column `id`
id <- c(1, 2, 3, 4, 5)
age <- c(31, 42, 51, 55, 70)
gender <- c(0, 0, 1, 1, 1)

# data.frame() names columns after its arguments, so no cbind()/colnames()
# round trip is needed. The earlier code renamed mydata1 a second time
# (instead of mydata2), which turned its "age" column into "gender".
mydata1 <- data.frame(id, age)
mydata2 <- data.frame(id, gender)

# Join the two tables on id; the columns arrive already correctly named
mydata3 <- merge(mydata1, mydata2, by = "id")
mydata3
##   id age gender
## 1  1  31      0
## 2  2  42      0
## 3  3  51      1
## 4  4  55      1
## 5  5  70      1

IMDB

movie <- read_html("https://www.imdb.com/movies-in-theaters/")

# Extract title and runtime per listing so the two stay paired. Selecting all
# titles and all <time> nodes separately gave vectors of different lengths
# (fewer runtimes than titles), and cbind() recycled the shorter one, so
# runtimes were attached to the wrong movies.
# NOTE(review): ".list_item" is assumed to be the per-movie container on this
# page -- confirm against the live markup before relying on the result.
listings <- movie %>% html_nodes(".list_item")

# html_node() yields exactly one result per listing, NA where none exists
newmovies <- listings %>% html_node("h4 a") %>% html_text()
runtime <- listings %>% html_node("time") %>% html_text()
runtime <- as.numeric(gsub(" min", "", runtime))

# data.frame() instead of cbind(): runtime stays numeric, and a length
# mismatch raises an error rather than being recycled
myimdbdata <- data.frame(newmovies, runtime)
myimdbdata
##                                           newmovies runtime
## 1                      The Happytime Murders (2018)      91
## 2                                     A.X.L. (2018)     100
## 3                                  Searching (2018)     102
## 4                                   Papillon (2017)     133
## 5                          Support the Girls (2018)      90
## 6                                   Replicas (2018)     113
## 7                               The Bookshop (2017)     108
## 8                         Beautifully Broken (2018)      95
## 9   John McEnroe: In the Realm of Perfection (2018)     120
## 10                            An L.A. Minute (2018)     113
## 11                         Crazy Rich Asians (2018)      94
## 12                                   The Meg (2018)      96
## 13                                   Mile 22 (2018)     147
## 14                                     Alpha (2018)     104
## 15             Mission: Impossible - Fallout (2018)     117
## 16                         Christopher Robin (2018)     114
## 17                     The Spy Who Dumped Me (2018)      97
## 18               Mamma Mia! Here We Go Again (2018)     135
## 19     Hotel Transylvania 3: Summer Vacation (2018)      93
## 20                            BlacKkKlansman (2018)      91
## 21                               Slender Man (2018)     100