library(tidyverse)
library("ggplot2")
library(psych)
library(XML)
library(data.table)
library(rvest)
library(xml2)
Note: I struggled with question 1 but I completed the rest of the assignment.
SimpleR 4.1
# Survey responses: one entry per student for each of the three questions.
student <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
q1 <- c(3, 3, 3, 4, 3, 4, 3, 4, 3, 4)
q2 <- c(5, 2, 5, 5, 2, 2, 5, 5, 4, 2)
q3 <- c(1, 3, 1, 1, 1, 3, 1, 1, 1, 1)

# Four-way cross-tabulation (student x q1 x q2 x q3); each student lands in
# exactly one cell.
table(student, q1, q2, q3)
## , , q2 = 2, q3 = 1
##
## q1
## student 3 4
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 1 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 1
##
## , , q2 = 4, q3 = 1
##
## q1
## student 3 4
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 1 0
## 10 0 0
##
## , , q2 = 5, q3 = 1
##
## q1
## student 3 4
## 1 1 0
## 2 0 0
## 3 1 0
## 4 0 1
## 5 0 0
## 6 0 0
## 7 1 0
## 8 0 1
## 9 0 0
## 10 0 0
##
## , , q2 = 2, q3 = 3
##
## q1
## student 3 4
## 1 0 0
## 2 1 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 1
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
##
## , , q2 = 4, q3 = 3
##
## q1
## student 3 4
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
##
## , , q2 = 5, q3 = 3
##
## q1
## student 3 4
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
SimpleR 4.2
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Load the UScereal data set (MASS is attached just above), put its columns on
# the search path so they can be referenced by bare name, and list the column
# names.
data(UScereal)
attach(UScereal)
names(UScereal)
## [1] "mfr" "calories" "protein" "fat" "sodium"
## [6] "fibre" "carbo" "sugars" "shelf" "potassium"
## [11] "vitamins"
# Print up to the first 65 rows of the cereal data.
head(UScereal, 65)
## mfr calories protein fat
## 100% Bran N 212.12121 12.1212121 3.0303030
## All-Bran K 212.12121 12.1212121 3.0303030
## All-Bran with Extra Fiber K 100.00000 8.0000000 0.0000000
## Apple Cinnamon Cheerios G 146.66667 2.6666667 2.6666667
## Apple Jacks K 110.00000 2.0000000 0.0000000
## Basic 4 G 173.33333 4.0000000 2.6666667
## Bran Chex R 134.32836 2.9850746 1.4925373
## Bran Flakes P 134.32836 4.4776119 0.0000000
## Cap'n'Crunch Q 160.00000 1.3333333 2.6666667
## Cheerios G 88.00000 4.8000000 1.6000000
## Cinnamon Toast Crunch G 160.00000 1.3333333 4.0000000
## Clusters G 220.00000 6.0000000 4.0000000
## Cocoa Puffs G 110.00000 1.0000000 1.0000000
## Corn Chex R 110.00000 2.0000000 0.0000000
## Corn Flakes K 100.00000 2.0000000 0.0000000
## Corn Pops K 110.00000 1.0000000 0.0000000
## Count Chocula G 110.00000 1.0000000 1.0000000
## Cracklin' Oat Bran K 220.00000 6.0000000 6.0000000
## Crispix K 110.00000 2.0000000 0.0000000
## Crispy Wheat & Raisins G 133.33333 2.6666667 1.3333333
## Double Chex R 133.33333 2.6666667 0.0000000
## Froot Loops K 110.00000 2.0000000 1.0000000
## Frosted Flakes K 146.66667 1.3333333 0.0000000
## Frosted Mini-Wheats K 125.00000 3.7500000 0.0000000
## Fruit & Fibre: Dates Walnuts and Oats P 179.10448 4.4776119 2.9850746
## Fruitful Bran K 179.10448 4.4776119 0.0000000
## Fruity Pebbles P 146.66667 1.3333333 1.3333333
## Golden Crisp P 113.63636 2.2727273 0.0000000
## Golden Grahams G 146.66667 1.3333333 1.3333333
## Grape Nuts Flakes P 113.63636 3.4090909 1.1363636
## Grape-Nuts P 440.00000 12.0000000 0.0000000
## Great Grains Pecan P 363.63636 9.0909091 9.0909091
## Honey Graham Ohs Q 120.00000 1.0000000 2.0000000
## Honey Nut Cheerios G 146.66667 4.0000000 1.3333333
## Honey-comb P 82.70677 0.7518797 0.0000000
## Just Right Fruit & Nut K 186.66667 4.0000000 1.3333333
## Kix G 73.33333 1.3333333 0.6666667
## Life Q 149.25373 5.9701493 2.9850746
## Lucky Charms G 110.00000 2.0000000 1.0000000
## Mueslix Crispy Blend K 238.80597 4.4776119 2.9850746
## Multi-Grain Cheerios G 100.00000 2.0000000 1.0000000
## Nut&Honey Crunch K 179.10448 2.9850746 1.4925373
## Nutri-Grain Almond-Raisin K 208.95522 4.4776119 2.9850746
## Oatmeal Raisin Crisp G 260.00000 6.0000000 4.0000000
## Post Nat. Raisin Bran P 179.10448 4.4776119 1.4925373
## Product 19 K 100.00000 3.0000000 0.0000000
## Puffed Rice Q 50.00000 1.0000000 0.0000000
## Quaker Oat Squares Q 200.00000 8.0000000 2.0000000
## Raisin Bran K 160.00000 4.0000000 1.3333333
## Raisin Nut Bran G 200.00000 6.0000000 4.0000000
## Raisin Squares K 180.00000 4.0000000 0.0000000
## Rice Chex R 97.34513 0.8849558 0.0000000
## Rice Krispies K 110.00000 2.0000000 0.0000000
## Shredded Wheat 'n'Bran N 134.32836 4.4776119 0.0000000
## Shredded Wheat spoon size N 134.32836 4.4776119 0.0000000
## Smacks K 146.66667 2.6666667 1.3333333
## Special K K 110.00000 6.0000000 0.0000000
## Total Corn Flakes G 110.00000 2.0000000 1.0000000
## Total Raisin Bran G 140.00000 3.0000000 1.0000000
## Total Whole Grain G 100.00000 3.0000000 1.0000000
## Triples G 146.66667 2.6666667 1.3333333
## Trix G 110.00000 1.0000000 1.0000000
## Wheat Chex R 149.25373 4.4776119 1.4925373
## Wheaties G 100.00000 3.0000000 1.0000000
## Wheaties Honey Gold G 146.66667 2.6666667 1.3333333
## sodium fibre carbo
## 100% Bran 393.93939 30.303030 15.15152
## All-Bran 787.87879 27.272727 21.21212
## All-Bran with Extra Fiber 280.00000 28.000000 16.00000
## Apple Cinnamon Cheerios 240.00000 2.000000 14.00000
## Apple Jacks 125.00000 1.000000 11.00000
## Basic 4 280.00000 2.666667 24.00000
## Bran Chex 298.50746 5.970149 22.38806
## Bran Flakes 313.43284 7.462687 19.40299
## Cap'n'Crunch 293.33333 0.000000 16.00000
## Cheerios 232.00000 1.600000 13.60000
## Cinnamon Toast Crunch 280.00000 0.000000 17.33333
## Clusters 280.00000 4.000000 26.00000
## Cocoa Puffs 180.00000 0.000000 12.00000
## Corn Chex 280.00000 0.000000 22.00000
## Corn Flakes 290.00000 1.000000 21.00000
## Corn Pops 90.00000 1.000000 13.00000
## Count Chocula 180.00000 0.000000 12.00000
## Cracklin' Oat Bran 280.00000 8.000000 20.00000
## Crispix 220.00000 1.000000 21.00000
## Crispy Wheat & Raisins 186.66667 2.666667 14.66667
## Double Chex 253.33333 1.333333 24.00000
## Froot Loops 125.00000 1.000000 11.00000
## Frosted Flakes 266.66667 1.333333 18.66667
## Frosted Mini-Wheats 0.00000 3.750000 17.50000
## Fruit & Fibre: Dates Walnuts and Oats 238.80597 7.462687 17.91045
## Fruitful Bran 358.20896 7.462687 20.89552
## Fruity Pebbles 180.00000 0.000000 17.33333
## Golden Crisp 51.13636 0.000000 12.50000
## Golden Grahams 373.33333 0.000000 20.00000
## Grape Nuts Flakes 159.09091 3.409091 17.04545
## Grape-Nuts 680.00000 12.000000 68.00000
## Great Grains Pecan 227.27273 9.090909 39.39394
## Honey Graham Ohs 220.00000 1.000000 12.00000
## Honey Nut Cheerios 333.33333 2.000000 15.33333
## Honey-comb 135.33835 0.000000 10.52632
## Just Right Fruit & Nut 226.66667 2.666667 26.66667
## Kix 173.33333 0.000000 14.00000
## Life 223.88060 2.985075 17.91045
## Lucky Charms 180.00000 0.000000 12.00000
## Mueslix Crispy Blend 223.88060 4.477612 25.37313
## Multi-Grain Cheerios 220.00000 2.000000 15.00000
## Nut&Honey Crunch 283.58209 0.000000 22.38806
## Nutri-Grain Almond-Raisin 328.35821 4.477612 31.34328
## Oatmeal Raisin Crisp 340.00000 3.000000 27.00000
## Post Nat. Raisin Bran 298.50746 8.955224 16.41791
## Product 19 320.00000 1.000000 20.00000
## Puffed Rice 0.00000 0.000000 13.00000
## Quaker Oat Squares 270.00000 4.000000 28.00000
## Raisin Bran 280.00000 6.666667 18.66667
## Raisin Nut Bran 280.00000 5.000000 21.00000
## Raisin Squares 0.00000 4.000000 30.00000
## Rice Chex 212.38938 0.000000 20.35398
## Rice Krispies 290.00000 0.000000 22.00000
## Shredded Wheat 'n'Bran 0.00000 5.970149 28.35821
## Shredded Wheat spoon size 0.00000 4.477612 29.85075
## Smacks 93.33333 1.333333 12.00000
## Special K 230.00000 1.000000 16.00000
## Total Corn Flakes 200.00000 0.000000 21.00000
## Total Raisin Bran 190.00000 4.000000 15.00000
## Total Whole Grain 200.00000 3.000000 16.00000
## Triples 333.33333 0.000000 28.00000
## Trix 140.00000 0.000000 13.00000
## Wheat Chex 343.28358 4.477612 25.37313
## Wheaties 200.00000 3.000000 17.00000
## Wheaties Honey Gold 266.66667 1.333333 21.33333
## sugars shelf potassium vitamins
## 100% Bran 18.181818 3 848.48485 enriched
## All-Bran 15.151515 3 969.69697 enriched
## All-Bran with Extra Fiber 0.000000 3 660.00000 enriched
## Apple Cinnamon Cheerios 13.333333 1 93.33333 enriched
## Apple Jacks 14.000000 2 30.00000 enriched
## Basic 4 10.666667 3 133.33333 enriched
## Bran Chex 8.955224 1 186.56716 enriched
## Bran Flakes 7.462687 3 283.58209 enriched
## Cap'n'Crunch 16.000000 2 46.66667 enriched
## Cheerios 0.800000 1 84.00000 enriched
## Cinnamon Toast Crunch 12.000000 2 60.00000 enriched
## Clusters 14.000000 3 210.00000 enriched
## Cocoa Puffs 13.000000 2 55.00000 enriched
## Corn Chex 3.000000 1 25.00000 enriched
## Corn Flakes 2.000000 1 35.00000 enriched
## Corn Pops 12.000000 2 20.00000 enriched
## Count Chocula 13.000000 2 65.00000 enriched
## Cracklin' Oat Bran 14.000000 3 320.00000 enriched
## Crispix 3.000000 3 30.00000 enriched
## Crispy Wheat & Raisins 13.333333 3 160.00000 enriched
## Double Chex 6.666667 3 106.66667 enriched
## Froot Loops 13.000000 2 30.00000 enriched
## Frosted Flakes 14.666667 1 33.33333 enriched
## Frosted Mini-Wheats 8.750000 2 125.00000 enriched
## Fruit & Fibre: Dates Walnuts and Oats 14.925373 3 298.50746 enriched
## Fruitful Bran 17.910448 3 283.58209 enriched
## Fruity Pebbles 16.000000 2 33.33333 enriched
## Golden Crisp 17.045455 1 45.45455 enriched
## Golden Grahams 12.000000 2 60.00000 enriched
## Grape Nuts Flakes 5.681818 3 96.59091 enriched
## Grape-Nuts 12.000000 3 360.00000 enriched
## Great Grains Pecan 12.121212 3 303.03030 enriched
## Honey Graham Ohs 11.000000 2 45.00000 enriched
## Honey Nut Cheerios 13.333333 1 120.00000 enriched
## Honey-comb 8.270677 1 26.31579 enriched
## Just Right Fruit & Nut 12.000000 3 126.66667 100%
## Kix 2.000000 2 26.66667 enriched
## Life 8.955224 2 141.79104 enriched
## Lucky Charms 12.000000 2 55.00000 enriched
## Mueslix Crispy Blend 19.402985 3 238.80597 enriched
## Multi-Grain Cheerios 6.000000 1 90.00000 enriched
## Nut&Honey Crunch 13.432836 2 59.70149 enriched
## Nutri-Grain Almond-Raisin 10.447761 3 194.02985 enriched
## Oatmeal Raisin Crisp 20.000000 3 240.00000 enriched
## Post Nat. Raisin Bran 20.895522 3 388.05970 enriched
## Product 19 3.000000 3 45.00000 100%
## Puffed Rice 0.000000 3 15.00000 none
## Quaker Oat Squares 12.000000 3 220.00000 enriched
## Raisin Bran 16.000000 2 320.00000 enriched
## Raisin Nut Bran 16.000000 3 280.00000 enriched
## Raisin Squares 12.000000 3 220.00000 enriched
## Rice Chex 1.769912 1 26.54867 enriched
## Rice Krispies 3.000000 1 35.00000 enriched
## Shredded Wheat 'n'Bran 0.000000 1 208.95522 none
## Shredded Wheat spoon size 0.000000 1 179.10448 none
## Smacks 20.000000 2 53.33333 enriched
## Special K 3.000000 1 55.00000 enriched
## Total Corn Flakes 3.000000 3 35.00000 100%
## Total Raisin Bran 14.000000 3 230.00000 100%
## Total Whole Grain 3.000000 3 110.00000 100%
## Triples 4.000000 3 80.00000 enriched
## Trix 12.000000 2 25.00000 enriched
## Wheat Chex 4.477612 1 171.64179 enriched
## Wheaties 3.000000 1 110.00000 enriched
## Wheaties Honey Gold 10.666667 1 80.00000 enriched
# Per-column summary: level counts for factors, quartiles and mean for numerics.
summary(UScereal)
## mfr calories protein fat sodium
## G:22 Min. : 50.0 Min. : 0.7519 Min. :0.000 Min. : 0.0
## K:21 1st Qu.:110.0 1st Qu.: 2.0000 1st Qu.:0.000 1st Qu.:180.0
## N: 3 Median :134.3 Median : 3.0000 Median :1.000 Median :232.0
## P: 9 Mean :149.4 Mean : 3.6837 Mean :1.423 Mean :237.8
## Q: 5 3rd Qu.:179.1 3rd Qu.: 4.4776 3rd Qu.:2.000 3rd Qu.:290.0
## R: 5 Max. :440.0 Max. :12.1212 Max. :9.091 Max. :787.9
## fibre carbo sugars shelf
## Min. : 0.000 Min. :10.53 Min. : 0.00 Min. :1.000
## 1st Qu.: 0.000 1st Qu.:15.00 1st Qu.: 4.00 1st Qu.:1.000
## Median : 2.000 Median :18.67 Median :12.00 Median :2.000
## Mean : 3.871 Mean :19.97 Mean :10.05 Mean :2.169
## 3rd Qu.: 4.478 3rd Qu.:22.39 3rd Qu.:14.00 3rd Qu.:3.000
## Max. :30.303 Max. :68.00 Max. :20.90 Max. :3.000
## potassium vitamins
## Min. : 15.00 100% : 5
## 1st Qu.: 45.00 enriched:57
## Median : 96.59 none : 3
## Mean :159.12
## 3rd Qu.:220.00
## Max. :969.70
# Jittered scatter of shelf position against manufacturer. The data frame is
# passed explicitly, so no attach() is needed here; UScereal is already on the
# search path from the earlier attach(), and attaching it again would only
# stack a second copy and trigger masking messages.
ggplot(data = UScereal) +
  geom_point(mapping = aes(x = mfr, y = shelf), position = "jitter")
# Scatterplot of sugars (y) against carbohydrates (x) with a least-squares line.
# The regression must model the y-axis variable as the response (sugars ~ carbo);
# fitting carbo ~ sugars would draw the line for the transposed plot.
plot(sugars ~ carbo, data = UScereal,
     main = "Carbo and Sugars Scatterplot",
     xlab = "Carbohydrates", ylab = "Sugars ", pch = 19)
abline(lm(sugars ~ carbo, data = UScereal))

# Jittered scatter of shelf position against fat content.
ggplot(data = UScereal) +
  geom_point(mapping = aes(x = fat, y = shelf), position = "jitter")

# Jittered scatter of manufacturer against fibre content.
ggplot(data = UScereal) +
  geom_point(mapping = aes(x = fibre, y = mfr), position = "jitter")
# Scatterplot of sugars (y) against sodium (x) with a least-squares line.
# The response in lm() has to be the variable on the y-axis, i.e. sugars.
plot(sugars ~ sodium, data = UScereal,
     main = "Sodium and Sugars Scatterplot",
     xlab = "Sodium", ylab = "Sugars ", pch = 19)
abline(lm(sugars ~ sodium, data = UScereal))
SimpleR 4.9
# Load the mtcars data set and list its column names.
data(mtcars)
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
# Quartiles and mean for every mtcars column.
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Print up to the first 32 rows of mtcars.
head(mtcars, 32)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Compact structure overview: dimensions plus the type of each column.
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Descriptive statistics per column (n, mean, sd, median, skew, kurtosis, ...).
# NOTE(review): describe() is presumably psych::describe, since psych is loaded
# at the top of the file — confirm no other attached package overrides it.
describe(mtcars)
## vars n mean sd median trimmed mad min max range skew
## mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 2 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 3 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 4 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 5 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 6 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 7 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 8 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 9 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 10 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 11 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
# Fuel-economy values of the cars that exceed 30 miles per gallon.
mtcars$mpg[mtcars$mpg > 30]
## [1] 32.4 30.4 33.9 30.4
# Frequency count of each distinct mpg value.
table(mtcars$mpg)
##
## 10.4 13.3 14.3 14.7 15 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.7
## 2 1 1 1 1 2 1 1 1 1 1 1 1 2 1
## 21 21.4 21.5 22.8 24.4 26 27.3 30.4 32.4 33.9
## 2 2 1 2 1 1 1 2 1 1
# Put the mtcars columns on the search path so they can be referenced by bare
# name.
# NOTE(review): this masks the `mpg` data set exported by ggplot2 (see the
# message printed below), so a bare `mpg` resolves to the mtcars column until
# something else redefines it.
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg
# Scatterplot of miles per gallon (y) against cylinder count (x) with a
# least-squares line. The response in lm() must be the y-axis variable (mpg);
# lm(cyl ~ mpg) would give the line for the transposed plot.
plot(mpg ~ cyl, data = mtcars,
     main = "Cyl and MPG Scatterplot",
     xlab = "Cylinders", ylab = "Miles Per Gallon ", pch = 19)
abline(lm(mpg ~ cyl, data = mtcars))
R for Data Science 3.3.1 Exercise 2
# Print the mpg data set shipped with ggplot2, referenced explicitly by
# namespace.
ggplot2::mpg
## # A tibble: 234 x 11
## manufacturer model displ year cyl trans drv cty hwy fl cla~
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <ch>
## 1 audi a4 1.8 1999 4 auto~ f 18 29 p com~
## 2 audi a4 1.8 1999 4 manu~ f 21 29 p com~
## 3 audi a4 2 2008 4 manu~ f 20 31 p com~
## 4 audi a4 2 2008 4 auto~ f 21 30 p com~
## 5 audi a4 2.8 1999 6 auto~ f 16 26 p com~
## 6 audi a4 2.8 1999 6 manu~ f 18 26 p com~
## 7 audi a4 3.1 2008 6 auto~ f 18 27 p com~
## 8 audi a4 q~ 1.8 1999 4 manu~ 4 18 26 p com~
## 9 audi a4 q~ 1.8 1999 4 auto~ 4 16 25 p com~
## 10 audi a4 q~ 2 2008 4 manu~ 4 20 28 p com~
## # ... with 224 more rows
# Inspect the structure of the ggplot2 data set. The namespace prefix makes sure
# the tibble is inspected rather than a same-named numeric vector that may sit
# earlier on the search path (e.g. a column exposed by attach()).
str(ggplot2::mpg)
Categorical: model, year, cyl, trans, drv, fl, class
Continuous: displ, cty, hwy
R for Data Science 3.5.1 Exercise 3
# Copy ggplot2's mpg data set into the workspace so the bare name `mpg` refers
# to it.
data(mpg)

# Engine displacement vs highway mpg, one panel row per drive type.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)

# Same scatter, one panel column per cylinder count.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)
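The . is a placeholder meaning "no faceting variable in this dimension". In facet_grid(drv ~ .) the variable drv defines the rows, so the panels are stacked vertically; in facet_grid(. ~ cyl) the variable cyl defines the columns, so the panels are placed side by side.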
R for Data Science 3.6.1 Exercise 2
# Displacement vs highway mpg, coloured by drive type; because the colour
# mapping is set globally, both the points and the smoother are split by drv.
# se = FALSE suppresses the confidence ribbon.
ggplot(mpg, aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
This is about what I expected, but now I understand the color aesthetic better. I had been thinking of color as a fixed value that I specify for the whole graph (e.g. "blue"), as opposed to mapping a variable to color and letting the program select the colors.
R for Data Science 3.7.1 Exercise 1
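The default geom associated with stat_summary() is geom_pointrange(). The plot below sets geom = 'linerange' explicitly and adds the raw observations with geom_point().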
# Depth by cut: raw points plus a vertical range from the minimum to the maximum
# depth within each cut.
# NOTE(review): fun.y / fun.ymin / fun.ymax appear to be deprecated in newer
# ggplot2 releases in favour of fun / fun.min / fun.max — confirm against the
# installed version before renaming.
ggplot(diamonds, aes(x = cut, y = depth)) +
  geom_point() +
  stat_summary(
    geom = "linerange",
    fun.y = median,
    fun.ymin = min,
    fun.ymax = max
  )
R for Data Science 3.8.1 Exercise 1
# Plain scatter of city vs highway mpg; both columns are integers, so many
# observations fall on the same coordinates and overplot.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point()

# Same data with the default random jitter applied, which separates the
# overlapping points.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter()
By adding jitter, we can see a lot more datapoints giving us a better understanding of the data.
R for Data Science 3.9.1 Exercise 4
# City vs highway mpg with a reference line from geom_abline() and a fixed
# aspect ratio from coord_fixed().
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed()
There’s a direct relationship between cars’ city mpg and hwy mpg. The higher the city mpg, the higher the highway mpg probably is.
coord_fixed() fixes the aspect ratio so that one unit on the x-axis is drawn the same length as one unit on the y-axis, so the two variables can be properly compared.
geom_abline() adds the straight reference line in the above graph; called with no arguments it draws the line y = x (slope 1, intercept 0).
SQL Functions
# Build two small tables that share an `id` key, then join them SQL-style.
id <- c(1, 2, 3, 4, 5)
age <- c(31, 42, 51, 55, 70)
gender <- c(0, 0, 1, 1, 1)

# Name the columns at construction time. The earlier version called
# colnames(mydata1) twice, which relabelled the age column of mydata1 as
# "gender" and never touched mydata2.
mydata1 <- data.frame(id = id, age = age)
mydata2 <- data.frame(id = id, gender = gender)

# Inner join on the shared key; the result already carries the columns
# id, age, gender, so no renaming is needed afterwards.
mydata3 <- merge(mydata1, mydata2, by = "id")
mydata3
## id age gender
## 1 1 31 0
## 2 2 42 0
## 3 3 51 1
## 4 4 55 1
## 5 5 70 1
# Scrape the titles and runtimes of movies currently in theaters from IMDb.
movie <- read_html("https://www.imdb.com/movies-in-theaters/")
newmovies <- movie %>% html_nodes("h4 a") %>% html_text()
runtime <- movie %>% html_nodes("time") %>% html_text()

# Strip the " min" suffix so the runtime can be stored as a number.
runtime <- as.numeric(gsub(" min", "", runtime))

# The two selectors are matched independently, so they can return different
# numbers of elements (e.g. a listing without a runtime). cbind() would
# silently recycle the shorter vector and attach wrong runtimes to the last
# titles, so pad with NA instead and report the mismatch.
# NOTE(review): padding at the end keeps the table honest about missing values
# but cannot tell WHICH title lacks a runtime; scraping both fields from each
# listing's own container node would align them exactly — confirm the page
# structure before doing that.
if (length(runtime) != length(newmovies)) {
  warning(
    "Scraped ", length(newmovies), " titles but ", length(runtime),
    " runtimes; padding the shorter column with NA.",
    call. = FALSE
  )
  n_rows <- max(length(newmovies), length(runtime))
  length(newmovies) <- n_rows
  length(runtime) <- n_rows
}

# data.frame() keeps runtime numeric; going through cbind() first would coerce
# every column to character.
myimdbdata <- data.frame(newmovies, runtime, stringsAsFactors = FALSE)
myimdbdata
## newmovies runtime
## 1 The Happytime Murders (2018) 91
## 2 A.X.L. (2018) 100
## 3 Searching (2018) 102
## 4 Papillon (2017) 133
## 5 Support the Girls (2018) 90
## 6 Replicas (2018) 113
## 7 The Bookshop (2017) 108
## 8 Beautifully Broken (2018) 95
## 9 John McEnroe: In the Realm of Perfection (2018) 120
## 10 An L.A. Minute (2018) 113
## 11 Crazy Rich Asians (2018) 94
## 12 The Meg (2018) 96
## 13 Mile 22 (2018) 147
## 14 Alpha (2018) 104
## 15 Mission: Impossible - Fallout (2018) 117
## 16 Christopher Robin (2018) 114
## 17 The Spy Who Dumped Me (2018) 97
## 18 Mamma Mia! Here We Go Again (2018) 135
## 19 Hotel Transylvania 3: Summer Vacation (2018) 93
## 20 BlacKkKlansman (2018) 91
## 21 Slender Man (2018) 100