library(MASS)
# Load the Cars93 data set into the workspace and keep a working copy
# under a shorter name for the analysis below.
data(list = "Cars93")
dat <- Cars93

# Preview the first rows to see which columns are available.
head(x = dat)
## Manufacturer Model Type Min.Price Price Max.Price MPG.city MPG.highway
## 1 Acura Integra Small 12.9 15.9 18.8 25 31
## 2 Acura Legend Midsize 29.2 33.9 38.7 18 25
## 3 Audi 90 Compact 25.9 29.1 32.3 20 26
## 4 Audi 100 Midsize 30.8 37.7 44.6 19 26
## 5 BMW 535i Midsize 23.7 30.0 36.2 22 30
## 6 Buick Century Midsize 14.2 15.7 17.3 22 31
## AirBags DriveTrain Cylinders EngineSize Horsepower RPM
## 1 None Front 4 1.8 140 6300
## 2 Driver & Passenger Front 6 3.2 200 5500
## 3 Driver only Front 6 2.8 172 5500
## 4 Driver & Passenger Front 6 2.8 172 5500
## 5 Driver only Rear 4 3.5 208 5700
## 6 Driver only Front 4 2.2 110 5200
## Rev.per.mile Man.trans.avail Fuel.tank.capacity Passengers Length Wheelbase
## 1 2890 Yes 13.2 5 177 102
## 2 2335 Yes 18.0 5 195 115
## 3 2280 Yes 16.9 5 180 102
## 4 2535 Yes 21.1 6 193 106
## 5 2545 Yes 21.1 4 186 109
## 6 2565 No 16.4 6 189 105
## Width Turn.circle Rear.seat.room Luggage.room Weight Origin Make
## 1 68 37 26.5 11 2705 non-USA Acura Integra
## 2 71 38 30.0 15 3560 non-USA Acura Legend
## 3 67 37 28.0 14 3375 non-USA Audi 90
## 4 70 37 31.0 17 3405 non-USA Audi 100
## 5 69 39 27.0 13 3640 non-USA BMW 535i
## 6 69 41 28.0 16 2880 USA Buick Century
library(ggplot2)
# Box plot of Price by country of origin, with one box per DriveTrain
# level inside each origin group. Outlier points are not drawn.
boxplot_price <- ggplot(
  dat,
  aes(x = factor(Origin), y = Price, fill = factor(DriveTrain))
) +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_boxplot(outliers = FALSE) +
  labs(x = "Origin", y = "price", fill = "DriveTrain") +
  # Labels are expected to follow the order of the DriveTrain levels.
  scale_fill_discrete(labels = c("4WD", "Front", "Rear"))

# Printing the object at top level renders the plot.
boxplot_price
- 독립변수를 Origin(생산국)으로 지정하고, 생산국이 미국인지 여부와
DriveTrain(구동 방식)에 따라 종속변수인 Price와 MPG.city가 각각 어떻게
달라지는지를 박스플롯으로 그렸다.
# Box plot of city fuel economy (MPG.city) by country of origin, with one
# box per DriveTrain level inside each origin group. Outlier points are
# not drawn.
boxplot_MPG.city <- ggplot(
  dat,
  aes(x = factor(Origin), y = MPG.city, fill = factor(DriveTrain))
) +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_boxplot(outliers = FALSE) +
  # Legend title corrected from the misspelled "DirveTrain".
  labs(x = "Origin", y = "MPG.city", fill = "DriveTrain") +
  # Labels are expected to follow the order of the DriveTrain levels.
  scale_fill_discrete(labels = c("4WD", "Front", "Rear"))

# Printing the object at top level renders the plot.
boxplot_MPG.city
library('gmodels')
# Cross-tabulate country of origin against drive train. Each cell reports
# the observed count, the expected count and its chi-square contribution;
# row, column and table proportions are switched off. chisq = TRUE also
# prints Pearson's chi-squared test for the table.
# Logical arguments use TRUE/FALSE because `T`/`F` can be reassigned.
# NOTE(review): the recorded output warns that the chi-squared
# approximation may be incorrect (one expected count is below 5) --
# consider whether an exact test (fisher = TRUE) is more appropriate.
CrossTable(
  dat$Origin, dat$DriveTrain,
  chisq = TRUE,
  expected = TRUE,
  dnn = c("생산국", "구동 방식"),
  prop.r = FALSE, prop.c = FALSE, prop.t = FALSE
)
## Warning in chisq.test(t, correct = FALSE, ...): Chi-squared approximation may
## be incorrect
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Expected N |
## | Chi-square contribution |
## |-------------------------|
##
##
## Total Observations in Table: 93
##
##
## | 구동 방식
## 생산국 | 4WD | Front | Rear | Row Total |
## -------------|-----------|-----------|-----------|-----------|
## USA | 5 | 34 | 9 | 48 |
## | 5.161 | 34.581 | 8.258 | |
## | 0.005 | 0.010 | 0.067 | |
## -------------|-----------|-----------|-----------|-----------|
## non-USA | 5 | 33 | 7 | 45 |
## | 4.839 | 32.419 | 7.742 | |
## | 0.005 | 0.010 | 0.071 | |
## -------------|-----------|-----------|-----------|-----------|
## Column Total | 10 | 67 | 16 | 93 |
## -------------|-----------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 0.1683263 d.f. = 2 p = 0.9192812
##
##
##
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.