#---- datasets in R ----
# Show the help index of the built-in `datasets` package.
library(help = "datasets")
# List the data sets available in the packages on the search path.
data()
#---- IRIS ----
# Load the built-in iris data set into the workspace
data("iris")
# First six rows (the default row count of head())
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Last six rows (the default row count of tail())
tail(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Per-column overview: min, quartiles, mean and max for the numeric columns,
# and the number of rows per level for the Species factor
summary(object = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Column means and standard deviations of the four numeric measurements
colMeans(iris[, 1:4]) # columns 1-4 only; column 5 (Species) is a factor
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
# vapply() walks the data-frame columns directly and guarantees exactly one
# numeric value per column; apply() would first coerce the data frame to a
# matrix. The result is the same named numeric vector.
vapply(iris[, 1:4], sd, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0.8280661 0.4358663 1.7652982 0.7622377
# Histograms of three of the four measurements (Petal.Width is not plotted).
# Each vector is passed as an `iris$<column>` expression; hist() derives its
# default title and x-axis label from that expression text, so rewriting the
# argument would change the labels on the plots.
hist(iris$Sepal.Length)

hist(iris$Sepal.Width)

hist(iris$Petal.Length)

# One box per numeric measurement, side by side on a shared axis
boxplot(x = iris[1:4])

# Sepal length split by species, with default styling.
# The formula interface is needed here: passing `iris$Species` and
# `iris$Sepal.Length` as two positional vectors makes boxplot() draw them as
# two unrelated boxes (the factor's integer codes next to the pooled lengths)
# rather than one box per species.
boxplot(Sepal.Length ~ Species, data = iris)

# Sepal length by species, with a title, axis labels and one fill colour
# per species
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  col = c("lightblue", "lightgreen", "lightpink"),
  xlab = "品種",
  ylab = "花萼長度 (cm)",
  main = "不同品種的花萼長度比較"
)

# Scatterplot matrix of all five columns. data.matrix() turns the Species
# factor into its integer codes so it can be drawn alongside the numeric
# measurements; this is the call plot() makes for a data frame with more
# than two columns.
pairs(data.matrix(iris))

# Number of observations per species
table(iris[["Species"]])
##
## setosa versicolor virginica
## 50 50 50
# Mean of each of the four measurements within every species
aggregate(x = iris[1:4], by = list(Species = iris$Species), FUN = mean)
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.006 3.428 1.462 0.246
## 2 versicolor 5.936 2.770 4.260 1.326
## 3 virginica 6.588 2.974 5.552 2.026
#---- Inference ----
# Simple linear regression: petal width explained by petal length
model <- lm(formula = Petal.Width ~ Petal.Length, data = iris)
# Shapiro-Wilk test on the model residuals as a normality check
shapiro.test(x = residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.98378, p-value = 0.07504
# Print the residual of every observation
resid(model)
## 1 2 3 4 5
## -1.898206e-02 -1.898206e-02 2.259348e-02 -6.055760e-02 -1.898206e-02
## 6 7 8 9 10
## 5.629131e-02 8.101794e-02 -6.055760e-02 -1.898206e-02 -1.605576e-01
## 11 12 13 14 15
## -6.055760e-02 -1.021331e-01 -1.189821e-01 5.744563e-03 6.416902e-02
## 16 17 18 19 20
## 1.394424e-01 2.225935e-01 8.101794e-02 -4.370869e-02 3.944240e-02
## 21 22 23 24 25
## -1.437087e-01 1.394424e-01 1.473201e-01 1.562913e-01 -2.268598e-01
## 26 27 28 29 30
## -1.021331e-01 9.786686e-02 -6.055760e-02 -1.898206e-02 -1.021331e-01
## 31 32 33 34 35
## -1.021331e-01 1.394424e-01 -1.605576e-01 -1.898206e-02 -6.055760e-02
## 36 37 38 39 40
## 6.416902e-02 2.259348e-02 -1.189821e-01 2.259348e-02 -6.055760e-02
## 41 42 43 44 45
## 1.225935e-01 1.225935e-01 2.259348e-02 2.978669e-01 -2.685977e-02
## 46 47 48 49 50
## 8.101794e-02 -1.021331e-01 -1.898206e-02 -6.055760e-02 -1.898206e-02
## 51 52 53 54 55
## -1.909749e-01 -7.823852e-03 -1.741260e-01 5.385591e-05 -4.939939e-02
## 56 57 58 59 60
## -2.078239e-01 9.025064e-03 -8.917353e-03 -2.493994e-01 1.416294e-01
## 61 62 63 64 65
## -9.206844e-02 1.169028e-01 -2.999461e-01 -1.909749e-01 1.663560e-01
## 66 67 68 69 70
## -6.624831e-02 -7.823852e-03 -3.415217e-01 -7.823852e-03 -1.583706e-01
## 71 72 73 74 75
## 1.674495e-01 5.385591e-05 -1.741260e-01 -3.909749e-01 -1.246728e-01
## 76 77 78 79 80
## -6.624831e-02 -2.325505e-01 -1.570156e-02 -7.823852e-03 -9.206844e-02
## 81 82 83 84 85
## -1.167951e-01 -1.752195e-01 -5.837060e-02 -1.572771e-01 -7.823852e-03
## 86 87 88 89 90
## 9.217615e-02 -9.097494e-02 -1.662483e-01 -4.152169e-02 5.385591e-05
## 91 92 93 94 95
## -2.662483e-01 -1.493994e-01 -9.994614e-02 -8.917353e-03 -8.309723e-02
## 96 97 98 99 100
## -1.830972e-01 -8.309723e-02 -1.246728e-01 2.158093e-01 -4.152169e-02
## 101 102 103 104 105
## 3.685430e-01 1.427229e-01 1.011856e-02 -1.651548e-01 1.516941e-01
## 106 107 108 109 110
## -2.809102e-01 1.921761e-01 -4.561836e-01 -2.483059e-01 3.269675e-01
## 111 112 113 114 115
## 2.427229e-01 5.957181e-02 1.764207e-01 2.842984e-01 6.427229e-01
## 116 117 118 119 120
## 4.595718e-01 -1.235793e-01 -2.224858e-01 -2.056369e-01 -2.157016e-01
## 121 122 123 124 125
## 2.932696e-01 3.258740e-01 -4.224858e-01 1.258740e-01 9.326965e-02
## 126 127 128 129 130
## -3.314570e-01 1.674495e-01 1.258740e-01 1.348452e-01 -4.483059e-01
## 131 132 133 134 135
## -2.730325e-01 -2.977591e-01 2.348452e-01 -2.572771e-01 -5.651548e-01
## 136 137 138 139 140
## 1.269675e-01 4.348452e-01 -1.235793e-01 1.674495e-01 2.179963e-01
## 141 142 143 144 145
## 4.348452e-01 5.427229e-01 1.427229e-01 2.101186e-01 4.932696e-01
## 146 147 148 149 150
## 5.011474e-01 1.842984e-01 2.011474e-01 4.179963e-01 4.272290e-02
# Inspect the fitted model: coefficients, residual quantiles and fit statistics
summary(object = model)
##
## Call:
## lm(formula = Petal.Width ~ Petal.Length, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56515 -0.12358 -0.01898 0.13288 0.64272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.363076 0.039762 -9.131 4.7e-16 ***
## Petal.Length 0.415755 0.009582 43.387 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2065 on 148 degrees of freedom
## Multiple R-squared: 0.9271, Adjusted R-squared: 0.9266
## F-statistic: 1882 on 1 and 148 DF, p-value: < 2.2e-16
# Scatter plot of petal width against petal length; the axis labels and title
# are given explicitly
with(
  iris,
  plot(
    x = Petal.Length,
    y = Petal.Width,
    col = "steelblue",
    pch = 19,
    xlab = "Petal Length",
    ylab = "Petal Width",
    main = "Petal.Length 與 Petal.Width 的線性模型"
  )
)
# Overlay the fitted regression line from the linear model
abline(reg = model, lwd = 1, col = "red")
