#---- datasets in R ----
# Show the help index of the built-in "datasets" package.
library(help = "datasets")
# With no arguments, data() lists the data sets available in the
# currently attached packages.
data()

#---- IRIS ----
# Load the built-in iris data set into the workspace.
data("iris")

# First six rows (six is the default for head()).
head(x = iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Last six rows (six is the default for tail()).
tail(x = iris, n = 6L)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica
# Per-column overview: quartiles and mean for the numeric columns,
# level counts for the Species factor.
summary(object = iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Mean and standard deviation of the measurements.
# Only the first four columns are numeric; the fifth is Species.
colMeans(iris[1:4])
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.843333     3.057333     3.758000     1.199333
# Standard deviation of each of the four measurement columns.
# vapply() walks the data-frame columns directly (no coercion to a matrix,
# as apply() would do) and guarantees one numeric value per column.
vapply(iris[, 1:4], sd, numeric(1))
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##    0.8280661    0.4358663    1.7652982    0.7622377
# Histograms of individual measurements.
hist(iris$Sepal.Length)

hist(iris$Sepal.Width)

hist(iris$Petal.Length)

# Side-by-side box plots of the four numeric measurement columns.
boxplot(iris[, 1:4])

# Sepal length grouped by species, default styling.
# The formula interface is required here: passing the factor and the
# measurement as two separate vectors draws them as two unrelated boxes
# (species codes vs. all sepal lengths) instead of one box per species.
boxplot(Sepal.Length ~ Species, data = iris)

# Same comparison with a title, axis labels and one fill colour per species.
boxplot(Sepal.Length ~ Species, data = iris,
        main = "不同品種的花萼長度比較",
        ylab = "花萼長度 (cm)",
        xlab = "品種",
        col = c("lightblue", "lightgreen", "lightpink"))

# Scatter-plot matrix of all iris columns.
plot(iris)

# Number of observations per species.
table(iris[["Species"]])
## 
##     setosa versicolor  virginica 
##         50         50         50
# Mean of every measurement column within each species.
aggregate(. ~ Species, data = iris, FUN = mean)
##      Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     setosa        5.006       3.428        1.462       0.246
## 2 versicolor        5.936       2.770        4.260       1.326
## 3  virginica        6.588       2.974        5.552       2.026
#---- Inference ----
# Simple linear regression of petal width on petal length.
model <- lm(formula = Petal.Width ~ Petal.Length, data = iris)
# Shapiro-Wilk normality test on the residuals of the fitted model.
shapiro.test(x = residuals(model))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.98378, p-value = 0.07504
# Print the residual of every observation used in the fit.
residuals(object = model)
##             1             2             3             4             5 
## -1.898206e-02 -1.898206e-02  2.259348e-02 -6.055760e-02 -1.898206e-02 
##             6             7             8             9            10 
##  5.629131e-02  8.101794e-02 -6.055760e-02 -1.898206e-02 -1.605576e-01 
##            11            12            13            14            15 
## -6.055760e-02 -1.021331e-01 -1.189821e-01  5.744563e-03  6.416902e-02 
##            16            17            18            19            20 
##  1.394424e-01  2.225935e-01  8.101794e-02 -4.370869e-02  3.944240e-02 
##            21            22            23            24            25 
## -1.437087e-01  1.394424e-01  1.473201e-01  1.562913e-01 -2.268598e-01 
##            26            27            28            29            30 
## -1.021331e-01  9.786686e-02 -6.055760e-02 -1.898206e-02 -1.021331e-01 
##            31            32            33            34            35 
## -1.021331e-01  1.394424e-01 -1.605576e-01 -1.898206e-02 -6.055760e-02 
##            36            37            38            39            40 
##  6.416902e-02  2.259348e-02 -1.189821e-01  2.259348e-02 -6.055760e-02 
##            41            42            43            44            45 
##  1.225935e-01  1.225935e-01  2.259348e-02  2.978669e-01 -2.685977e-02 
##            46            47            48            49            50 
##  8.101794e-02 -1.021331e-01 -1.898206e-02 -6.055760e-02 -1.898206e-02 
##            51            52            53            54            55 
## -1.909749e-01 -7.823852e-03 -1.741260e-01  5.385591e-05 -4.939939e-02 
##            56            57            58            59            60 
## -2.078239e-01  9.025064e-03 -8.917353e-03 -2.493994e-01  1.416294e-01 
##            61            62            63            64            65 
## -9.206844e-02  1.169028e-01 -2.999461e-01 -1.909749e-01  1.663560e-01 
##            66            67            68            69            70 
## -6.624831e-02 -7.823852e-03 -3.415217e-01 -7.823852e-03 -1.583706e-01 
##            71            72            73            74            75 
##  1.674495e-01  5.385591e-05 -1.741260e-01 -3.909749e-01 -1.246728e-01 
##            76            77            78            79            80 
## -6.624831e-02 -2.325505e-01 -1.570156e-02 -7.823852e-03 -9.206844e-02 
##            81            82            83            84            85 
## -1.167951e-01 -1.752195e-01 -5.837060e-02 -1.572771e-01 -7.823852e-03 
##            86            87            88            89            90 
##  9.217615e-02 -9.097494e-02 -1.662483e-01 -4.152169e-02  5.385591e-05 
##            91            92            93            94            95 
## -2.662483e-01 -1.493994e-01 -9.994614e-02 -8.917353e-03 -8.309723e-02 
##            96            97            98            99           100 
## -1.830972e-01 -8.309723e-02 -1.246728e-01  2.158093e-01 -4.152169e-02 
##           101           102           103           104           105 
##  3.685430e-01  1.427229e-01  1.011856e-02 -1.651548e-01  1.516941e-01 
##           106           107           108           109           110 
## -2.809102e-01  1.921761e-01 -4.561836e-01 -2.483059e-01  3.269675e-01 
##           111           112           113           114           115 
##  2.427229e-01  5.957181e-02  1.764207e-01  2.842984e-01  6.427229e-01 
##           116           117           118           119           120 
##  4.595718e-01 -1.235793e-01 -2.224858e-01 -2.056369e-01 -2.157016e-01 
##           121           122           123           124           125 
##  2.932696e-01  3.258740e-01 -4.224858e-01  1.258740e-01  9.326965e-02 
##           126           127           128           129           130 
## -3.314570e-01  1.674495e-01  1.258740e-01  1.348452e-01 -4.483059e-01 
##           131           132           133           134           135 
## -2.730325e-01 -2.977591e-01  2.348452e-01 -2.572771e-01 -5.651548e-01 
##           136           137           138           139           140 
##  1.269675e-01  4.348452e-01 -1.235793e-01  1.674495e-01  2.179963e-01 
##           141           142           143           144           145 
##  4.348452e-01  5.427229e-01  1.427229e-01  2.101186e-01  4.932696e-01 
##           146           147           148           149           150 
##  5.011474e-01  1.842984e-01  2.011474e-01  4.179963e-01  4.272290e-02
# Inspect the model summary (coefficients, residual spread, R-squared, F-test).
summary(object = model)
## 
## Call:
## lm(formula = Petal.Width ~ Petal.Length, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56515 -0.12358 -0.01898  0.13288  0.64272 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.363076   0.039762  -9.131  4.7e-16 ***
## Petal.Length  0.415755   0.009582  43.387  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2065 on 148 degrees of freedom
## Multiple R-squared:  0.9271, Adjusted R-squared:  0.9266 
## F-statistic:  1882 on 1 and 148 DF,  p-value: < 2.2e-16
# Scatter plot of petal width against petal length; axis labels and title
# are given explicitly, so they do not depend on how the columns are named
# in the call.
with(
  iris,
  plot(
    x = Petal.Length,
    y = Petal.Width,
    pch = 19,
    col = "steelblue",
    xlab = "Petal Length",
    ylab = "Petal Width",
    main = "Petal.Length 與 Petal.Width 的線性模型"
  )
)

# Overlay the fitted regression line.
abline(model, lwd = 1, col = "red")