CW3a

library(lattice)
# Load the `environmental` dataset into the workspace
data("environmental")
# Show the first 6 rows
head(environmental)
##   ozone radiation temperature wind
## 1    41       190          67  7.4
## 2    36       118          72  8.0
## 3    12       149          74 12.6
## 4    18       313          62 11.5
## 5    23       299          65  8.6
## 6    19        99          59 13.8
# Inspect the structure (dimensions and column types) of `environmental`
str(environmental)
## 'data.frame':    111 obs. of  4 variables:
##  $ ozone      : num  41 36 12 18 23 19 8 16 11 14 ...
##  $ radiation  : num  190 118 149 313 299 99 19 256 290 274 ...
##  $ temperature: num  67 72 74 62 65 59 61 69 66 68 ...
##  $ wind       : num  7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
# Per-column summary of `environmental`
summary(environmental)
##      ozone         radiation      temperature         wind       
##  Min.   :  1.0   Min.   :  7.0   Min.   :57.00   Min.   : 2.300  
##  1st Qu.: 18.0   1st Qu.:113.5   1st Qu.:71.00   1st Qu.: 7.400  
##  Median : 31.0   Median :207.0   Median :79.00   Median : 9.700  
##  Mean   : 42.1   Mean   :184.8   Mean   :77.79   Mean   : 9.939  
##  3rd Qu.: 62.0   3rd Qu.:255.5   3rd Qu.:84.50   3rd Qu.:11.500  
##  Max.   :168.0   Max.   :334.0   Max.   :97.00   Max.   :20.700
# Histograms of the four variables. hist() draws counts on the y axis by
# default, so every y label uses the same "frequency" wording.

# Distribution of ozone
hist(
  environmental$ozone,
  main = "ozone", xlab = "臭氧濃度", ylab = "頻率", col = "gray"
)

# Distribution of radiation
hist(
  environmental$radiation,
  main = "Radiation", xlab = "輻射值", ylab = "頻率", col = "yellow"
)

# Distribution of temperature
hist(
  environmental$temperature,
  main = "Temperature", xlab = "溫度(F)", ylab = "頻率", col = "red"
)

# Distribution of wind
hist(
  environmental$wind,
  main = "Wind", xlab = "風速", ylab = "頻率", col = "blue"
)

# Install moments only when it is not already available, so that re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("moments", quietly = TRUE)) {
  install.packages("moments", repos = "https://cran.rstudio.com/")
}
## 將程式套件安載入 'C:/Users/user/Documents/R/win-library/4.1'
## (因為 'lib' 沒有被指定)
## package 'moments' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\user\AppData\Local\Temp\RtmpegFGqc\downloaded_packages
library(moments)
## Warning: 套件 'moments' 是用 R 版本 4.1.1 來建造的
# Compute mean, standard deviation, skewness and kurtosis of one variable.
#
# x: a numeric vector; every statistic is computed with na.rm = TRUE.
# Returns an unnamed numeric vector of length 4, in the order
# mean, sd, skewness, kurtosis.
#
# skewness() and kurtosis() are resolved from the attached moments package
# (library(moments) earlier in this script); if it is not attached the call
# fails immediately instead of continuing after a require() warning.
my_summary <- function(x) {
  stat_funs <- list(mean, sd, skewness, kurtosis)
  # vapply() guarantees exactly one number per statistic.
  vapply(stat_funs, function(f) f(x, na.rm = TRUE), numeric(1))
}
# Apply my_summary to each of the four columns. The result rows are unlabeled
# and follow the order used inside my_summary: mean, sd, skewness, kurtosis.
sapply(environmental[, c(1:4)], my_summary)
##          ozone   radiation temperature      wind
## [1,] 42.099099 184.8018018  77.7927928 9.9387387
## [2,] 33.275969  91.1523021   9.5299691 3.5592178
## [3,]  1.248104  -0.4862466  -0.2250959 0.4537189
## [4,]  4.204408   2.0707751   2.3319550 3.2808871
# Correlations between all pairs of variables, rounded to 3 decimals
round(cor(environmental), 3)
##              ozone radiation temperature   wind
## ozone        1.000     0.348       0.699 -0.613
## radiation    0.348     1.000       0.294 -0.127
## temperature  0.699     0.294       1.000 -0.497
## wind        -0.613    -0.127      -0.497  1.000
# Correlation coefficient between temperature and wind
with(environmental, round(cor(temperature, wind), 3))
## [1] -0.497
# Scatter plot of wind against temperature
with(
  environmental,
  plot(temperature, wind, col = 5, pch = 16, xlab = "溫度(F)", ylab = "風速")
)

溫度和風速呈現負相關,代表風速越大氣溫越低

# Correlation coefficient between temperature and ozone
with(environmental, round(cor(temperature, ozone), 3))
## [1] 0.699
# Scatter plot of ozone against temperature
with(
  environmental,
  plot(
    temperature, ozone,
    col = 5, pch = 16, xlab = "溫度(F)", ylab = "臭氧濃度"
  )
)

溫度和臭氧濃度呈正相關,代表溫度越高臭氧濃度也越高

# Correlation coefficient between temperature and radiation
round(cor(environmental$temperature, environmental$radiation), 3)
## [1] 0.294
# Scatter plot of radiation against temperature; the y variable must be
# radiation so that it matches the axis label and the correlation above.
plot(
  environmental$temperature, environmental$radiation,
  col = 5, pch = 16, xlab = "溫度(F)", ylab = "輻射值"
)

溫度和輻射值呈微弱的正相關(r = 0.294),代表溫度越高輻射值大致也越高

CW3b

# Load the `anscombe` dataset into the workspace
data(anscombe)
# Show the first 6 rows
head(anscombe)
##   x1 x2 x3 x4   y1   y2    y3   y4
## 1 10 10 10  8 8.04 9.14  7.46 6.58
## 2  8  8  8  8 6.95 8.14  6.77 5.76
## 3 13 13 13  8 7.58 8.74 12.74 7.71
## 4  9  9  9  8 8.81 8.77  7.11 8.84
## 5 11 11 11  8 8.33 9.26  7.81 8.47
## 6 14 14 14  8 9.96 8.10  8.84 7.04
# Inspect the structure (dimensions and column types) of `anscombe`
str(anscombe)
## 'data.frame':    11 obs. of  8 variables:
##  $ x1: num  10 8 13 9 11 14 6 4 12 7 ...
##  $ x2: num  10 8 13 9 11 14 6 4 12 7 ...
##  $ x3: num  10 8 13 9 11 14 6 4 12 7 ...
##  $ x4: num  8 8 8 8 8 8 8 19 8 8 ...
##  $ y1: num  8.04 6.95 7.58 8.81 8.33 ...
##  $ y2: num  9.14 8.14 8.74 8.77 9.26 8.1 6.13 3.1 9.13 7.26 ...
##  $ y3: num  7.46 6.77 12.74 7.11 7.81 ...
##  $ y4: num  6.58 5.76 7.71 8.84 8.47 7.04 5.25 12.5 5.56 7.91 ...
# Per-column summary of `anscombe`
summary(anscombe)
##        x1             x2             x3             x4           y1        
##  Min.   : 4.0   Min.   : 4.0   Min.   : 4.0   Min.   : 8   Min.   : 4.260  
##  1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 8   1st Qu.: 6.315  
##  Median : 9.0   Median : 9.0   Median : 9.0   Median : 8   Median : 7.580  
##  Mean   : 9.0   Mean   : 9.0   Mean   : 9.0   Mean   : 9   Mean   : 7.501  
##  3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.: 8   3rd Qu.: 8.570  
##  Max.   :14.0   Max.   :14.0   Max.   :14.0   Max.   :19   Max.   :10.840  
##        y2              y3              y4        
##  Min.   :3.100   Min.   : 5.39   Min.   : 5.250  
##  1st Qu.:6.695   1st Qu.: 6.25   1st Qu.: 6.170  
##  Median :8.140   Median : 7.11   Median : 7.040  
##  Mean   :7.501   Mean   : 7.50   Mean   : 7.501  
##  3rd Qu.:8.950   3rd Qu.: 7.98   3rd Qu.: 8.190  
##  Max.   :9.260   Max.   :12.74   Max.   :12.500
# Basic statistics of one variable: mean, standard deviation, skewness and
# kurtosis.
#
# x: a numeric vector; every statistic is computed with na.rm = TRUE.
# Returns an unnamed numeric vector of length 4, in the order
# mean, sd, skewness, kurtosis.
#
# skewness() and kurtosis() are resolved from the attached moments package
# (library(moments) earlier in this script); if it is not attached the call
# fails immediately instead of continuing after a require() warning.
my_summary <- function(x) {
  stat_funs <- list(mean, sd, skewness, kurtosis)
  # vapply() guarantees exactly one number per statistic.
  vapply(stat_funs, function(f) f(x, na.rm = TRUE), numeric(1))
}
# Apply my_summary to each of the eight columns. The result rows are unlabeled
# and follow the order used inside my_summary: mean, sd, skewness, kurtosis.
sapply(anscombe[, c(1:8)], my_summary)
##            x1       x2       x3       x4          y1        y2       y3
## [1,] 9.000000 9.000000 9.000000 9.000000  7.50090909  7.500909 7.500000
## [2,] 3.316625 3.316625 3.316625 3.316625  2.03156814  2.031657 2.030424
## [3,] 0.000000 0.000000 0.000000 2.846050 -0.05580807 -1.129108 1.592231
## [4,] 1.780000 1.780000 1.780000 9.100000  2.17906136  3.007674 5.130453
##            y4
## [1,] 7.500909
## [2,] 2.030579
## [3,] 1.293025
## [4,] 4.390789
# Correlations between all pairs of variables, rounded to 3 decimals
round(cor(anscombe), 3)
##        x1     x2     x3     x4     y1     y2     y3     y4
## x1  1.000  1.000  1.000 -0.500  0.816  0.816  0.816 -0.314
## x2  1.000  1.000  1.000 -0.500  0.816  0.816  0.816 -0.314
## x3  1.000  1.000  1.000 -0.500  0.816  0.816  0.816 -0.314
## x4 -0.500 -0.500 -0.500  1.000 -0.529 -0.718 -0.345  0.817
## y1  0.816  0.816  0.816 -0.529  1.000  0.750  0.469 -0.489
## y2  0.816  0.816  0.816 -0.718  0.750  1.000  0.588 -0.478
## y3  0.816  0.816  0.816 -0.345  0.469  0.588  1.000 -0.155
## y4 -0.314 -0.314 -0.314  0.817 -0.489 -0.478 -0.155  1.000
# Correlation coefficient of (x1, y1)
round(cor(anscombe$x1, anscombe$y1), 3)
## [1] 0.816
# Scatter plot of (x1, y1). One solid colour is used for all 11 points: the
# 9-colour blues9 palette would be recycled across rows and its palest shades
# are nearly invisible on a white background.
plot(
  anscombe$x1, anscombe$y1,
  col = "steelblue", pch = 16, xlab = "x1", ylab = "y1"
)

x1和y1呈正相關

# Correlation coefficient of (x2, y2)
round(cor(anscombe$x2, anscombe$y2), 3)
## [1] 0.816
# Scatter plot of (x2, y2). One solid colour is used for all 11 points: the
# 9-colour blues9 palette would be recycled across rows and its palest shades
# are nearly invisible on a white background.
plot(
  anscombe$x2, anscombe$y2,
  col = "steelblue", pch = 16, xlab = "x2", ylab = "y2"
)

x2和y2呈正相關

# Correlation coefficient of (x3, y3)
round(cor(anscombe$x3, anscombe$y3), 3)
## [1] 0.816
# Scatter plot of (x3, y3). One solid colour is used for all 11 points: the
# 9-colour blues9 palette would be recycled across rows and its palest shades
# are nearly invisible on a white background.
plot(
  anscombe$x3, anscombe$y3,
  col = "steelblue", pch = 16, xlab = "x3", ylab = "y3"
)

x3和y3呈正相關

# Correlation coefficient of (x4, y4)
round(cor(anscombe$x4, anscombe$y4), 3)
## [1] 0.817
# Scatter plot of (x4, y4). One solid colour is used for all 11 points: the
# 9-colour blues9 palette would be recycled across rows and its palest shades
# are nearly invisible on a white background.
plot(
  anscombe$x4, anscombe$y4,
  col = "steelblue", pch = 16, xlab = "x4", ylab = "y4"
)

x4和y4呈正相關

以上4筆相關係數有3筆皆為0.816,1筆為0.817,都相當接近,有可能其資料的散布情形非常相似或一致

# Scatter-plot matrix for every pair of variables: the upper panels are drawn
# with panel.smooth and the lower panels are left empty.
pairs(
  anscombe,
  upper.panel = panel.smooth,
  lower.panel = NULL,
  pch = ".",
  col = "black"
)

從圖形來看,(x1,y1)、(x2,y2)兩組不完全相同但有一點相似,和(x3,y3)、(x4,y4)兩組的散布情形則呈現極大的差異,然而相關係數卻呈現一樣或相近的數值,唯有兩兩數值呈正相關這一點是所有資料皆相同。Amazing