library(lattice)
# Load the `environmental` data set into the workspace.
data(list = "environmental")
# Preview the first six rows.
head(environmental, n = 6L)
## ozone radiation temperature wind
## 1 41 190 67 7.4
## 2 36 118 72 8.0
## 3 12 149 74 12.6
## 4 18 313 62 11.5
## 5 23 299 65 8.6
## 6 19 99 59 13.8
# Inspect the structure (column names and types) of `environmental`.
str(environmental)
## 'data.frame': 111 obs. of 4 variables:
## $ ozone : num 41 36 12 18 23 19 8 16 11 14 ...
## $ radiation : num 190 118 149 313 299 99 19 256 290 274 ...
## $ temperature: num 67 72 74 62 65 59 61 69 66 68 ...
## $ wind : num 7.4 8 12.6 11.5 8.6 13.8 20.1 9.7 9.2 10.9 ...
# Print per-column summary statistics for `environmental`.
summary(environmental)
## ozone radiation temperature wind
## Min. : 1.0 Min. : 7.0 Min. :57.00 Min. : 2.300
## 1st Qu.: 18.0 1st Qu.:113.5 1st Qu.:71.00 1st Qu.: 7.400
## Median : 31.0 Median :207.0 Median :79.00 Median : 9.700
## Mean : 42.1 Mean :184.8 Mean :77.79 Mean : 9.939
## 3rd Qu.: 62.0 3rd Qu.:255.5 3rd Qu.:84.50 3rd Qu.:11.500
## Max. :168.0 Max. :334.0 Max. :97.00 Max. :20.700
# Histograms of the four variables. hist() draws counts by default
# (freq = TRUE), so every y axis carries the same frequency label
# rather than a proportion label.

# Distribution of ozone
hist(environmental$ozone, main = "ozone", xlab = "臭氧濃度",
     ylab = "頻率", col = "gray")
# Distribution of radiation
hist(environmental$radiation, main = "Radiation", xlab = "輻射值",
     ylab = "頻率", col = "yellow")
# Distribution of temperature
hist(environmental$temperature, main = "Temperature", xlab = "溫度(F)",
     ylab = "頻率", col = "red")
# Distribution of wind
hist(environmental$wind, main = "Wind", xlab = "風速",
     ylab = "頻率", col = "blue")
# Install moments only when it is not already available, so re-running the
# script does not hit the network or reinstall the package every time.
if (!requireNamespace("moments", quietly = TRUE)) {
  install.packages("moments", repos = "https://cran.rstudio.com/")
}
## 將程式套件安載入 'C:/Users/user/Documents/R/win-library/4.1'
## (因為 'lib' 沒有被指定)
## package 'moments' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\user\AppData\Local\Temp\RtmpegFGqc\downloaded_packages
library(moments)
## Warning: 套件 'moments' 是用 R 版本 4.1.1 來建造的
# Compute the mean, standard deviation, skewness and kurtosis of one variable.
#
# x: a numeric vector; NA values are dropped before each statistic is computed.
# Returns an unnamed numeric vector of length 4, ordered as
# mean, sd, skewness, kurtosis.
my_summary <- function(x) {
  # Namespace-qualified calls fail immediately with a clear error when the
  # moments package is missing; require() would only warn and return FALSE.
  funs <- list(mean, sd, moments::skewness, moments::kurtosis)
  # vapply() enforces exactly one numeric value per statistic.
  vapply(funs, function(f) f(x, na.rm = TRUE), numeric(1))
}
# Apply my_summary() to each of the four columns (one result column each).
sapply(X = environmental[, 1:4], FUN = my_summary)
## ozone radiation temperature wind
## [1,] 42.099099 184.8018018 77.7927928 9.9387387
## [2,] 33.275969 91.1523021 9.5299691 3.5592178
## [3,] 1.248104 -0.4862466 -0.2250959 0.4537189
## [4,] 4.204408 2.0707751 2.3319550 3.2808871
# Pairwise correlations between all variables, rounded to three decimals.
env_cor <- cor(environmental)
round(env_cor, digits = 3)
## ozone radiation temperature wind
## ozone 1.000 0.348 0.699 -0.613
## radiation 0.348 1.000 0.294 -0.127
## temperature 0.699 0.294 1.000 -0.497
## wind -0.613 -0.127 -0.497 1.000
# Correlation coefficient between temperature and wind
round(cor(environmental[["temperature"]], environmental[["wind"]]), digits = 3)
## [1] -0.497
# Scatter plot of temperature (x) against wind (y)
plot(
  x = environmental[["temperature"]],
  y = environmental[["wind"]],
  pch = 16, col = 5,
  xlab = "溫度(F)", ylab = "風速"
)
溫度和風速呈現負相關,代表風速越大氣溫越低
# Correlation coefficient between temperature and ozone
round(cor(environmental[["temperature"]], environmental[["ozone"]]), digits = 3)
## [1] 0.699
# Scatter plot of temperature (x) against ozone (y)
plot(
  x = environmental[["temperature"]],
  y = environmental[["ozone"]],
  pch = 16, col = 5,
  xlab = "溫度(F)", ylab = "臭氧濃度"
)
溫度和臭氧濃度呈正相關,代表溫度越高臭氧濃度也越高
# Correlation coefficient between temperature and radiation
round(cor(environmental$temperature, environmental$radiation), 3)
## [1] 0.294
# Scatter plot of temperature (x) against radiation (y).
# The y variable is radiation, matching the axis label and the correlation
# above; plotting ozone here would repeat the previous figure.
plot(environmental$temperature, environmental$radiation, col = 5, pch = 16,
     xlab = "溫度(F)", ylab = "輻射值")
溫度和輻射值呈正相關,代表溫度越高輻射值也越高
# Load the `anscombe` data set into the workspace.
data(list = "anscombe")
# Preview the first six rows.
head(anscombe, n = 6L)
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
# Inspect the structure (column names and types) of `anscombe`.
str(anscombe)
## 'data.frame': 11 obs. of 8 variables:
## $ x1: num 10 8 13 9 11 14 6 4 12 7 ...
## $ x2: num 10 8 13 9 11 14 6 4 12 7 ...
## $ x3: num 10 8 13 9 11 14 6 4 12 7 ...
## $ x4: num 8 8 8 8 8 8 8 19 8 8 ...
## $ y1: num 8.04 6.95 7.58 8.81 8.33 ...
## $ y2: num 9.14 8.14 8.74 8.77 9.26 8.1 6.13 3.1 9.13 7.26 ...
## $ y3: num 7.46 6.77 12.74 7.11 7.81 ...
## $ y4: num 6.58 5.76 7.71 8.84 8.47 7.04 5.25 12.5 5.56 7.91 ...
# Print per-column summary statistics for `anscombe`.
summary(anscombe)
## x1 x2 x3 x4 y1
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8 Min. : 4.260
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8 1st Qu.: 6.315
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8 Median : 7.580
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9 Mean : 7.501
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8 3rd Qu.: 8.570
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19 Max. :10.840
## y2 y3 y4
## Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median :8.140 Median : 7.11 Median : 7.040
## Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :9.260 Max. :12.74 Max. :12.500
# Compute the mean, standard deviation, skewness and kurtosis of one variable.
#
# x: a numeric vector; NA values are dropped before each statistic is computed.
# Returns an unnamed numeric vector of length 4, ordered as
# mean, sd, skewness, kurtosis.
my_summary <- function(x) {
  # Namespace-qualified calls fail immediately with a clear error when the
  # moments package is missing; require() would only warn and return FALSE.
  funs <- list(mean, sd, moments::skewness, moments::kurtosis)
  # vapply() enforces exactly one numeric value per statistic.
  vapply(funs, function(f) f(x, na.rm = TRUE), numeric(1))
}
# Apply my_summary() to each of the eight columns (one result column each).
sapply(X = anscombe[, 1:8], FUN = my_summary)
## x1 x2 x3 x4 y1 y2 y3
## [1,] 9.000000 9.000000 9.000000 9.000000 7.50090909 7.500909 7.500000
## [2,] 3.316625 3.316625 3.316625 3.316625 2.03156814 2.031657 2.030424
## [3,] 0.000000 0.000000 0.000000 2.846050 -0.05580807 -1.129108 1.592231
## [4,] 1.780000 1.780000 1.780000 9.100000 2.17906136 3.007674 5.130453
## y4
## [1,] 7.500909
## [2,] 2.030579
## [3,] 1.293025
## [4,] 4.390789
# Pairwise correlations between all variables, rounded to three decimals.
anscombe_cor <- cor(anscombe)
round(anscombe_cor, digits = 3)
## x1 x2 x3 x4 y1 y2 y3 y4
## x1 1.000 1.000 1.000 -0.500 0.816 0.816 0.816 -0.314
## x2 1.000 1.000 1.000 -0.500 0.816 0.816 0.816 -0.314
## x3 1.000 1.000 1.000 -0.500 0.816 0.816 0.816 -0.314
## x4 -0.500 -0.500 -0.500 1.000 -0.529 -0.718 -0.345 0.817
## y1 0.816 0.816 0.816 -0.529 1.000 0.750 0.469 -0.489
## y2 0.816 0.816 0.816 -0.718 0.750 1.000 0.588 -0.478
## y3 0.816 0.816 0.816 -0.345 0.469 0.588 1.000 -0.155
## y4 -0.314 -0.314 -0.314 0.817 -0.489 -0.478 -0.155 1.000
# Correlation coefficient of the (x1, y1) pair
round(cor(anscombe[["x1"]], anscombe[["y1"]]), digits = 3)
## [1] 0.816
# Scatter plot of the (x1, y1) pair
plot(
  x = anscombe[["x1"]],
  y = anscombe[["y1"]],
  pch = 16, col = blues9,
  xlab = "x1", ylab = "y1"
)
x1和y1呈正相關
# Correlation coefficient of the (x2, y2) pair
round(cor(anscombe[["x2"]], anscombe[["y2"]]), digits = 3)
## [1] 0.816
# Scatter plot of the (x2, y2) pair
plot(
  x = anscombe[["x2"]],
  y = anscombe[["y2"]],
  pch = 16, col = blues9,
  xlab = "x2", ylab = "y2"
)
x2和y2呈正相關
# Correlation coefficient of the (x3, y3) pair
round(cor(anscombe[["x3"]], anscombe[["y3"]]), digits = 3)
## [1] 0.816
# Scatter plot of the (x3, y3) pair
plot(
  x = anscombe[["x3"]],
  y = anscombe[["y3"]],
  pch = 16, col = blues9,
  xlab = "x3", ylab = "y3"
)
x3和y3呈正相關
# Correlation coefficient of the (x4, y4) pair
round(cor(anscombe[["x4"]], anscombe[["y4"]]), digits = 3)
## [1] 0.817
# Scatter plot of the (x4, y4) pair
plot(
  x = anscombe[["x4"]],
  y = anscombe[["y4"]],
  pch = 16, col = blues9,
  xlab = "x4", ylab = "y4"
)
x4和y4呈正相關
以上4筆相關係數有3筆皆為0.816,1筆為0.817,都相當接近,有可能其資料的散布情形非常相似或一致
# Scatter-plot matrix of every pair of variables: smoothed panels above the
# diagonal, nothing drawn below it.
pairs(
  anscombe,
  upper.panel = panel.smooth,
  lower.panel = NULL,
  pch = ".",
  col = "black"
)
從圖形來看,(x1,y1)、(x2,y2)兩組不完全相同但有一點相似,和(x3,y3)、(x4,y4)兩組的散布情形則呈現極大的差異,然而相關係數卻呈現一樣或相近的數值,唯有兩兩數值呈正相關這一點是所有資料皆相同。Amazing