## Switch the session to a Traditional Chinese locale
## NOTE(review): "cht" appears to be a Windows-style locale name (the result recorded below is in Windows format) — confirm before running on another platform.

Sys.setlocale(category = "LC_ALL", locale = "cht")
## [1] "LC_COLLATE=Chinese (Traditional)_Taiwan.950;LC_CTYPE=Chinese (Traditional)_Taiwan.950;LC_MONETARY=Chinese (Traditional)_Taiwan.950;LC_NUMERIC=C;LC_TIME=Chinese (Traditional)_Taiwan.950"
library("faraway")
## Warning: 套件 'faraway' 是用 R 版本 4.1.3 來建造的
library(lattice)
## 
## 載入套件:'lattice'
## 下列物件被遮斷自 'package:faraway':
## 
##     melanoma

## Inspect the structure (column types and first values) of the data set

utils::str(insulgas)
## 'data.frame':    44 obs. of  3 variables:
##  $ Insulate: Factor w/ 2 levels "After","Before": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Temp    : num  -0.8 -0.7 0.4 2.5 2.9 3.2 3.6 3.9 4.2 4.3 ...
##  $ Gas     : num  7.2 6.9 6.4 6 5.8 5.8 5.6 4.7 5.8 5.2 ...

## Show the first six rows of the data set

utils::head(insulgas, n = 6L)
##   Insulate Temp Gas
## 1   Before -0.8 7.2
## 2   Before -0.7 6.9
## 3   Before  0.4 6.4
## 4   Before  2.5 6.0
## 5   Before  2.9 5.8
## 6   Before  3.2 5.8

# Basic summary statistics for every column

summary(object = insulgas)
##    Insulate       Temp             Gas       
##  After :18   Min.   :-0.800   Min.   :2.600  
##  Before:26   1st Qu.: 2.500   1st Qu.:3.675  
##              Median : 4.200   Median :4.150  
##              Mean   : 4.311   Mean   :4.398  
##              3rd Qu.: 6.225   3rd Qu.:4.825  
##              Max.   :10.200   Max.   :7.200

類別變項的描述統計

Frequencies: The number of observations for a particular category

Proportions: The percent that each category accounts for out of the whole

# Frequencies: number of observations in each Insulate category

table(insulgas[["Insulate"]])
## 
##  After Before 
##     18     26

# Proportions: share of all observations that falls in each Insulate category

prop.table(table(insulgas[["Insulate"]]))
## 
##     After    Before 
## 0.4090909 0.5909091

連續變項與類別變項

# Mean and standard deviation of temperature before and after insulation

aggregate(insulgas["Temp"], by = insulgas["Insulate"], FUN = mean)
##   Insulate     Temp
## 1    After 2.811111
## 2   Before 5.350000
aggregate(insulgas["Temp"], by = insulgas["Insulate"], FUN = sd)
##   Insulate     Temp
## 1    After 1.620780
## 2   Before 2.872804

兩個類別

# Relationship between insulation status (before/after) and temperature

# Insulate has levels "After", "Before" in that order, so lty = c(1, 2)
# draws After as a solid line and Before as a dashed line, as the title says.
densityplot(~ Temp, groups = Insulate, data = insulgas, xlab = "溫度",
            lty = c(1, 2), plot.points = FALSE, type = "g",
            main = "裝牆(前 = 虛線, 後 = 實線)")

boxplot(Temp ~ Insulate, data = insulgas, col = "pink", border = "blue")

# qq() plots quantiles of the first factor level (After) on the x axis and of
# the second level (Before) on the y axis, so the axis labels follow that order.
qq(Insulate ~ Temp, data = insulgas, type = c("p", "g"), pch = 2, aspect = 1,
   xlab = "溫度 (裝牆後)", ylab = "溫度 (裝牆前)")

超過兩個類別

# Group-wise mean and standard deviation of temperature (After vs. Before)

aggregate(Temp ~ Insulate, insulgas, mean)
##   Insulate     Temp
## 1    After 2.811111
## 2   Before 5.350000
aggregate(Temp ~ Insulate, insulgas, sd)
##   Insulate     Temp
## 1    After 1.620780
## 2   Before 2.872804

## Multiple categories

# Histogram of temperature, one panel per insulation status

# type = "density" scales the bars to a density, so the y axis is labelled
# as density rather than probability.
histogram(~ Temp | Insulate, data = insulgas, xlab = "溫度", ylab = "密度",
          type = "density", layout = c(2, 1))

# frame.plot = FALSE drops the box drawn around the plot region
boxplot(Temp ~ Insulate, data = insulgas, xlab = "有無裝牆", ylab = "溫度",
        frame.plot = FALSE, col = c("red", "green"))

# Temperature mean and standard deviation, split by insulation status

aggregate(insulgas["Temp"], by = list(Insulate = insulgas$Insulate), FUN = mean)
##   Insulate     Temp
## 1    After 2.811111
## 2   Before 5.350000
aggregate(insulgas["Temp"], by = list(Insulate = insulgas$Insulate), FUN = sd)
##   Insulate     Temp
## 1    After 1.620780
## 2   Before 2.872804

# Standard error of the mean temperature within each insulation group:
# sample standard deviation divided by the square root of the group size

aggregate(Temp ~ Insulate, data = insulgas,
          FUN = function(temps) sd(temps) / sqrt(length(temps)))
##   Insulate      Temp
## 1    After 0.3820215
## 2   Before 0.5634031
# Scatter plot of gas consumption (y) against temperature (x)
with(insulgas, plot(Temp, Gas, col = blues9, pch = 16,
                    xlab = "溫度", ylab = "天然氣用量"))

# Check whether the temperature / gas-consumption relationship looks similar
# before and after insulation

# Gas is the response (y axis) and Temp the predictor (x axis), matching the
# axis labels; type "r" adds a regression line to each panel.
xyplot(Gas ~ Temp | Insulate, data = insulgas, xlab = "溫度",
       ylab = "天然氣用量", type = c("g", "p", "r"), cex = 0.1,
       layout = c(2, 1))

從上面的統計資料與圖形可以看出,在加裝了中空牆面之後,氣溫與天然氣用量都比加裝前低很多。且下方t檢定的結果顯示,加裝中空牆面之後的平均氣溫顯著低於加裝前。

類別變項-t檢定和ANOVA

以t檢定比較裝牆前後的氣溫差異

# By default t.test() runs Welch's test, which does not assume equal variances

t.test(Temp ~ Insulate, data = insulgas)
## 
##  Welch Two Sample t-test
## 
## data:  Temp by Insulate
## t = -3.7298, df = 40.64, p-value = 0.0005856
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
##  -3.913977 -1.163801
## sample estimates:
##  mean in group After mean in group Before 
##             2.811111             5.350000

# Classic two-sample t-test that assumes equal variances in both groups

t.test(Temp ~ Insulate, data = insulgas, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  Temp by Insulate
## t = -3.3872, df = 42, p-value = 0.001544
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
##  -4.051542 -1.026235
## sample estimates:
##  mean in group After mean in group Before 
##             2.811111             5.350000

ANOVA

# One-way ANOVA of temperature on insulation status: print the fitted model
aov(formula = Temp ~ Insulate, data = insulgas)
## Call:
##    aov(formula = Temp ~ Insulate, data = insulgas)
## 
## Terms:
##                  Insulate Residuals
## Sum of Squares   68.56154 250.98278
## Deg. of Freedom         1        42
## 
## Residual standard error: 2.444541
## Estimated effects may be unbalanced
# ANOVA table (F test) for the same model
summary(aov(formula = Temp ~ Insulate, data = insulgas))
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Insulate     1  68.56   68.56   11.47 0.00154 **
## Residuals   42 250.98    5.98                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1