# Textbook "Easy R Once You Know It", p. 113: t-tests
# Step 1: load the survey data and create a new variable.
#
# This section deliberately avoids rm(list = ls()), setwd() and attach():
# the CSV is read through an explicit path and columns are reached through
# the data frame itself (transform(test, ...)), so the workspace, the working
# directory and the search path are left untouched.
data_dir <- "c:/data"
test <- read.csv(file.path(data_dir, "Data1.csv"))
names(test)
## [1] "Q1" "Q2" "Q3" "Q4" "Q5" "Q6"
## [7] "Q7" "Q8" "Q9" "Q10" "Q11" "Q12"
## [13] "Q13" "Q14" "Q15" "Q16" "Q17" "Q18"
## [19] "Q19" "Q20" "Gender1" "EDU1" "BF" "BM"
## [25] "Happiness" "Peace"
# Happiness1 is the mean of the five items Q11-Q15 for each respondent.
test <- transform(test, Happiness1 = (Q11 + Q12 + Q13 + Q14 + Q15) / 5)
names(test)
## [1] "Q1" "Q2" "Q3" "Q4" "Q5"
## [6] "Q6" "Q7" "Q8" "Q9" "Q10"
## [11] "Q11" "Q12" "Q13" "Q14" "Q15"
## [16] "Q16" "Q17" "Q18" "Q19" "Q20"
## [21] "Gender1" "EDU1" "BF" "BM" "Happiness"
## [26] "Peace" "Happiness1"
# Research question: is the mean happiness score the same for adult men and
# women in Korea?

# Homogeneity-of-variance check (F test) before running the t-test.
var.test(Happiness ~ Gender1, data = test) # class note: this part is not asked
##
## F test to compare two variances
##
## data: Happiness by Gender1
## F = 0.96433, num df = 1135, denom df = 788, p-value = 0.5766
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8473093 1.0958434
## sample estimates:
## ratio of variances
## 0.9643256
nrow(test)
## [1] 1925
# Reading the equal-variance test:
# H0: the two groups have equal variances. H1: the variances differ.
# p-value = 0.5766 (output above) is larger than the 0.05 significance level,
# so H0 is not rejected and equal variances are assumed in the t-test below.

# Test of the research hypothesis: independent two-sample t-test.
t.test(Happiness ~ Gender1, data = test, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Happiness by Gender1
## t = 1.3282, df = 1923, p-value = 0.1843
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.02193698 0.11400596
## sample estimates:
## mean in group 0 mean in group 1
## 3.565933 3.519899
# "Two Sample t-test" is the independent-samples t-test.
# t = 1.3282
# df = 1923: degrees of freedom
# df = sample size - 2 (1925 - 2 = 1923)
NROW(test) # number of rows
## [1] 1925
tail(test, n = 6L)
## Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8 Q9 Q10 Q11 Q12 Q13 Q14 Q15 Q16 Q17 Q18 Q19 Q20
## 1920 4 4 3 4 4 2 2 3 4 2 2 4 3 4 4 3 4 4 3 4
## 1921 2 2 2 1 2 2 2 2 2 2 1 3 2 1 3 2 2 2 2 2
## 1922 3 2 2 2 3 1 1 1 1 1 3 3 3 4 4 4 4 5 2 2
## 1923 5 4 4 4 4 2 2 2 2 3 3 4 3 4 3 3 3 4 4 4
## 1924 4 4 4 2 2 4 2 4 4 3 3 2 3 4 3 4 4 4 3 4
## 1925 3 3 1 1 2 1 1 1 1 1 4 4 3 2 2 3 4 4 3 2
## Gender1 EDU1 BF BM Happiness Peace Happiness1
## 1920 1 3 3.8 2.6 3.4 3.6 3.4
## 1921 1 2 1.8 2.0 2.0 2.0 2.0
## 1922 0 2 2.4 1.0 3.4 3.4 3.4
## 1923 0 2 4.2 2.2 3.4 3.6 3.4
## 1924 1 2 3.2 3.4 3.0 3.8 3.0
## 1925 0 3 2.0 1.0 3.0 3.2 3.0
# p-value = 0.1843: probability of a result at least this extreme, computed
# under the assumption that H0 is true.
# H0: there is no difference in happiness between men and women.
# H1: there is a difference in happiness between men and women.
# p-value = 0.1843 is larger than the 0.05 significance level,
# so H0 is not rejected.
# "alternative hypothesis" in the output is H1.
# "95 percent confidence interval" is the 95% confidence interval:
# -0.02193698 to 0.11400596 contains 0, which is consistent with H0.
# If 0 were outside the interval, the result would support H1.
boxplot(Happiness ~ Gender1, data = test)
# The groups show spread, but there is no difference in means.
# Descriptive statistics for each group.
library(psych)
## Warning: package 'psych' was built under R version 4.2.3

# Variant 1: pass the columns explicitly with `$`.
describeBy(test$Happiness, test$Gender1)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1136 3.57 0.74 3.6 3.59 0.59 1.4 5 3.6 -0.38 -0.22 0.02
## ------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 789 3.52 0.76 3.6 3.54 0.59 1.4 5 3.6 -0.35 -0.18 0.03
# Variant 2: bare column names. with() evaluates the call inside `test`, so
# no attach() is needed and nothing is added to (or masked on) the search
# path. psych is already loaded above, so it is not loaded a second time.
with(test, describeBy(Happiness, Gender1))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1136 3.57 0.74 3.6 3.59 0.59 1.4 5 3.6 -0.38 -0.22 0.02
## ------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 789 3.52 0.76 3.6 3.54 0.59 1.4 5 3.6 -0.35 -0.18 0.03
# Textbook "Easy R Once You Know It", p. 117
# How to run a paired t-test
# - Independent-samples t-test: tests the mean difference between two
#   separate groups.
# - Paired t-test: tests the before/after difference within one group.
# - Example: body weight before vs. after taking a diet pill is a
#   paired-samples t-test.
# Example 4
before <- c(59, 72, 85, 69, 78, 82, 55) # weight before taking the pill
after <- c(54, 65, 84, 63, 72, 83, 51) # weight after taking the pill
# Hypotheses
#   H0: the diet pill has no effect (weight before - weight after = 0).
#   H1: the diet pill has an effect (weight before - weight after > 0).
# Test of the mean difference. t.test() comes from the stats package, which
# is attached by default, so no extra package is loaded here.
# alternative = "greater" makes this a one-sided test of H1.
t.test(before, after, mu = 0, alternative = "greater", paired = TRUE)
##
## Paired t-test
##
## data: before and after
## t = 3.5949, df = 6, p-value = 0.005718
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
## 1.837829 Inf
## sample estimates:
## mean difference
## 4
# Q. Result of the paired-samples t-test
# mtcars data:
# One-sample t-test: tests the mean of a single group.
# Independent-samples t-test: tests the means of two independent groups.
# Paired-samples t-test: tests the mean difference of paired measurements.
#
# Group-wise descriptive statistics once more. with() evaluates the call
# inside `test`, so attach() is not needed and no further copy of the columns
# is stacked on the search path. psych was already loaded earlier in the
# script, so it is not loaded again here.
with(test, describeBy(Happiness, Gender1))
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1136 3.57 0.74 3.6 3.59 0.59 1.4 5 3.6 -0.38 -0.22 0.02
## ------------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 789 3.52 0.76 3.6 3.54 0.59 1.4 5 3.6 -0.35 -0.18 0.03
# Load the built-in mtcars data and inspect it with dplyr.
data("mtcars")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
mtcars %>% glimpse()
## Rows: 32
## Columns: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
# Statistical hypothesis testing
# alternative (the alternative hypothesis) is one of
# "less" | "greater" | "two.sided".
# Template: t.test(<data name>$mpg, mu = 20, alternative = "greater")
# One-sample t-test
NROW(mtcars)
## [1] 32
filter(mtcars, mpg >= 22.0)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
# Exam note: expect a question on interpreting t, df and the p-value.
t.test(x = mtcars$mpg, alternative = "greater", mu = 20)
##
## One Sample t-test
##
## data: mtcars$mpg
## t = 0.08506, df = 31, p-value = 0.4664
## alternative hypothesis: true mean is greater than 20
## 95 percent confidence interval:
## 18.28418 Inf
## sample estimates:
## mean of x
## 20.09062
# Descriptive statistics
# Example 1: load iris and look at the first rows.
data(iris)
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Example 2: per-column summary.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Example 3: individual statistics for Sepal.Length.
mean(iris[["Sepal.Length"]]) # mean
## [1] 5.843333
median(iris[["Sepal.Length"]]) # median
## [1] 5.8
sd(iris[["Sepal.Length"]]) # standard deviation
## [1] 0.8280661
var(iris[["Sepal.Length"]]) # variance
## [1] 0.6856935
quantile(iris[["Sepal.Length"]], probs = 1 / 4) # first quartile
## 25%
## 5.1
quantile(iris[["Sepal.Length"]], probs = 3 / 4) # third quartile
## 75%
## 6.4
max(iris[["Sepal.Length"]]) # maximum
## [1] 7.9
min(iris[["Sepal.Length"]]) # minimum
## [1] 4.3
# Example 4: quantiles of the Animals data from the MASS package.
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
data(Animals)
head(Animals, n = 6L)
## body brain
## Mountain beaver 1.35 8.1
## Cow 465.00 423.0
## Grey wolf 36.33 119.5
## Goat 27.66 115.0
## Guinea pig 1.04 5.5
## Dipliodocus 11700.00 50.0
# With no probs argument, quantile() reports the 0/25/50/75/100% points.
quantile(Animals[["body"]])
## 0% 25% 50% 75% 100%
## 0.023 3.100 53.830 479.000 87000.000
quantile(Animals[["brain"]])
## 0% 25% 50% 75% 100%
## 0.400 22.225 137.000 420.000 5712.000
# Regression analysis
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.2.3
data("attitude")
attitude %>% glimpse()
## Rows: 30
## Columns: 7
## $ rating <dbl> 43, 63, 71, 61, 81, 43, 58, 71, 72, 67, 64, 67, 69, 68, 77,…
## $ complaints <dbl> 51, 64, 70, 63, 78, 55, 67, 75, 82, 61, 53, 60, 62, 83, 77,…
## $ privileges <dbl> 30, 51, 68, 45, 56, 49, 42, 50, 72, 45, 53, 47, 57, 83, 54,…
## $ learning <dbl> 39, 54, 69, 47, 66, 44, 56, 55, 67, 47, 58, 39, 42, 45, 72,…
## $ raises <dbl> 61, 63, 76, 54, 71, 54, 66, 70, 71, 62, 58, 59, 55, 59, 79,…
## $ critical <dbl> 92, 73, 86, 84, 83, 49, 68, 66, 83, 80, 67, 74, 63, 77, 77,…
## $ advance <dbl> 45, 47, 48, 35, 47, 34, 35, 41, 31, 41, 34, 41, 25, 35, 46,…
# Fit rating on every other column of attitude.
out <- lm(formula = rating ~ ., data = attitude)
summary(out)
##
## Call:
## lm(formula = rating ~ ., data = attitude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.9418 -4.3555 0.3158 5.5425 11.5990
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.78708 11.58926 0.931 0.361634
## complaints 0.61319 0.16098 3.809 0.000903 ***
## privileges -0.07305 0.13572 -0.538 0.595594
## learning 0.32033 0.16852 1.901 0.069925 .
## raises 0.08173 0.22148 0.369 0.715480
## critical 0.03838 0.14700 0.261 0.796334
## advance -0.21706 0.17821 -1.218 0.235577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.068 on 23 degrees of freedom
## Multiple R-squared: 0.7326, Adjusted R-squared: 0.6628
## F-statistic: 10.5 on 6 and 23 DF, p-value: 1.24e-05
# The regression result is inspected with summary().
# lm() is the basic fitting function; in `dependent ~ .` the dot enters all
# remaining variables as independent variables.
# Alternative with only two predictors:
# out <- lm(rating ~ complaints + privileges, data = attitude)
#
# t value = Estimate / Std. Error, e.g. 0.61319 / 0.16098 = 3.809 (complaints).
# Degrees of freedom = sample size - (number of independent variables + 1),
# here 30 - (6 + 1) = 23.