# 2025-06-04
# ✅ 1. 상관계수 계산
# Load the built-in mtcars data set and show its structure.
data("mtcars")
str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Pearson correlation matrix for every pair of columns in mtcars
# (Pearson is the default method of cor(); it is spelled out here).
cor_matrix <- cor(x = mtcars, method = "pearson")
print(cor_matrix)
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
## qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
## vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
## am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
## gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
## carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
## cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
## hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
## drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
## wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
## qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
## am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
## gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
## carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# ✅ 2. Correlation for one specific pair of variables (wt vs. mpg)
with(mtcars, cor(wt, mpg))
## [1] -0.8676594
# ✅ 3. Spearman correlation (rank-based)
# Load the built-in attitude survey data and preview the first six rows.
data("attitude")
head(x = attitude, n = 6L)
## rating complaints privileges learning raises critical advance
## 1 43 51 30 39 61 92 45
## 2 63 64 51 54 63 73 47
## 3 71 70 68 69 76 86 48
## 4 61 63 45 47 54 84 35
## 5 81 78 56 66 71 83 47
## 6 43 55 49 44 54 49 34
str(object = attitude)
## 'data.frame': 30 obs. of 7 variables:
## $ rating : num 43 63 71 61 81 43 58 71 72 67 ...
## $ complaints: num 51 64 70 63 78 55 67 75 82 61 ...
## $ privileges: num 30 51 68 45 56 49 42 50 72 45 ...
## $ learning : num 39 54 69 47 66 44 56 55 67 47 ...
## $ raises : num 61 63 76 54 71 54 66 70 71 62 ...
## $ critical : num 92 73 86 84 83 49 68 66 83 80 ...
## $ advance : num 45 47 48 35 47 34 35 41 31 41 ...
# What is an ordinal scale?
# A scale whose values have a rank order but whose intervals between
# values are not necessarily equal.
# Spearman (rank-based) correlation matrix, the coefficient suited to
# ordinal data.
cor(x = attitude, method = "spearman")
## rating complaints privileges learning raises critical
## rating 1.00000000 0.8322006 0.4842879 0.6172702 0.5981496 0.04820357
## complaints 0.83220056 1.0000000 0.5250139 0.5801025 0.6466786 0.11246246
## privileges 0.48428794 0.5250139 1.0000000 0.5088579 0.4553562 0.11437189
## learning 0.61727020 0.5801025 0.5088579 1.0000000 0.6213777 0.12897481
## raises 0.59814963 0.6466786 0.4553562 0.6213777 1.0000000 0.28675090
## critical 0.04820357 0.1124625 0.1143719 0.1289748 0.2867509 1.00000000
## advance 0.20294560 0.2241189 0.3377220 0.5392687 0.4893975 0.25516702
## advance
## rating 0.2029456
## complaints 0.2241189
## privileges 0.3377220
## learning 0.5392687
## raises 0.4893975
## critical 0.2551670
## advance 1.0000000
# Multiple linear regression of rating on all remaining attitude columns.
out <- lm(formula = rating ~ ., data = attitude)
summary(out)
##
## Call:
## lm(formula = rating ~ ., data = attitude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.9418 -4.3555 0.3158 5.5425 11.5990
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.78708 11.58926 0.931 0.361634
## complaints 0.61319 0.16098 3.809 0.000903 ***
## privileges -0.07305 0.13572 -0.538 0.595594
## learning 0.32033 0.16852 1.901 0.069925 .
## raises 0.08173 0.22148 0.369 0.715480
## critical 0.03838 0.14700 0.261 0.796334
## advance -0.21706 0.17821 -1.218 0.235577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.068 on 23 degrees of freedom
## Multiple R-squared: 0.7326, Adjusted R-squared: 0.6628
## F-statistic: 10.5 on 6 and 23 DF, p-value: 1.24e-05
library(MASS)
# Forward selection by AIC: start from the intercept-only model and add
# predictors one at a time, up to the upper scope listed below.
step_model <- step(
  object = lm(formula = rating ~ 1, data = attitude),
  scope = list(
    upper = ~ complaints + privileges + learning + raises + critical + advance
  ),
  direction = "forward"
)
## Start: AIC=150.93
## rating ~ 1
##
## Df Sum of Sq RSS AIC
## + complaints 1 2927.58 1369.4 118.63
## + learning 1 1671.41 2625.6 138.16
## + raises 1 1496.48 2800.5 140.09
## + privileges 1 780.22 3516.7 146.92
## <none> 4297.0 150.93
## + critical 1 105.16 4191.8 152.19
## + advance 1 103.35 4193.6 152.20
##
## Step: AIC=118.63
## rating ~ complaints
##
## Df Sum of Sq RSS AIC
## + learning 1 114.733 1254.7 118.00
## <none> 1369.4 118.63
## + raises 1 11.102 1358.3 120.38
## + privileges 1 7.519 1361.9 120.46
## + advance 1 4.151 1365.2 120.54
## + critical 1 0.010 1369.4 120.63
##
## Step: AIC=118
## rating ~ complaints + learning
##
## Df Sum of Sq RSS AIC
## <none> 1254.7 118.00
## + advance 1 75.540 1179.1 118.14
## + privileges 1 30.033 1224.6 119.28
## + raises 1 1.188 1253.5 119.97
## + critical 1 0.002 1254.7 120.00
# Refit the model chosen by forward selection (complaints + learning)
# and show its summary.
out1 <- lm(formula = rating ~ complaints + learning, data = attitude)
summary(out1)
##
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5568 -5.7331 0.6701 6.5341 10.3610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.8709 7.0612 1.398 0.174
## complaints 0.6435 0.1185 5.432 9.57e-06 ***
## learning 0.2112 0.1344 1.571 0.128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.817 on 27 degrees of freedom
## Multiple R-squared: 0.708, Adjusted R-squared: 0.6864
## F-statistic: 32.74 on 2 and 27 DF, p-value: 6.058e-08
# ✅ Example 1: significance test for a Pearson correlation coefficient
# The arguments stay as mtcars$... because cor.test() prints them as the
# data name in its output.
data("mtcars")
cor.test(x = mtcars$mpg, y = mtcars$hp, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: mtcars$mpg and mtcars$hp
## t = -6.7424, df = 30, p-value = 1.788e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8852686 -0.5860994
## sample estimates:
## cor
## -0.7761684
#✅ 귀무가설 (H₀)
#두 변수 간에 상관관계가 없다.
#즉, 상관계수 ρ = 0
#✅ 대립가설 (H₁)
#두 변수 간에 상관관계가 있다.
#즉, 상관계수 ρ ≠ 0 (양 또는 음의 상관 존재)
# Multidimensional scaling (MDS)
# 1. Pairwise distance matrix between coffee brands (hypothetical
#    perceived distances).
coffee_dist <- matrix(c(
  0, 2, 4, 5, 3, 6, 7,
  2, 0, 3, 4, 2, 5, 6,
  4, 3, 0, 2, 4, 3, 5,
  5, 4, 2, 0, 3, 2, 4,
  3, 2, 4, 3, 0, 5, 6,
  6, 5, 3, 2, 5, 0, 3,
  7, 6, 5, 4, 6, 3, 0
), nrow = 7, byrow = TRUE)
# 2. Attach the brand names to both rows and columns.
brand_names <- c(
  "Starbucks", "Ediya", "The Venti", "Paik's Coffee",
  "Hollys", "Twosome Place", "Mega Coffee"
)
rownames(coffee_dist) <- colnames(coffee_dist) <- brand_names
# as.dist() keeps only the lower triangle, so a typo in the hand-entered
# matrix would otherwise go unnoticed; fail early if it is not symmetric.
stopifnot(isSymmetric(coffee_dist))
# 3. Convert the matrix to a "dist" object.
coffee_dist <- as.dist(coffee_dist)
# 4. Classical (metric) MDS into two dimensions.
mds_result <- cmdscale(coffee_dist, k = 2)
# 5. Plot the configuration. asp = 1 gives both axes the same scale so
#    that distances between labels are comparable in every direction.
plot(
  mds_result,
  type = "n",
  asp = 1,
  main = "커피 브랜드 다차원척도법 (MDS)",
  xlab = "Dimension 1",
  ylab = "Dimension 2"
)
text(mds_result, labels = brand_names, cex = 1.1, col = "brown")
