# 250604

# 2025-06-04

# ✅ 1. Compute correlation coefficients
# Load the built-in mtcars data set and inspect its column types.
data("mtcars")
utils::str(mtcars)

## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

# Pairwise correlation matrix across every mtcars column.
# "pearson" is cor()'s default method; it is spelled out here for clarity.
cor_matrix <- cor(mtcars, method = "pearson")
print(cor_matrix)

##             mpg        cyl       disp         hp        drat         wt
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.68117191 -0.8676594
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.69993811  0.7824958
## disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.71021393  0.8879799
## hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.44875912  0.6587479
## drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.00000000 -0.7124406
## wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.71244065  1.0000000
## qsec  0.4186840 -0.5912421 -0.4336979 -0.7082234  0.09120476 -0.1747159
## vs    0.6640389 -0.8108118 -0.7104159 -0.7230967  0.44027846 -0.5549157
## am    0.5998324 -0.5226070 -0.5912270 -0.2432043  0.71271113 -0.6924953
## gear  0.4802848 -0.4926866 -0.5555692 -0.1257043  0.69961013 -0.5832870
## carb -0.5509251  0.5269883  0.3949769  0.7498125 -0.09078980  0.4276059
##             qsec         vs          am       gear        carb
## mpg   0.41868403  0.6640389  0.59983243  0.4802848 -0.55092507
## cyl  -0.59124207 -0.8108118 -0.52260705 -0.4926866  0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692  0.39497686
## hp   -0.70822339 -0.7230967 -0.24320426 -0.1257043  0.74981247
## drat  0.09120476  0.4402785  0.71271113  0.6996101 -0.09078980
## wt   -0.17471588 -0.5549157 -0.69249526 -0.5832870  0.42760594
## qsec  1.00000000  0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs    0.74453544  1.0000000  0.16834512  0.2060233 -0.56960714
## am   -0.22986086  0.1683451  1.00000000  0.7940588  0.05753435
## gear -0.21268223  0.2060233  0.79405876  1.0000000  0.27407284
## carb -0.65624923 -0.5696071  0.05753435  0.2740728  1.00000000

# ✅ 2. Correlation for a single pair of variables: weight (wt) vs. mpg
with(mtcars, cor(wt, mpg))

## [1] -0.8676594

# ✅ 3. Spearman (rank-based) correlation
# Load the built-in attitude survey data and preview its first six rows.
data("attitude")
head(attitude, n = 6L)

##   rating complaints privileges learning raises critical advance
## 1     43         51         30       39     61       92      45
## 2     63         64         51       54     63       73      47
## 3     71         70         68       69     76       86      48
## 4     61         63         45       47     54       84      35
## 5     81         78         56       66     71       83      47
## 6     43         55         49       44     54       49      34

# Show the structure (dimensions and column types) of attitude.
utils::str(attitude)

## 'data.frame':    30 obs. of  7 variables:
##  $ rating    : num  43 63 71 61 81 43 58 71 72 67 ...
##  $ complaints: num  51 64 70 63 78 55 67 75 82 61 ...
##  $ privileges: num  30 51 68 45 56 49 42 50 72 45 ...
##  $ learning  : num  39 54 69 47 66 44 56 55 67 47 ...
##  $ raises    : num  61 63 76 54 71 54 66 70 71 62 ...
##  $ critical  : num  92 73 86 84 83 49 68 66 83 80 ...
##  $ advance   : num  45 47 48 35 47 34 35 41 31 41 ...

# What is an ordinal scale?
# A scale whose values have an order (rank) but whose gaps between
# adjacent values are not guaranteed to be equal.
# Spearman correlation works on ranks, which suits ordinal data.
cor(x = attitude, method = "spearman")

##                rating complaints privileges  learning    raises   critical
## rating     1.00000000  0.8322006  0.4842879 0.6172702 0.5981496 0.04820357
## complaints 0.83220056  1.0000000  0.5250139 0.5801025 0.6466786 0.11246246
## privileges 0.48428794  0.5250139  1.0000000 0.5088579 0.4553562 0.11437189
## learning   0.61727020  0.5801025  0.5088579 1.0000000 0.6213777 0.12897481
## raises     0.59814963  0.6466786  0.4553562 0.6213777 1.0000000 0.28675090
## critical   0.04820357  0.1124625  0.1143719 0.1289748 0.2867509 1.00000000
## advance    0.20294560  0.2241189  0.3377220 0.5392687 0.4893975 0.25516702
##              advance
## rating     0.2029456
## complaints 0.2241189
## privileges 0.3377220
## learning   0.5392687
## raises     0.4893975
## critical   0.2551670
## advance    1.0000000

# Full linear model: regress rating on every other attitude column.
out <- lm(rating ~ ., data = attitude)
summary(out)

## 
## Call:
## lm(formula = rating ~ ., data = attitude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9418  -4.3555   0.3158   5.5425  11.5990 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.78708   11.58926   0.931 0.361634    
## complaints   0.61319    0.16098   3.809 0.000903 ***
## privileges  -0.07305    0.13572  -0.538 0.595594    
## learning     0.32033    0.16852   1.901 0.069925 .  
## raises       0.08173    0.22148   0.369 0.715480    
## critical     0.03838    0.14700   0.261 0.796334    
## advance     -0.21706    0.17821  -1.218 0.235577    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.068 on 23 degrees of freedom
## Multiple R-squared:  0.7326, Adjusted R-squared:  0.6628 
## F-statistic:  10.5 on 6 and 23 DF,  p-value: 1.24e-05

# Forward stepwise selection by AIC.
# NOTE: step() is provided by the stats package; MASS is attached here as in
# the original script, although this particular call does not need it.
library(MASS)

# Start from the intercept-only model ...
null_model <- lm(rating ~ 1, data = attitude)

# ... and let step() consider adding each of these candidate predictors.
upper_scope <- ~ complaints + privileges + learning + raises + critical +
  advance

step_model <- step(null_model, scope = upper_scope, direction = "forward")

## Start:  AIC=150.93
## rating ~ 1
## 
##              Df Sum of Sq    RSS    AIC
## + complaints  1   2927.58 1369.4 118.63
## + learning    1   1671.41 2625.6 138.16
## + raises      1   1496.48 2800.5 140.09
## + privileges  1    780.22 3516.7 146.92
## <none>                    4297.0 150.93
## + critical    1    105.16 4191.8 152.19
## + advance     1    103.35 4193.6 152.20
## 
## Step:  AIC=118.63
## rating ~ complaints
## 
##              Df Sum of Sq    RSS    AIC
## + learning    1   114.733 1254.7 118.00
## <none>                    1369.4 118.63
## + raises      1    11.102 1358.3 120.38
## + privileges  1     7.519 1361.9 120.46
## + advance     1     4.151 1365.2 120.54
## + critical    1     0.010 1369.4 120.63
## 
## Step:  AIC=118
## rating ~ complaints + learning
## 
##              Df Sum of Sq    RSS    AIC
## <none>                    1254.7 118.00
## + advance     1    75.540 1179.1 118.14
## + privileges  1    30.033 1224.6 119.28
## + raises      1     1.188 1253.5 119.97
## + critical    1     0.002 1254.7 120.00

# Forward selection result: refit the reduced model with the two predictors
# written out explicitly (rating ~ complaints + learning).
out1 <- lm(rating ~ complaints + learning, data = attitude)
summary(out1)

## 
## Call:
## lm(formula = rating ~ complaints + learning, data = attitude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.5568  -5.7331   0.6701   6.5341  10.3610 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.8709     7.0612   1.398    0.174    
## complaints    0.6435     0.1185   5.432 9.57e-06 ***
## learning      0.2112     0.1344   1.571    0.128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.817 on 27 degrees of freedom
## Multiple R-squared:  0.708,  Adjusted R-squared:  0.6864 
## F-statistic: 32.74 on 2 and 27 DF,  p-value: 6.058e-08

# ✅ Example 1: significance test for a Pearson correlation
# Reload mtcars, then test H0: rho = 0 between mpg and hp.
# method and alternative are cor.test()'s defaults, written out explicitly.
data("mtcars")
cor.test(
  mtcars$mpg,
  mtcars$hp,
  alternative = "two.sided",
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  mtcars$mpg and mtcars$hp
## t = -6.7424, df = 30, p-value = 1.788e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8852686 -0.5860994
## sample estimates:
##        cor 
## -0.7761684

#✅ 귀무가설 (H₀)
#두 변수 간에 상관관계가 없다.
#즉, 상관계수 ρ = 0

#✅ 대립가설 (H₁)
#두 변수 간에 상관관계가 있다.
#즉, 상관계수 ρ ≠ 0 (양 또는 음의 상관 존재)


# Multidimensional scaling (MDS)
# 1. Pairwise distance matrix between coffee brands (hypothetical perceived
#    distances). Rows and columns follow the order of `brand_names` below.
coffee_dist <- matrix(
  c(
    0, 2, 4, 5, 3, 6, 7,
    2, 0, 3, 4, 2, 5, 6,
    4, 3, 0, 2, 4, 3, 5,
    5, 4, 2, 0, 3, 2, 4,
    3, 2, 4, 3, 0, 5, 6,
    6, 5, 3, 2, 5, 0, 3,
    7, 6, 5, 4, 6, 3, 0
  ),
  nrow = 7,
  byrow = TRUE
)

# as.dist() keeps only the lower triangle, so a typo that breaks symmetry in
# this hand-entered matrix would otherwise be silently ignored. Fail fast.
stopifnot(
  isSymmetric(coffee_dist),
  all(diag(coffee_dist) == 0)
)

# 2. Attach brand names to both dimensions.
brand_names <- c(
  "Starbucks", "Ediya", "The Venti", "Paik's Coffee",
  "Hollys", "Twosome Place", "Mega Coffee"
)
rownames(coffee_dist) <- colnames(coffee_dist) <- brand_names

# 3. Convert the square matrix into a "dist" object.
coffee_dist <- as.dist(coffee_dist)

# 4. Classical (metric) MDS, keeping two dimensions.
mds_result <- cmdscale(coffee_dist, k = 2)

# 5. Plot the configuration. asp = 1 keeps both axes on the same scale so
#    that distances on the plot reflect the fitted distances; type = "n"
#    draws empty axes and text() then places the brand labels.
plot(
  mds_result,
  type = "n",
  asp = 1,
  main = "커피 브랜드 다차원척도법 (MDS)",
  xlab = "Dimension 1",
  ylab = "Dimension 2"
)
text(mds_result, labels = brand_names, cex = 1.1, col = "brown")

# 250604_r

# 정용하

# 2025-06-04