library(agricolae)
## Warning: 패키지 'agricolae'는 R 버전 4.3.3에서 작성되었습니다
library(readxl)
## Warning: 패키지 'readxl'는 R 버전 4.3.3에서 작성되었습니다
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.3.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(moonBook)
## Warning: 패키지 'moonBook'는 R 버전 4.3.3에서 작성되었습니다
### 분산분석 (Anlysis of Variance: AOV) ###
setwd("C:/Users/Admin/Desktop/2025 용역/난지 - 제주흑돼지")
dat <- read_excel("Data.xlsx")
dat_summary <- dat %>%
group_by(Type) %>%
summarise(
n_Male = sum(Sex == 1, na.rm = TRUE),
n_Female = sum(Sex == 2, na.rm = TRUE),
Total = n_Male + n_Female,
Female_ratio = round(n_Female / Total, 3), # 암컷 비율
Male_ratio = round(n_Male / Total, 3) # 수컷 비율
) %>%
arrange(desc(Female_ratio)) # 암컷 비율 순으로 정렬
dat_summary
## # A tibble: 11 × 6
## Type n_Male n_Female Total Female_ratio Male_ratio
## <chr> <int> <int> <int> <dbl> <dbl>
## 1 3<9 1 5 6 0.833 0.167
## 2 2<0 1 2 3 0.667 0.333
## 3 4<0 1 2 3 0.667 0.333
## 4 3<1 14 19 33 0.576 0.424
## 5 3<2 22 28 50 0.56 0.44
## 6 3<7 14 16 30 0.533 0.467
## 7 3<3 35 39 74 0.527 0.473
## 8 3<4 69 68 137 0.496 0.504
## 9 3<8 15 12 27 0.444 0.556
## 10 3<5 94 60 154 0.39 0.61
## 11 3<6 60 37 97 0.381 0.619
###############################################
### 3<9 타입에서 암컷의 비율이 가장 높았다. ###
###############################################
### 1) Type에 따른 Sex 차이 분석 ###
aov_model <- aov(dat$Sex ~ dat$Type, data=dat)
summary(aov_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## dat$Type 10 4.03 0.4027 1.631 0.094 .
## Residuals 603 148.88 0.2469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###############################################################################
### Type에 따른 성비 차이에 대한 일원분산분석 결과, ###
### 유의확률(p-value)은 0.094로 5% 유의수준에서 통계적으로 유의하지 않았다. ###
###############################################################################
### BOX plot ###
boxplot(dat$Sex ~ dat$Type)

### DUNCAN 사후 검정 ###
duncan <- duncan.test(aov_model, "dat$Type", alpha=0.05, console=TRUE)
##
## Study: aov_model ~ "dat$Type"
##
## Duncan's new multiple range test
## for dat$Sex
##
## Mean Square Error: 0.2469066
##
## dat$Type, means
##
## dat.Sex std r se Min Max Q25 Q50 Q75
## 2<0 1.666667 0.5773503 3 0.28688362 1 2 1.5 2 2
## 3<1 1.575758 0.5018904 33 0.08649867 1 2 1.0 2 2
## 3<2 1.560000 0.5014265 50 0.07027185 1 2 1.0 2 2
## 3<3 1.527027 0.5026770 74 0.05776310 1 2 1.0 2 2
## 3<4 1.496350 0.5018215 137 0.04245278 1 2 1.0 1 2
## 3<5 1.389610 0.4892530 154 0.04004110 1 2 1.0 1 2
## 3<6 1.381443 0.4882643 97 0.05045225 1 2 1.0 1 2
## 3<7 1.533333 0.5074163 30 0.09072057 1 2 1.0 2 2
## 3<8 1.444444 0.5063697 27 0.09562787 1 2 1.0 1 2
## 3<9 1.833333 0.4082483 6 0.20285735 1 2 2.0 2 2
## 4<0 1.666667 0.5773503 3 0.28688362 1 2 1.5 2 2
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Means with the same letter are not significantly different.
##
## dat$Sex groups
## 3<9 1.833333 a
## 2<0 1.666667 a
## 4<0 1.666667 a
## 3<1 1.575758 a
## 3<2 1.560000 a
## 3<7 1.533333 a
## 3<3 1.527027 a
## 3<4 1.496350 a
## 3<8 1.444444 a
## 3<5 1.389610 a
## 3<6 1.381443 a
###############################################################################################################################
### Typte 그룹별 성비 (암컷 = 2, 수컷 = 1)의 평균 차이가 있는지 검정하기 위해 DUNCAN Method를 통한 사후 검정을 실시하였으며,###
### 사후 검정 결과, 모든 Type 수준 간 평균 성비가 통계적으로 유의한 차이를 보이지 않음 ###
###############################################################################################################################
#############################################################################################
### 분산분석 결과, Type에 따른 성별 분포에 유의한 차이는 나타나지 않았으나,
### 성별에 영향을 미치는 개별 Type 수준의 영향을 확인하고자 로지스틱 회귀 분석을 실시하였다.
#############################################################################################
### 2) Logistic Reg ###
dat <- dat %>%
mutate(Sex01 = ifelse(Sex == 1, 0,
ifelse(Sex == 2, 1, NA)))
dat$Type <- relevel(factor(dat$Type), ref = "3<9") # 기준 그룹 3<9로 설정
logit_model <- glm(Sex01 ~ Type, data = dat, family = binomial)
summary(logit_model)
##
## Call:
## glm(formula = Sex01 ~ Type, family = binomial, data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6094 1.0954 1.469 0.1418
## Type2<0 -0.9163 1.6432 -0.558 0.5771
## Type3<1 -1.3041 1.1507 -1.133 0.2571
## Type3<2 -1.3683 1.1319 -1.209 0.2267
## Type3<3 -1.5012 1.1199 -1.340 0.1801
## Type3<4 -1.6240 1.1087 -1.465 0.1430
## Type3<5 -2.0584 1.1078 -1.858 0.0632 .
## Type3<6 -2.0929 1.1152 -1.877 0.0606 .
## Type3<7 -1.4759 1.1550 -1.278 0.2013
## Type3<8 -1.8326 1.1619 -1.577 0.1147
## Type4<0 -0.9163 1.6432 -0.558 0.5771
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 848.83 on 613 degrees of freedom
## Residual deviance: 832.35 on 603 degrees of freedom
## AIC: 854.35
##
## Number of Fisher Scoring iterations: 4
# 3) OR Function
ORtable = function(x, digits = 2) {
suppressMessages(a <- confint(x))
result = data.frame(exp(coef(x)), exp(a))
result = round(result, digits)
result = cbind(result, round(summary(x)$coefficients[, 4], 3))
colnames(result) = c("OR", "2.5%", "97.5%", "p")
return(result)
}
ORtable(logit_model)
## OR 2.5% 97.5% p
## (Intercept) 5.00 0.81 95.80 0.142
## Type2<0 0.40 0.01 13.62 0.577
## Type3<1 0.27 0.01 1.94 0.257
## Type3<2 0.25 0.01 1.73 0.227
## Type3<3 0.22 0.01 1.47 0.180
## Type3<4 0.20 0.01 1.26 0.143
## Type3<5 0.13 0.01 0.82 0.063
## Type3<6 0.12 0.01 0.80 0.061
## Type3<7 0.23 0.01 1.65 0.201
## Type3<8 0.16 0.01 1.17 0.115
## Type4<0 0.40 0.01 13.62 0.577
########################################################################################################################
### 기준 그룹은 3<9 그룹이며, OR 의 기준은 1이다.
### 95% 신뢰구간
### OR = Odds Ratio (오즈비) -> EXP(Type별 Estimate값)
### 결과 해석 예시 :: 3<1 - OR (0.27) 암컷일 가능성이 기준 그룹보다 73% 낮음
### 기준 그룹 (3<9)이 암컷 비율이 가장 높은 그룹이라는 점에서, 다른 그룹은 상대적으로 암컷일 가능성이 낮음을 보여줌
### 3<5 , 3<6 그룹은 p value 0.063, 0.061 수준
########################################################################################################################
# 4) Odds ratio Plot
odds_ratio = ORtable(logit_model)
odds_ratio = odds_ratio[2:nrow(odds_ratio),]
HRplot(odds_ratio, type=2, show.CI=TRUE, cex=1)
