library(agricolae)
## Warning: 패키지 'agricolae'는 R 버전 4.3.3에서 작성되었습니다
library(readxl)
## Warning: 패키지 'readxl'는 R 버전 4.3.3에서 작성되었습니다
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.3.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(moonBook)
## Warning: 패키지 'moonBook'는 R 버전 4.3.3에서 작성되었습니다
### 분산분석 (Anlysis of Variance: AOV) ###
setwd("C:/Users/Admin/Desktop/2025 용역/난지 - 제주흑돼지")
dat <- read_excel("Data.xlsx")
dat_summary <- dat %>%
group_by(Type) %>%
summarise(
n_Male = sum(Sex == 1, na.rm = TRUE),
n_Female = sum(Sex == 2, na.rm = TRUE),
Total = n_Male + n_Female,
Female_ratio = round(n_Female / Total, 3), # 암컷 비율
Male_ratio = round(n_Male / Total, 3) # 수컷 비율
) %>%
arrange(desc(Female_ratio)) # 암컷 비율 순으로 정렬
dat_summary
## # A tibble: 11 × 6
## Type n_Male n_Female Total Female_ratio Male_ratio
## <chr> <int> <int> <int> <dbl> <dbl>
## 1 3<9 1 5 6 0.833 0.167
## 2 2<0 1 2 3 0.667 0.333
## 3 4<0 1 2 3 0.667 0.333
## 4 3<1 14 19 33 0.576 0.424
## 5 3<2 22 28 50 0.56 0.44
## 6 3<7 14 16 30 0.533 0.467
## 7 3<3 35 39 74 0.527 0.473
## 8 3<4 69 68 137 0.496 0.504
## 9 3<8 15 12 27 0.444 0.556
## 10 3<5 94 60 154 0.39 0.61
## 11 3<6 60 37 97 0.381 0.619
###############################################
### 3<9 타입에서 암컷의 비율이 가장 높았다. ###
###############################################
### 1) Type에 따른 Sex 차이 분석 ###
aov_model <- aov(dat$Sex ~ dat$Type, data=dat)
summary(aov_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## dat$Type 10 4.03 0.4027 1.631 0.094 .
## Residuals 603 148.88 0.2469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###############################################################################
### Type에 따른 성비 차이에 대한 일원분산분석 결과, ###
### 유의확률(p-value)은 0.094로 5% 유의수준에서 통계적으로 유의하지 않았다. ###
###############################################################################
### BOX plot ###
boxplot(dat$Sex ~ dat$Type)

### DUNCAN 사후 검정 ###
duncan <- duncan.test(aov_model, "dat$Type", alpha=0.05, console=TRUE)
##
## Study: aov_model ~ "dat$Type"
##
## Duncan's new multiple range test
## for dat$Sex
##
## Mean Square Error: 0.2469066
##
## dat$Type, means
##
## dat.Sex std r se Min Max Q25 Q50 Q75
## 2<0 1.666667 0.5773503 3 0.28688362 1 2 1.5 2 2
## 3<1 1.575758 0.5018904 33 0.08649867 1 2 1.0 2 2
## 3<2 1.560000 0.5014265 50 0.07027185 1 2 1.0 2 2
## 3<3 1.527027 0.5026770 74 0.05776310 1 2 1.0 2 2
## 3<4 1.496350 0.5018215 137 0.04245278 1 2 1.0 1 2
## 3<5 1.389610 0.4892530 154 0.04004110 1 2 1.0 1 2
## 3<6 1.381443 0.4882643 97 0.05045225 1 2 1.0 1 2
## 3<7 1.533333 0.5074163 30 0.09072057 1 2 1.0 2 2
## 3<8 1.444444 0.5063697 27 0.09562787 1 2 1.0 1 2
## 3<9 1.833333 0.4082483 6 0.20285735 1 2 2.0 2 2
## 4<0 1.666667 0.5773503 3 0.28688362 1 2 1.5 2 2
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Means with the same letter are not significantly different.
##
## dat$Sex groups
## 3<9 1.833333 a
## 2<0 1.666667 a
## 4<0 1.666667 a
## 3<1 1.575758 a
## 3<2 1.560000 a
## 3<7 1.533333 a
## 3<3 1.527027 a
## 3<4 1.496350 a
## 3<8 1.444444 a
## 3<5 1.389610 a
## 3<6 1.381443 a
###############################################################################################################################
### Typte 그룹별 성비 (암컷 = 2, 수컷 = 1)의 평균 차이가 있는지 검정하기 위해 DUNCAN Method를 통한 사후 검정을 실시하였으며,###
### 사후 검정 결과, 모든 Type 수준 간 평균 성비가 통계적으로 유의한 차이를 보이지 않음 ###
###############################################################################################################################
#############################################################################################
### 분산분석 결과, Type에 따른 성별 분포에 유의한 차이는 나타나지 않았으나,
### 성별에 영향을 미치는 개별 Type 수준의 영향을 확인하고자 로지스틱 회귀 분석을 실시하였다.
#############################################################################################
### 2) Logistic Reg ###
dat <- dat %>%
mutate(Sex01 = ifelse(Sex == 1, 0,
ifelse(Sex == 2, 1, NA)))
logit_model <- glm(Sex01 ~ Type, data = dat, family = binomial)
summary(logit_model)
##
## Call:
## glm(formula = Sex01 ~ Type, family = binomial, data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.931e-01 1.225e+00 0.566 0.571
## Type3<1 -3.878e-01 1.274e+00 -0.304 0.761
## Type3<2 -4.520e-01 1.257e+00 -0.359 0.719
## Type3<3 -5.849e-01 1.247e+00 -0.469 0.639
## Type3<4 -7.077e-01 1.237e+00 -0.572 0.567
## Type3<5 -1.142e+00 1.236e+00 -0.924 0.355
## Type3<6 -1.177e+00 1.242e+00 -0.947 0.344
## Type3<7 -5.596e-01 1.278e+00 -0.438 0.662
## Type3<8 -9.163e-01 1.285e+00 -0.713 0.476
## Type3<9 9.163e-01 1.643e+00 0.558 0.577
## Type4<0 1.821e-13 1.732e+00 0.000 1.000
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 848.83 on 613 degrees of freedom
## Residual deviance: 832.35 on 603 degrees of freedom
## AIC: 854.35
##
## Number of Fisher Scoring iterations: 4
# 3) OR Function
ORtable = function(x, digits = 2) {
suppressMessages(a <- confint(x))
result = data.frame(exp(coef(x)), exp(a))
result = round(result, digits)
result = cbind(result, round(summary(x)$coefficients[, 4], 3))
colnames(result) = c("OR", "2.5%", "97.5%", "p")
return(result)
}
ORtable(logit_model)
## OR 2.5% 97.5% p
## (Intercept) 2.00 0.19 43.04 0.571
## Type3<1 0.68 0.03 7.78 0.761
## Type3<2 0.64 0.03 7.06 0.719
## Type3<3 0.56 0.03 6.06 0.639
## Type3<4 0.49 0.02 5.26 0.567
## Type3<5 0.32 0.01 3.40 0.355
## Type3<6 0.31 0.01 3.33 0.344
## Type3<7 0.57 0.02 6.60 0.662
## Type3<8 0.40 0.02 4.67 0.476
## Type3<9 2.50 0.07 90.37 0.577
## Type4<0 1.00 0.02 40.40 1.000
########################################################################################################################
### 기준 그룹은 2<0그룹이며, OR 의 기준은 1이다.
### 95% 신뢰구간
### OR = Odds Ratio (오즈비) -> EXP(Type별 Estimate값)
### 결과 해석 예시 :: 3<9 - OR (2.5)
### 3<9 그룹은 기준 그룹에 비해 암컷일 확률의 Odds가 2.5배 높다
########################################################################################################################
# 4) Odds ratio Plot
odds_ratio = ORtable(logit_model)
odds_ratio = odds_ratio[2:nrow(odds_ratio),]
HRplot(odds_ratio, type=2, show.CI=TRUE, cex=1)
