library(agricolae)
## Warning: 패키지 'agricolae'는 R 버전 4.3.3에서 작성되었습니다
library(readxl)
## Warning: 패키지 'readxl'는 R 버전 4.3.3에서 작성되었습니다
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.3.3에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(moonBook)
## Warning: 패키지 'moonBook'는 R 버전 4.3.3에서 작성되었습니다
### 분산분석 (Anlysis of Variance: AOV) ###

setwd("C:/Users/Admin/Desktop/2025 용역/난지 - 제주흑돼지")
dat <- read_excel("Data.xlsx")
dat_summary <- dat %>%
  group_by(Type) %>%
  summarise(
    n_Male = sum(Sex == 1, na.rm = TRUE),
    n_Female = sum(Sex == 2, na.rm = TRUE),
    Total = n_Male + n_Female,
    Female_ratio = round(n_Female / Total, 3),  # 암컷 비율
    Male_ratio = round(n_Male / Total, 3)       # 수컷 비율
  ) %>%
  arrange(desc(Female_ratio))  # 암컷 비율 순으로 정렬
dat_summary
## # A tibble: 11 × 6
##    Type  n_Male n_Female Total Female_ratio Male_ratio
##    <chr>  <int>    <int> <int>        <dbl>      <dbl>
##  1 3<9        1        5     6        0.833      0.167
##  2 2<0        1        2     3        0.667      0.333
##  3 4<0        1        2     3        0.667      0.333
##  4 3<1       14       19    33        0.576      0.424
##  5 3<2       22       28    50        0.56       0.44 
##  6 3<7       14       16    30        0.533      0.467
##  7 3<3       35       39    74        0.527      0.473
##  8 3<4       69       68   137        0.496      0.504
##  9 3<8       15       12    27        0.444      0.556
## 10 3<5       94       60   154        0.39       0.61 
## 11 3<6       60       37    97        0.381      0.619
###############################################
### 3<9 타입에서 암컷의 비율이 가장 높았다. ###
###############################################
### 1) Type에 따른 Sex 차이 분석 ###
aov_model <- aov(dat$Sex ~ dat$Type, data=dat)
summary(aov_model)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## dat$Type     10   4.03  0.4027   1.631  0.094 .
## Residuals   603 148.88  0.2469                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###############################################################################
### Type에 따른 성비 차이에 대한 일원분산분석 결과,                         ###
### 유의확률(p-value)은 0.094로 5% 유의수준에서 통계적으로 유의하지 않았다. ###
###############################################################################
### BOX plot ###
boxplot(dat$Sex ~ dat$Type)

### DUNCAN 사후 검정 ###
duncan <- duncan.test(aov_model, "dat$Type", alpha=0.05, console=TRUE)
## 
## Study: aov_model ~ "dat$Type"
## 
## Duncan's new multiple range test
## for dat$Sex 
## 
## Mean Square Error:  0.2469066 
## 
## dat$Type,  means
## 
##      dat.Sex       std   r         se Min Max Q25 Q50 Q75
## 2<0 1.666667 0.5773503   3 0.28688362   1   2 1.5   2   2
## 3<1 1.575758 0.5018904  33 0.08649867   1   2 1.0   2   2
## 3<2 1.560000 0.5014265  50 0.07027185   1   2 1.0   2   2
## 3<3 1.527027 0.5026770  74 0.05776310   1   2 1.0   2   2
## 3<4 1.496350 0.5018215 137 0.04245278   1   2 1.0   1   2
## 3<5 1.389610 0.4892530 154 0.04004110   1   2 1.0   1   2
## 3<6 1.381443 0.4882643  97 0.05045225   1   2 1.0   1   2
## 3<7 1.533333 0.5074163  30 0.09072057   1   2 1.0   2   2
## 3<8 1.444444 0.5063697  27 0.09562787   1   2 1.0   1   2
## 3<9 1.833333 0.4082483   6 0.20285735   1   2 2.0   2   2
## 4<0 1.666667 0.5773503   3 0.28688362   1   2 1.5   2   2
## 
## Groups according to probability of means differences and alpha level( 0.05 )
## 
## Means with the same letter are not significantly different.
## 
##      dat$Sex groups
## 3<9 1.833333      a
## 2<0 1.666667      a
## 4<0 1.666667      a
## 3<1 1.575758      a
## 3<2 1.560000      a
## 3<7 1.533333      a
## 3<3 1.527027      a
## 3<4 1.496350      a
## 3<8 1.444444      a
## 3<5 1.389610      a
## 3<6 1.381443      a
###############################################################################################################################
### Typte 그룹별 성비 (암컷 = 2, 수컷 = 1)의 평균 차이가 있는지 검정하기 위해 DUNCAN Method를 통한 사후 검정을 실시하였으며,###
### 사후 검정 결과, 모든 Type 수준 간 평균 성비가 통계적으로 유의한 차이를 보이지 않음                                      ###
###############################################################################################################################
#############################################################################################
### 분산분석 결과, Type에 따른 성별 분포에 유의한 차이는 나타나지 않았으나, 
### 성별에 영향을 미치는 개별 Type 수준의 영향을 확인하고자 로지스틱 회귀 분석을 실시하였다.
#############################################################################################

### 2) Logistic Reg ###
dat <- dat %>%
  mutate(Sex01 = ifelse(Sex == 1, 0, 
                        ifelse(Sex == 2, 1, NA)))

dat$Type <- relevel(factor(dat$Type), ref = "3<9") # 기준 그룹 3<9로 설정
logit_model <- glm(Sex01 ~ Type, data = dat, family = binomial)
summary(logit_model)
## 
## Call:
## glm(formula = Sex01 ~ Type, family = binomial, data = dat)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   1.6094     1.0954   1.469   0.1418  
## Type2<0      -0.9163     1.6432  -0.558   0.5771  
## Type3<1      -1.3041     1.1507  -1.133   0.2571  
## Type3<2      -1.3683     1.1319  -1.209   0.2267  
## Type3<3      -1.5012     1.1199  -1.340   0.1801  
## Type3<4      -1.6240     1.1087  -1.465   0.1430  
## Type3<5      -2.0584     1.1078  -1.858   0.0632 .
## Type3<6      -2.0929     1.1152  -1.877   0.0606 .
## Type3<7      -1.4759     1.1550  -1.278   0.2013  
## Type3<8      -1.8326     1.1619  -1.577   0.1147  
## Type4<0      -0.9163     1.6432  -0.558   0.5771  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 848.83  on 613  degrees of freedom
## Residual deviance: 832.35  on 603  degrees of freedom
## AIC: 854.35
## 
## Number of Fisher Scoring iterations: 4
# 3) OR Function
ORtable = function(x, digits = 2) {
  suppressMessages(a <- confint(x))  
  result = data.frame(exp(coef(x)), exp(a))  
  result = round(result, digits)
  result = cbind(result, round(summary(x)$coefficients[, 4], 3))
  colnames(result) = c("OR", "2.5%", "97.5%", "p")
  return(result)
}

ORtable(logit_model)
##               OR 2.5% 97.5%     p
## (Intercept) 5.00 0.81 95.80 0.142
## Type2<0     0.40 0.01 13.62 0.577
## Type3<1     0.27 0.01  1.94 0.257
## Type3<2     0.25 0.01  1.73 0.227
## Type3<3     0.22 0.01  1.47 0.180
## Type3<4     0.20 0.01  1.26 0.143
## Type3<5     0.13 0.01  0.82 0.063
## Type3<6     0.12 0.01  0.80 0.061
## Type3<7     0.23 0.01  1.65 0.201
## Type3<8     0.16 0.01  1.17 0.115
## Type4<0     0.40 0.01 13.62 0.577
########################################################################################################################
### 기준 그룹은 3<9 그룹이며, OR 의 기준은 1이다.
### 95% 신뢰구간
### OR = Odds Ratio (오즈비) -> EXP(Type별 Estimate값)
### 결과 해석 예시 :: 3<1 - OR (0.27) 암컷일 가능성이 기준 그룹보다 73% 낮음
### 기준 그룹 (3<9)이 암컷 비율이 가장 높은 그룹이라는 점에서, 다른 그룹은 상대적으로 암컷일 가능성이 낮음을 보여줌
### 3<5 , 3<6 그룹은 p value 0.063, 0.061 수준
########################################################################################################################
# 4) Odds ratio Plot
odds_ratio = ORtable(logit_model)
odds_ratio = odds_ratio[2:nrow(odds_ratio),]
HRplot(odds_ratio, type=2, show.CI=TRUE, cex=1)