# exam7) 다음은 "PimaIndiansDiabetes"에서 age를 1: 20~40세, 2: 41~60세, 3: 60세 이상의 범주형 변수로 변환한 후
# 가장 높은 연령 범주의 발병률을 출력하시오. 단, 발병률은 (연령 범주별 diabetes 중 pos 수) / (연령 범주별 인원 수) * 100이다.
library(mlbench)
## Warning: 패키지 'mlbench'는 R 버전 4.1.3에서 작성되었습니다
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.1.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the PimaIndiansDiabetes dataset into the session and keep a working
# copy under a shorter name.
data("PimaIndiansDiabetes")
pima <- PimaIndiansDiabetes

# Overview of the columns: type and leading values of each variable.
glimpse(pima)
## Rows: 768
## Columns: 9
## $ pregnant <dbl> 6, 1, 8, 1, 0, 5, 3, 10, 2, 8, 4, 10, 10, 1, 5, 7, 0, 7, 1, 1~
## $ glucose <dbl> 148, 85, 183, 89, 137, 116, 78, 115, 197, 125, 110, 168, 139,~
## $ pressure <dbl> 72, 66, 64, 66, 40, 74, 50, 0, 70, 96, 92, 74, 80, 60, 72, 0,~
## $ triceps <dbl> 35, 29, 0, 23, 35, 0, 32, 0, 45, 0, 0, 0, 0, 23, 19, 0, 47, 0~
## $ insulin <dbl> 0, 0, 0, 94, 168, 0, 88, 0, 543, 0, 0, 0, 0, 846, 175, 0, 230~
## $ mass <dbl> 33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5, 0.0, 37~
## $ pedigree <dbl> 0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.134, 0.158~
## $ age <dbl> 50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 59, 51, 3~
## $ diabetes <fct> pos, neg, pos, neg, pos, neg, pos, neg, pos, pos, neg, pos, n~

# Number of NA values per column; the recorded output shows zero everywhere.
pima %>%
  is.na() %>%
  colSums()
## pregnant glucose pressure triceps insulin mass pedigree age
## 0 0 0 0 0 0 0 0
## diabetes
## 0
# Add age_class, a character label derived from age:
#   "3" when age >= 60, "2" when 41 <= age < 60, "1" when age < 41.
# Conditions are evaluated top to bottom, so the first match wins.
pima1 <- pima %>%
  mutate(
    age_class = case_when(
      age >= 60 ~ "3",
      age >= 41 ~ "2",
      age < 41 ~ "1"
    )
  )

# Sanity check: how many rows landed in each age class.
table(pima1$age_class)
##
## 1 2 3
## 574 162 32
# One row per age class with:
#   n        - number of rows in the class
#   fre      - number of rows whose diabetes value is "pos"
#   ill_rate - incidence rate in percent (fre / n * 100)
df <- pima1 %>%
  group_by(age_class) %>%
  summarise(
    n = n(),
    fre = sum(diabetes == "pos"),
    ill_rate = fre / n * 100
  )
df
## # A tibble: 3 x 4
## age_class n fre ill_rate
## <chr> <int> <int> <dbl>
## 1 1 574 166 28.9
## 2 2 162 93 57.4
## 3 3 32 9 28.1
# Report the incidence rate of the age class with the highest rate. The value
# is selected with max() rather than a hard-coded row position, so the answer
# stays correct if the row order or the set of classes in df changes.
# NOTE(review): the task text can also be read as "the oldest age class"
# (age_class "3"); the recorded answer below matches the highest-rate
# reading - confirm which one is intended.
print(max(df$ill_rate))
## [1] 57.40741