Gerekli kütüphaneleri yükleme
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
# Load the `ucb_admit` data set into the workspace.
data("ucb_admit")
# Number of rows per gender.
count(ucb_admit, gender)
## gender n
## 1 female 1835
## 2 male 2691
# Number of rows per department.
count(ucb_admit, dept)
## dept n
## 1 A 933
## 2 B 585
## 3 C 918
## 4 D 792
## 5 E 584
## 6 F 714
# Number of rows per admission outcome.
count(ucb_admit, admit)
## admit n
## 1 admitted 1755
## 2 rejected 2771
# Cross-tabulation: number of rows per gender and admission outcome.
count(ucb_admit, gender, admit)
## gender admit n
## 1 female admitted 557
## 2 female rejected 1278
## 3 male admitted 1198
## 4 male rejected 1493
cinsiyet ve kabul çapraz dağılımına oran ekleyelim
# Share of each admission outcome within a gender: every count is divided by
# the total of its gender group. The result remains grouped by gender.
count(ucb_admit, gender, admit) %>%
  group_by(gender) %>%
  mutate(prop_admit = n / sum(n))
## # A tibble: 4 × 4
## # Groups: gender [2]
## gender admit n prop_admit
## <chr> <chr> <int> <dbl>
## 1 female admitted 557 0.304
## 2 female rejected 1278 0.696
## 3 male admitted 1198 0.445
## 4 male rejected 1493 0.555
Kabul durumunu grafikte gösterelim: önce frekans, sonra oran olarak
library(ggplot2)
# Stacked bar chart: row counts per gender, coloured by admission outcome.
ggplot(ucb_admit) +
  geom_bar(aes(x = gender, fill = admit))
# Same chart with each bar rescaled to height 1, so it shows proportions.
ggplot(ucb_admit) +
  geom_bar(aes(x = gender, fill = admit), position = "fill")
bölümlere göre dağılım
# Number of rows per department, gender and admission outcome (long format).
count(ucb_admit, dept, gender, admit)
## dept gender admit n
## 1 A female admitted 89
## 2 A female rejected 19
## 3 A male admitted 512
## 4 A male rejected 313
## 5 B female admitted 17
## 6 B female rejected 8
## 7 B male admitted 353
## 8 B male rejected 207
## 9 C female admitted 202
## 10 C female rejected 391
## 11 C male admitted 120
## 12 C male rejected 205
## 13 D female admitted 131
## 14 D female rejected 244
## 15 D male admitted 138
## 16 D male rejected 279
## 17 E female admitted 94
## 18 E female rejected 299
## 19 E male admitted 53
## 20 E male rejected 138
## 21 F female admitted 24
## 22 F female rejected 317
## 23 F male admitted 22
## 24 F male rejected 351
Uzun tabloları geniş tabloya dönüştürme
library(tidyr)
# Reshape the long count table to wide form: one column per department,
# one row per gender/admission-outcome combination, cells filled with `n`.
ucb_admit %>%
  count(dept, gender, admit) %>%
  pivot_wider(
    names_from = dept,
    values_from = n
  )
## # A tibble: 4 × 8
## gender admit A B C D E F
## <chr> <chr> <int> <int> <int> <int> <int> <int>
## 1 female admitted 89 17 202 131 94 24
## 2 female rejected 19 8 391 244 299 317
## 3 male admitted 512 353 120 138 53 22
## 4 male rejected 313 207 205 279 138 351
grafik
# Horizontal proportion bars (gender on the y axis), one panel per department.
ggplot(ucb_admit, aes(y = gender, fill = admit)) +
  geom_bar(position = "fill") +
  facet_wrap(~ dept) +
  # Show the proportion axis as percentages.
  scale_x_continuous(labels = scales::label_percent()) +
  labs(x = "yuzde") +
  # Other legend positions: "left", "right", "top".
  theme(legend.position = "bottom")
Grafiği tabloya dönüştürme
# Admission rate per department and gender, keeping only the "admitted" rows.
# The columns receive their Turkish names directly inside count(); the result
# remains grouped by bolum and cinsiyet.
ucb_admit %>%
  count(bolum = dept, cinsiyet = gender, kabul = admit) %>%
  group_by(bolum, cinsiyet) %>%
  mutate(
    toplam = sum(n),             # group total (admitted + rejected)
    oran = round(n / toplam, 3)  # share of the group, 3 decimals
  ) %>%
  filter(kabul == "admitted")
## # A tibble: 12 × 6
## # Groups: bolum, cinsiyet [12]
## bolum cinsiyet kabul n toplam oran
## <chr> <chr> <chr> <int> <int> <dbl>
## 1 A female admitted 89 108 0.824
## 2 A male admitted 512 825 0.621
## 3 B female admitted 17 25 0.68
## 4 B male admitted 353 560 0.63
## 5 C female admitted 202 593 0.341
## 6 C male admitted 120 325 0.369
## 7 D female admitted 131 375 0.349
## 8 D male admitted 138 417 0.331
## 9 E female admitted 94 393 0.239
## 10 E male admitted 53 191 0.277
## 11 F female admitted 24 341 0.07
## 12 F male admitted 22 373 0.059
adlandırma yapma
# Build `ucb_admit_2`: admission rate per department and gender with Turkish
# column names and labels, keeping only the "kabul" (admitted) rows.
#
# Columns of the result:
#   bolum       - department (renamed from `dept`)
#   cinsiyet    - "kadin" for "female", "erkek" for "male"; any other gender
#                 value becomes NA because that case_when() has no default
#   kabul_durum - "kabul" for "admitted", "ret" for everything else
#   n           - number of rows in the bolum/cinsiyet/kabul_durum cell
#   toplam      - number of rows in the bolum/cinsiyet group
#   oran        - n / toplam, rounded to 3 decimals
ucb_admit_2 <- ucb_admit %>%
  mutate(
    cinsiyet = case_when(
      gender == "female" ~ "kadin",
      gender == "male" ~ "erkek"
    ),
    kabul_durum = case_when(
      admit == "admitted" ~ "kabul",
      .default = "ret"
    )
  ) %>%
  select(-admit, -gender) %>%
  rename(bolum = dept) %>%
  count(bolum, cinsiyet, kabul_durum) %>%
  group_by(bolum, cinsiyet) %>%
  mutate(toplam = sum(n), oran = round(n / toplam, 3)) %>%
  # Drop the grouping before storing the table, so later verbs applied to
  # `ucb_admit_2` do not silently operate per bolum/cinsiyet group.
  ungroup() %>%
  filter(kabul_durum == "kabul")