library(tidyverse) # 패키지 로딩
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
getwd() # Print the current working directory; re-run whenever the location needs to be confirmed.
## [1] "/Users/jangsieun/Desktop/리서치(R class)"
# Attach carData so that the BEPS data set can be found by name.
library(carData)
# Bind the BEPS data frame to `dat`, then preview its first six rows.
dat <- carData::BEPS
head(dat, n = 6L)
## vote age economic.cond.national economic.cond.household Blair
## 1 Liberal Democrat 43 3 3 4
## 2 Labour 36 4 4 4
## 3 Labour 35 4 4 5
## 4 Labour 24 4 2 2
## 5 Labour 41 2 2 1
## 6 Labour 47 3 4 4
## Hague Kennedy Europe political.knowledge gender
## 1 1 4 2 2 female
## 2 4 4 5 2 male
## 3 2 3 3 2 male
## 4 1 3 4 0 female
## 5 1 4 6 2 male
## 6 4 2 4 2 male
# Print a per-column summary of `dat`: level counts for the factor columns and
# min / quartiles / mean / max for the numeric columns.
summary(dat)
## vote age economic.cond.national
## Conservative :462 Min. :24.00 Min. :1.000
## Labour :720 1st Qu.:41.00 1st Qu.:3.000
## Liberal Democrat:343 Median :53.00 Median :3.000
## Mean :54.18 Mean :3.246
## 3rd Qu.:67.00 3rd Qu.:4.000
## Max. :93.00 Max. :5.000
## economic.cond.household Blair Hague Kennedy
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:3.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :3.00 Median :4.000 Median :2.000 Median :3.000
## Mean :3.14 Mean :3.334 Mean :2.747 Mean :3.135
## 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.000
## Europe political.knowledge gender
## Min. : 1.000 Min. :0.000 female:812
## 1st Qu.: 4.000 1st Qu.:0.000 male :713
## Median : 6.000 Median :2.000
## Mean : 6.729 Mean :1.542
## 3rd Qu.:10.000 3rd Qu.:2.000
## Max. :11.000 Max. :3.000
library(gmodels)
# Cross-tabulate gender (rows, treated as the independent variable) against
# vote (columns, treated as the dependent variable) and test for association.
# TRUE / FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
CrossTable(
  dat$gender, dat$vote,
  # Run Pearson's chi-squared test on the table.
  chisq = TRUE,
  # Show the expected count in every cell.
  expected = TRUE,
  # Dimension names printed for the rows ("gender") and columns ("political type").
  dnn = c("성별", "정치 타입"),
  # Suppress the row, column and whole-table proportions.
  prop.r = FALSE, prop.c = FALSE, prop.t = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Expected N |
## | Chi-square contribution |
## |-------------------------|
##
##
## Total Observations in Table: 1525
##
##
## | 정치 타입
## 성별 | Conservative | Labour | Liberal Democrat | Row Total |
## -------------|------------------|------------------|------------------|------------------|
## female | 259 | 372 | 181 | 812 |
## | 245.996 | 383.370 | 182.633 | |
## | 0.687 | 0.337 | 0.015 | |
## -------------|------------------|------------------|------------------|------------------|
## male | 203 | 348 | 162 | 713 |
## | 216.004 | 336.630 | 160.367 | |
## | 0.783 | 0.384 | 0.017 | |
## -------------|------------------|------------------|------------------|------------------|
## Column Total | 462 | 720 | 343 | 1525 |
## -------------|------------------|------------------|------------------|------------------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 2.22284 d.f. = 2 p = 0.3290914
##
##
##
library(ggplot2)
# Box plot of age by gender, with one box per vote choice inside each gender.
# The fill legend takes its labels directly from the levels of `vote`. The
# earlier manual `labels` vector listed the parties in the reverse of the level
# order (Conservative, Labour, Liberal Democrat), so the Conservative and
# Liberal Democrat boxes were labelled with each other's names.
boxplot_age <- ggplot(dat, aes(x = factor(gender), y = age, fill = factor(vote))) +
  geom_boxplot(outliers = FALSE) + # do not draw the outlier points
  labs(x = "gender", y = "age", fill = "vote")
boxplot_age
library(dplyr)
# Count the respondents in every gender x vote cell, then express each count
# as a percentage of ALL respondents. `.groups = "drop"` removes the grouping
# after the count, so `sum(count)` below is the grand total rather than a
# per-gender total.
summary_data <- dat %>%
  group_by(gender, vote) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = count / sum(count) * 100)
summary_data
## # A tibble: 6 × 4
## gender vote count percentage
## <fct> <fct> <int> <dbl>
## 1 female Conservative 259 17.0
## 2 female Labour 372 24.4
## 3 female Liberal Democrat 181 11.9
## 4 male Conservative 203 13.3
## 5 male Labour 348 22.8
## 6 male Liberal Democrat 162 10.6
# Bar chart of the percentages in `summary_data`: gender on the x axis, one bar
# per vote choice. `factor()` keeps the grouping columns categorical (it does
# not convert them to character).
# The fill legend takes its labels directly from the levels of `vote`. The
# earlier manual `labels` vector was in the reverse of the level order
# (Conservative, Labour, Liberal Democrat) and swapped the Conservative and
# Liberal Democrat legend entries.
ggplot(summary_data, aes(x = factor(gender), y = percentage, fill = factor(vote))) +
  # geom_col() plots the y values as given; "dodge" places the bars side by
  # side instead of stacking them.
  geom_col(position = "dodge", alpha = 0.75) +
  # Axis and legend titles.
  labs(x = "Gender", y = "Percentage(%)", fill = "Vote Types")
# Note that the `echo = FALSE` parameter was added to the
# code chunk to prevent printing of the R code that generated the
# plot.