# Gross National Income (GNI) per capita.
# Reads the wide spreadsheet (one column per year) and reshapes it to one row
# per country-year with the columns scode, year and gnipc.
gni <- import("../dataclass/gnipc_update.xlsx") %>%
  select(-SeriesName, -CountryName, -SeriesCode) %>%
  rename(scode = CountryCode) %>%
  filter(!is.na(scode)) %>% # drop rows that carry no country code
  # Spell out `id.vars`; the former `id =` only worked through partial
  # argument matching.
  melt(id.vars = "scode") %>%
  # melt() names the stacked columns `variable` (former column headers) and
  # `value`. NOTE(review): under reshape2::melt `year` is therefore a factor,
  # not a number -- confirm before doing arithmetic on it.
  rename(gnipc = value, year = variable)
# GDP deflator.
# Reads the wide spreadsheet (one column per year) and reshapes it to one row
# per country-year with the columns scode, year and deflator.
deflator <- import("../dataclass/deflator.xlsx", col_names = TRUE) %>%
  select(-`Series Name`, -`Country Name`, -`Series Code`) %>%
  rename(scode = `Country Code`) %>%
  # Keep rows with a non-empty country code (rows where scode is NA are
  # dropped by filter() as well).
  filter(scode != "") %>%
  # Spell out `id.vars`; the former `id =` only worked through partial
  # argument matching.
  melt(id.vars = "scode") %>%
  # melt() names the stacked columns `variable` and `value`.
  rename(deflator = value, year = variable)
# Education spending series, stored as edu_gdp.
# Reads the wide spreadsheet (one column per year) and reshapes it to one row
# per country-year with the columns scode, year and edu_gdp.
edu <- import("../dataclass/edu.xlsx", col_names = TRUE) %>%
  select(-`Series Name`, -`Country Name`, -`Series Code`) %>%
  rename(scode = `Country Code`) %>%
  # Keep rows with a non-empty country code (rows where scode is NA are
  # dropped by filter() as well).
  filter(scode != "") %>%
  # Spell out `id.vars`; the former `id =` only worked through partial
  # argument matching.
  melt(id.vars = "scode") %>%
  # melt() names the stacked columns `variable` and `value`.
  rename(edu_gdp = value, year = variable)
# Regime data read from p5v2018.xls, restricted to 1961 onward.
# polity2 is the democracy index (described by the author as running from
# 10 to -10), chosen because it is convenient for linear regression.
# Country-years with polity2 >= 7 are labelled "democracy", all others
# "non-democ"; a missing polity2 yields a missing label.
regime <- import("../dataclass/p5v2018.xls") %>%
  select(scode, country, year, polity2) %>%
  filter(year >= 1961) %>%
  mutate(democracy = ifelse(polity2 >= 7, "democracy", "non-democ"))
# Build the analysis table: one row per country-year that is present in all
# four sources (merge() keeps only matching keys by default).
# NOTE(review): the World Bank style sheets and the Polity file may use
# different country-code schemes in `scode`; unmatched countries would be
# dropped silently here -- confirm.
mydata <- Reduce(
  function(left, right) merge(left, right, by = c("scode", "year")),
  list(gni, edu, deflator, regime)
) %>%
  mutate(
    # Real income per capita: gnipc_r = GNI per capita / GDP deflator.
    # NOTE(review): if the deflator is an index with base 100, this ratio is
    # off by a factor of 100 relative to the 17000 cut-off below -- confirm.
    gnipc_r = gnipc / deflator,
    # Author's rationale for the cut-off: USD 20,000 in 2021 is only about
    # USD 17,000 in 2015 terms.
    developed = ifelse(gnipc_r <= 17000, "開發中", "已開發"),
    # Developing ("開發中") is coded 0, developed ("已開發") is coded 1.
    deve_factor = recode_factor(developed, "開發中" = 0, "已開發" = 1)
  ) %>%
  relocate(country, scode, year)
## 1. 請用中文``逐#’’說明指令的用途(沒有#的就不用回答了)
# `auth`: the result of the pipeline below. From mydata, keep the rows whose
# polity2 is below 7 and whose year is 2018, drop missing edu_gdp, and keep
# only the edu_gdp column.
auth <- mydata %>%
  filter(year == 2018, polity2 < 7, !is.na(edu_gdp)) %>%
  select(edu_gdp)

# `demo`: the same selection for the non-authoritarian countries in 2018
# (polity2 of 7 or above).
demo <- mydata %>%
  filter(year == 2018, polity2 >= 7, !is.na(edu_gdp)) %>%
  select(edu_gdp)

# Two-sample t-test comparing edu_gdp between the two groups.
t.test(auth, demo)
##
## Welch Two Sample t-test
##
## data: auth and demo
## t = -0.98045, df = 47.199, p-value = 0.3319
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.251632 0.431328
## sample estimates:
## mean of x mean of y
## 4.187756 4.597908
提示: (1) 注意變數資料格式為何?應該使用哪種方法? (2) 是否使用了正確的人均所得資料?
# Variables: edu_gdp (continuous), polity2 (continuous).

# Authoritarian countries in 2018 (polity2 below 7) with a non-missing
# edu_gdp value.
auth <- mydata %>%
  filter(year == 2018, polity2 < 7, !is.na(edu_gdp)) %>%
  select(edu_gdp)

# Non-authoritarian countries in 2018 (polity2 of 7 or above) with a
# non-missing edu_gdp value.
demo <- mydata %>%
  filter(year == 2018, polity2 >= 7, !is.na(edu_gdp)) %>%
  select(edu_gdp)

# Two-sample t-test comparing edu_gdp between the two groups.
t.test(auth, demo)
##
## Welch Two Sample t-test
##
## data: auth and demo
## t = -0.98045, df = 47.199, p-value = 0.3319
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.251632 0.431328
## sample estimates:
## mean of x mean of y
## 4.187756 4.597908
# p-value = 0.3319 ,不具顯著性,2018年民主國家與威權國家間在教育的投資上較無差別
#信賴區間(95 percent confidence interval):-1.251632 至 0.431328,區間包含0,因此無法拒絕兩組平均數相等的虛無假設,與p值不顯著的結論一致
#以平均來說民主國家較威權國家多投資,但差距僅0.41
提示:(1) 請將民主與非民主國家的教育支出以視覺化方式呈現 (2) 試試看bar chart (3) 記得要有路人可以理解的標題和軸線名稱
# Bar chart of average education spending (edu_gdp) by regime type.
# The earlier version piped ungrouped rows into geom_bar(stat = "identity"),
# which stacks every country-year value into one bar: the bar height was the
# sum over all observations and mostly reflected how many rows each group
# had. The rows are now collapsed to one mean per group before plotting.
# All years present in mydata are pooled, as before.
mydata %>%
  # Drop rows with no regime label or no spending value so that mean() is
  # computed on observed data only.
  filter(!is.na(democracy), !is.na(edu_gdp)) %>%
  group_by(democracy) %>%
  summarise(edu_gdp = mean(edu_gdp)) %>%
  ggplot(aes(democracy, edu_gdp)) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    fill = "antiquewhite3"
  ) +
  # NOTE(review): the y-axis label assumes edu_gdp is education spending as
  # a percentage of GDP -- confirm against the source spreadsheet.
  labs(
    title = "民主國家與非民主國家的平均教育支出",
    x = "政體類型",
    y = "平均教育支出占GDP比重(%)"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))