行列轉換

# Gross National Income (GNI) per capita.
# Reads the spreadsheet, drops the descriptive series/country columns, keeps
# rows that have a country code, and reshapes to one row per (scode, year).
gni <- import("../dataclass/gnipc_update.xlsx") %>%
  select(-c(SeriesName, CountryName, SeriesCode)) %>%
  rename(scode = CountryCode) %>%
  filter(!is.na(scode)) %>%
  # every column other than scode becomes a (variable, value) pair
  melt(id.vars = "scode") %>%
  rename(gnipc = value, year = variable)

# GDP deflator.
# Reads the spreadsheet (first row used as column names), drops the
# descriptive series/country columns, removes rows whose country code is
# empty, and reshapes to one row per (scode, year).
# NOTE(review): `scode != ""` evaluates to NA for missing codes, so filter()
# drops those rows as well.
deflator <- import("../dataclass/deflator.xlsx", col_names = TRUE) %>%
  select(-c(`Series Name`, `Country Name`, `Series Code`)) %>%
  rename(scode = `Country Code`) %>%
  filter(scode != "") %>%
  # every column other than scode becomes a (variable, value) pair
  melt(id.vars = "scode") %>%
  rename(deflator = value, year = variable)

# Education indicator, stored as `edu_gdp`.
# NOTE(review): presumably education spending relative to GDP -- confirm
# against the series description in edu.xlsx.
# Reads the spreadsheet (first row used as column names), drops the
# descriptive series/country columns, removes rows whose country code is
# empty, and reshapes to one row per (scode, year).
edu <- import("../dataclass/edu.xlsx", col_names = TRUE) %>%
  select(-c(`Series Name`, `Country Name`, `Series Code`)) %>%
  rename(scode = `Country Code`) %>%
  filter(scode != "") %>%
  # every column other than scode becomes a (variable, value) pair
  melt(id.vars = "scode") %>%
  rename(edu_gdp = value, year = variable)

# Regime data: keep the country identifiers, the year and polity2.
# polity2 measures the democracy index (from -10 to 10); it is kept because
# it is convenient for running linear regressions.
regime <- import("../dataclass/p5v2018.xls") %>%
  select(scode, country, year, polity2) %>%
  filter(year >= 1961) %>%
  # scores of 7 or above are labelled "democracy", lower scores "non-democ";
  # a missing polity2 yields a missing label
  mutate(democracy = ifelse(polity2 < 7, "non-democ", "democracy"))

資料合併(merge)

# Combine the four tables on (scode, year). merge() keeps only the keys that
# are present in both inputs, so a country-year must appear in every table.
mydata <- Reduce(
  function(left, right) merge(left, right, by = c("scode", "year")),
  list(gni, edu, deflator, regime)
) %>%
  mutate(
    # real income per capita: gnipc_r = gnipc (GNI per capita) / deflator
    # (GDP deflator)
    # NOTE(review): if `deflator` is an index with base year = 100, this ratio
    # is 1/100 of the real value and the 17000 cut-off below would not behave
    # as intended -- confirm the scale of the deflator column.
    gnipc_r = gnipc / deflator,
    # USD 20,000 in 2021 corresponds to only about 17,000 in 2015 terms
    developed = ifelse(gnipc_r > 17000, "已開發", "開發中"),
    # "開發中" (developing) is coded 0, "已開發" (developed) is coded 1
    deve_factor = recode_factor(developed, "開發中" = 0, "已開發" = 1)
  ) %>%
  relocate(country, scode, year)

作業六

## 1. 請用中文「逐 #」說明指令的用途(沒有 # 的就不用回答了)

# Name the data produced by the pipeline below `auth`.
auth <- mydata %>%
  filter(
    # keep the rows of mydata whose polity2 is below 7 and whose year equals
    # 2018
    year == 2018,
    polity2 < 7,
    # drop rows with a missing edu_gdp
    !is.na(edu_gdp)
  ) %>%
  # select the edu_gdp values of the rows that passed the conditions above
  select(edu_gdp)

# Non-authoritarian countries in 2018 (polity2 of 7 or above).
demo <- mydata %>%
  filter(
    year == 2018,
    polity2 >= 7,
    !is.na(edu_gdp)
  ) %>%
  select(edu_gdp)

# Two-sample t-test comparing edu_gdp between the two groups.
t.test(x = auth, y = demo)
## 
##  Welch Two Sample t-test
## 
## data:  auth and demo
## t = -0.98045, df = 47.199, p-value = 0.3319
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.251632  0.431328
## sample estimates:
## mean of x mean of y 
##  4.187756  4.597908

2. 以2018年來說,民主的國家是否投入較多資源在教育(edu_gdp)之上?

提示: (1) 注意變數資料格式為何?應該使用哪種方法? (2) 是否使用了正確的人均所得資料?

# Variables: edu_gdp (continuous), polity2 (continuous)

# Authoritarian countries in 2018 (polity2 below 7), rows with a missing
# edu_gdp removed.
auth <- mydata %>%
  filter(
    year == 2018,
    polity2 < 7,
    !is.na(edu_gdp)
  ) %>%
  select(edu_gdp)

# Non-authoritarian countries in 2018 (polity2 of 7 or above), rows with a
# missing edu_gdp removed.
demo <- mydata %>%
  filter(
    year == 2018,
    polity2 >= 7,
    !is.na(edu_gdp)
  ) %>%
  select(edu_gdp)

# Two-sample t-test comparing edu_gdp between the two groups.
t.test(x = auth, y = demo)
## 
##  Welch Two Sample t-test
## 
## data:  auth and demo
## t = -0.98045, df = 47.199, p-value = 0.3319
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.251632  0.431328
## sample estimates:
## mean of x mean of y 
##  4.187756  4.597908
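# p-value = 0.3319 (> 0.05): not statistically significant, so we cannot conclude that democracies and authoritarian countries differed in education investment in 2018.
# The 95 percent confidence interval for the difference in means (auth - demo) runs from -1.251632 to 0.431328; it contains 0, which agrees with the non-significant result.
# On average, democracies invested more than authoritarian countries (4.597908 vs 4.187756), but the gap is only about 0.41.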

3. 請以圖形輔助回答第二題

提示:(1) 請將民主與非民主國家的教育支出以視覺化方式呈現 (2) 試試看bar chart (3) 記得要有路人可以理解的標題和軸線名稱

# Bar chart for question 2: mean edu_gdp of democracies vs non-democracies
# in 2018.
# The data are restricted to 2018 and averaged per group before plotting, so
# each bar shows the group mean compared by the t-test rather than a sum over
# every country-year, which would grow with the number of observations.
mydata %>%
  filter(year == 2018, !is.na(democracy), !is.na(edu_gdp)) %>%
  group_by(democracy) %>%
  # summarise() returns one row per regime type
  summarise(mean_edu_gdp = mean(edu_gdp)) %>%
  ggplot(aes(x = democracy, y = mean_edu_gdp)) +
  geom_bar(
    stat = "identity",
    width = 0.5,
    fill = "antiquewhite3"
  ) +
  labs(
    title = "2018年民主國家與非民主國家平均教育支出",
    x = "政體類型",
    y = "平均教育支出(edu_gdp)"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))