tutorial

2018年5月26日

intsvy介紹

intsvy 是由 Daniel Caro 與 Przemyslaw Biecek 針對 TIMSS、PIRLS、PISA 與 PIAAC 四個大型教育資料庫所開發的套件，具有整理數據與分析數據的功能（Caro & Biecek, 2014）。

數據的選擇與合併 pisa.select.merge
次數表的計算與繪製 pisa.table
變項平均數的計算與繪製 pisa.mean
迴歸分析的計算與繪製 pisa.reg.pv

第一部分

套件安裝及資料整理

library(intsvy)

讀取資料並篩選數據

# Read the student questionnaire file from `folder` and keep the listed
# questionnaire items for the fifteen listed country codes.
dta <- pisa.select.merge(
  folder = "D:/data",
  student.file = "Student questionnaire data file.sav",
  student = c(
    "ST04Q01", "ST28Q01", "ST13Q01",
    sprintf("ST26Q%02d", 1:14)  # "ST26Q01", "ST26Q02", ..., "ST26Q14"
  ),
  countries = c(
    "AUS", "CAN", "DEU", "ESP", "FIN", "FRA", "GBR", "IDN",
    "ITA", "JPN", "MEX", "MYS", "NZL", "TAP", "USA"
  )
)

檢視資料結構 (表1)

# Print a compact summary of the structure of the merged data set.
str(dta)

轉換變數之資料型態

# Convert the country code to a factor and the requested questionnaire items
# to numeric.
#
# The items are addressed by name instead of by position (previously columns
# 137:153), so this step does not depend on the column order of `dta`.
# NOTE(review): assumes columns 137:153 were exactly the 17 items requested
# in pisa.select.merge() -- confirm with names(dta)[137:153].
item_vars <- c("ST04Q01", "ST28Q01", "ST13Q01", sprintf("ST26Q%02d", 1:14))

dta$CNT <- as.factor(dta$CNT)
dta[item_vars] <- lapply(dta[item_vars], as.numeric)

改變變數與變數名稱

# Derive the analysis variables from the raw questionnaire items.

# Country identifier used as the grouping variable in later steps.
dta$Country_ID <- dta$CNT

# ST04Q01 is replaced by |x - 1|; the factor below labels 0 as "F" and
# 1 as "M" (any other value becomes NA).
dta$ST04Q01 <- abs(dta$ST04Q01 - 1)
dta$Gender <- factor(dta$ST04Q01, levels = c(0, 1), labels = c("F", "M"))

# ST28Q01 is copied unchanged.
dta$Book <- dta$ST28Q01

# ST13Q01 is replaced by |x - 6|.
# NOTE(review): presumably this reverses the response scale -- confirm
# against the codebook.
dta$ST13Q01 <- abs(dta$ST13Q01 - 6)
dta$Mother <- dta$ST13Q01

# The fourteen ST26 items are replaced by |x - 2| (1 -> 1, 2 -> 0).
# They are addressed by name instead of by position (previously 140:153).
# NOTE(review): assumes columns 140:153 were ST26Q01..ST26Q14 -- confirm.
possession_vars <- sprintf("ST26Q%02d", 1:14)
dta[possession_vars] <- lapply(
  dta[possession_vars],
  function(item) abs(item - 2)
)

# Row-wise sum of the fourteen recoded items (NA if any item is NA).
# The original statement was broken across lines immediately before `$`,
# which R cannot parse; folding `+` over the columns gives the same sum.
dta$Possessions <- Reduce("+", dta[possession_vars])

第二部分

計算臺灣與其它十四個國家男、女學生家中藏書量的前六筆次數分配 (表3)

# Tabulate Book within every Country_ID x Gender combination and show the
# first six rows of the result.
ptableCB <- pisa.table(
  data = dta,
  variable = "Book",
  by = c("Country_ID", "Gender")
)
head(ptableCB, n = 6L)

繪製臺灣與其它十四個國家的結果 (圖1)

# Drop rows containing NA from the table, then plot it with stacked = TRUE.
plot(na.omit(ptableCB), stacked = TRUE)

以學生性別與家中藏書量計算臺灣與其它十四個國家的前六筆平均數學成績 (表4)

# Compute the MATH means for every Country_ID x Gender x Book combination
# and show the first six rows of the result.
pmeansMCGB <- pisa.mean.pv(
  pvlabel = "MATH",
  by = c("Country_ID", "Gender", "Book"),
  data = dta,
  export = FALSE  # spelled out: `F` is an ordinary, reassignable variable
)
head(pmeansMCGB)

繪製臺灣與其它十四個國家的結果 (圖2)

# Drop rows containing NA from the means table, then plot it with sort = TRUE.
plot(na.omit(pmeansMCGB), sort = TRUE)

第三部分

以學生性別、家中藏書量、母親教育程度、家中擁有物預測臺灣與其它十四個國家數學成績的前四個國家 (表5)

# Replace Gender by its numeric codes shifted down by one, so that it enters
# the regression as a number.
# NOTE(review): assumes Gender is still the two-level factor ("F", "M")
# created earlier, which yields 0 for "F" and 1 for "M".
dta$Gender <- abs(as.numeric(dta$Gender) - 1)

# Fit the MATH regression on the four predictors separately for each
# Country_ID and show the first six elements of the result.
rmodelMGBMP <- pisa.reg.pv(
  data = dta,
  by = "Country_ID",
  pvlabel = "MATH",
  x = c("Gender", "Book", "Mother", "Possessions"),
  export = FALSE
)
head(rmodelMGBMP, n = 6L)

繪製臺灣與其它十四個國家的結果 (圖3)

# Plot the regression results with se = TRUE.
plot(rmodelMGBMP, se = TRUE)

第四部分

將迴歸分析結果的資料框型態轉換成矩陣 (表6)

(替代方法) 函數 lapply() 搭配套件 dplyr

(try it) lapply(rmodelMGBMP, FUN = function(lst) lst$reg)

bind_rows()：Efficiently bind multiple data frames by row.

library(dplyr)
# Take the `reg` element of every entry in rmodelMGBMP and stack them
# row-wise into one data frame; the list names go into the column `id`.
rmodelMGBMP.df <- dplyr::bind_rows(
  lapply(rmodelMGBMP, function(country_fit) country_fit$reg),
  .id = "id"
)
head(rmodelMGBMP.df, n = 6L)

取出矩陣內臺灣與其它十四個國家的學生性別估計值、家中藏書量估計值、母親教育程度估計值與家中擁有物估計值，並將其資料框型態轉換成列表，且重新命名變項名稱

(替代方法) 函數 subset()

# Split the stacked coefficient table into one data frame per predictor.
#
# The rows of rmodelMGBMP.df are treated as repeating cycles of six rows per
# country, and the position inside a cycle identifies the coefficient:
# 2 = Gender, 3 = Book, 4 = Mother, 5 = Possessions.
# NOTE(review): assumes every per-country table has exactly six rows in the
# same order -- confirm against rmodelMGBMP[[1]]$reg.
rows_per_model <- 6L

# Fail loudly if the table is not made of whole six-row cycles; positional
# selection would otherwise pick the wrong coefficients without warning.
stopifnot(nrow(rmodelMGBMP.df) %% rows_per_model == 0)

# Return the `id` and `Estimate` columns of the rows at `position` within
# each cycle, with `Estimate` renamed to `estimate_name`.
extract_estimate <- function(position, estimate_name) {
  # seq_len() is safe for an empty table, unlike 1:nrow().
  keep <- seq_len(nrow(rmodelMGBMP.df)) %% rows_per_model == position
  picked <- rmodelMGBMP.df[keep, c("id", "Estimate"), drop = FALSE]
  colnames(picked) <- c("id", estimate_name)
  picked
}

dataG <- extract_estimate(2L, "GenderEstimate")
dataB <- extract_estimate(3L, "BookEstimate")
dataM <- extract_estimate(4L, "MotherEstimate")
dataP <- extract_estimate(5L, "PossessionsEstimate")

合併臺灣與其它十四個國家的學生性別估計值、家中藏書量估計值、母親教育程度估計值與家中擁有物估計值，並存入新資料框

# Join the four per-predictor tables on `id`, one table at a time, keeping
# every row from both sides at each step.
dataGB <- merge(
  x = dataG, y = dataB,
  by = "id", all.x = TRUE, all.y = TRUE
)
dataGBM <- merge(
  x = dataGB, y = dataM,
  by = "id", all.x = TRUE, all.y = TRUE
)
dataGBMP <- merge(
  x = dataGBM, y = dataP,
  by = "id", all.x = TRUE, all.y = TRUE
)

在新資料框中新增重新命名的國家變項

# Add a full country name for every row of dataGBMP.
#
# The name is looked up from the code in `id` instead of being assigned by
# position, so the result does not depend on the row order of dataGBMP.
# NOTE(review): assumes `id` holds the three-letter codes passed to
# pisa.select.merge() -- confirm with unique(dataGBMP$id).
country_names <- c(
  AUS = "Australia", CAN = "Canada", DEU = "Germany", ESP = "Spain",
  FIN = "Finland", FRA = "France", GBR = "United Kingdom",
  IDN = "Indonesia", ITA = "Italy", JPN = "Japan", MEX = "Mexico",
  MYS = "Malaysia", NZL = "New Zealand", TAP = "Taiwan",
  USA = "United States"
)
country_codes <- trimws(as.character(dataGBMP$id))

# Stop if a code has no name; an NA would fail to match any map polygon
# in the later merge.
stopifnot(all(country_codes %in% names(country_names)))

dataGBMP$COUNTRY <- unname(country_names[country_codes])

檢視資料結構 (表7)

# Print a compact summary of the structure of the combined estimates table.
str(dataGBMP)

轉換變數之資料型態

# Turn the two identifier columns into factors in a single step.
dataGBMP[c("id", "COUNTRY")] <- lapply(
  dataGBMP[c("id", "COUNTRY")],
  as.factor
)

再次檢視資料結構 (表8)

# Print the structure again to check the converted column types.
str(dataGBMP)

計算學生性別估計值的前面25%與後面25%的數值

# Print the 25th and 75th percentiles of the Gender estimates.
quantile(dataGBMP$GenderEstimate,probs = c(0.25,0.75))

新增學生性別估計值程度變項

# Classify each Gender estimate as bottom 25%, middle 50% or top 25%.
#
# The cut points are computed from the data instead of being copied by hand
# from an earlier run (previously -6.41, 10.310, 20.575, 23.59). The outer
# bounds are infinite so that the minimum and maximum always fall inside a
# bin.
gender_quartiles <- quantile(
  dataGBMP$GenderEstimate,
  probs = c(0.25, 0.75),
  na.rm = TRUE
)
dataGBMP$Genderlevel <- cut(
  dataGBMP$GenderEstimate,
  breaks = c(-Inf, gender_quartiles, Inf),
  labels = c("後面25%", "中間50%", "前面25%")
)

檢視新資料框內的資料 (表9)

# Print the whole estimates table.
dataGBMP

第五部分

載入套件 maptools 與 RColorBrewer

library(maptools)
library(RColorBrewer)

讀取 world.shp

# Read the world shapefile from disk and print the structure of the result.
# NOTE(review): readShapePoly() comes from maptools, which is reported as
# deprecated/retired -- confirm, and consider a maintained reader such as
# sf::st_read(). The later merge() and spplot() calls would need checking
# before any switch.
world.shp <- readShapePoly("D:/world/world.shp")
str(world.shp)

檢視在 world.shp 中的國家名稱

# List the distinct values of the COUNTRY attribute in the shapefile, to
# check how countries are spelled before joining on that column.
unique(world.shp$COUNTRY)

合併 world.shp 與新資料框

# Attach the estimates to the map polygons by country name, keeping every
# polygon of world.shp (all.x = TRUE).
#
# The key is given as `by = "COUNTRY"` for both sides. Previously only `by.y`
# was set, so the key on the map side was whatever column names the two
# objects happened to share.
rwrldGBMP <- merge(
  world.shp, dataGBMP,
  by = "COUNTRY",
  all.x = TRUE
)

設定顏色漸層

# Build 53 colours interpolated from the 6-colour "Reds" palette; used as
# the fill scale of the maps below.
cols <- colorRampPalette(brewer.pal(6, "Reds"))(53)

繪製臺灣與其它十四個國家的學生性別估計值 (圖4)

# Map the Genderlevel attribute, filled with the `cols` colour scale.
spplot(
  obj = rwrldGBMP,
  zcol = "Genderlevel",
  main = "參與 PISA 2012 臺灣與其它十四個國家的學生性別估計值",
  col.regions = cols
)

繪製臺灣與其它十四個國家的家中藏書量估計值 (圖5)

# Map the BookEstimate attribute, filled with the `cols` colour scale.
spplot(
  obj = rwrldGBMP,
  zcol = "BookEstimate",
  main = "參與 PISA 2012 臺灣與其它十四個國家的家中藏書量估計值",
  col.regions = cols
)

繪製臺灣與其它十四個國家的母親教育程度估計值 (圖6)

# Map the MotherEstimate attribute, filled with the `cols` colour scale.
spplot(
  obj = rwrldGBMP,
  zcol = "MotherEstimate",
  main = "參與 PISA 2012 臺灣與其它十四個國家的母親教育程度估計值",
  col.regions = cols
)

繪製臺灣與其它十四個國家的家中擁有物估計值 (圖7)

# Map the PossessionsEstimate attribute, filled with the `cols` colour scale.
spplot(
  obj = rwrldGBMP,
  zcol = "PossessionsEstimate",
  main = "參與 PISA 2012 臺灣與其它十四個國家的家中擁有物估計值",
  col.regions = cols
)