1 Research hypothesis

1.閱讀的類型與成績相關為何？

2.使用何種方式閱讀（紙本or數位）與成績相關為何?

3.花多久時間閱讀與成績相關為何？

2 load packages

library('lattice')
library('pastecs')
library('ggplot2')

3 input data

# Read the PISA 2018 extract from a local CSV file into `dta`.
# `header = TRUE` is spelled out in full: the previous `h=T` relied on partial
# argument matching and on `T`, which is an ordinary, reassignable variable.
dta <- read.csv("C:/Users/pc/Desktop/Rfolder/pisa2018m.csv", header = TRUE)

4 Data Management

# Inspect the data: print the first six rows
head(dta, n = 6L)

##   CNT CNTRYID ST004D01T ST150Q01IA ST150Q02IA ST150Q03IA ST150Q04HA ST168Q01HA
## 1 JPN     392         2          4          4          4          3          2
## 2 JPN     392         2          3          2          3          2          2
## 3 JPN     392         1          2          4          2          1          2
## 4 JPN     392         1          1          1          4          1          4
## 5 JPN     392         1          1          4          1          1          2
## 6 JPN     392         2          1          2          1          1          2
##   ST175Q01IA PV1READ.1 PV2READ.1 PV3READ.1 PV4READ.1 PV5READ.1 PV6READ.1
## 1          1   704.541   718.389   664.275   635.257   699.314   603.372
## 2          2   569.687   595.746   547.363   586.503   634.028   569.807
## 3          5   647.678   588.173   665.623   612.453   569.214   633.645
## 4          3   672.170   677.166   694.125   663.460   734.162   678.477
## 5          3   671.836   705.063   676.206   669.906   680.843   685.230
## 6          2   770.257   613.520   667.713   712.632   717.189   695.723
##   PV7READ.1 PV8READ.1 PV9READ.1 PV10READ.1 Gender read_AVG
## 1   645.901   647.710   672.825    664.549      1 665.6133
## 2   628.912   573.683   584.897    594.917      1 588.5543
## 3   608.496   657.709   610.045    612.545      0 620.5581
## 4   634.430   703.867   658.512    711.202      0 682.7571
## 5   650.126   669.240   727.488    638.997      0 677.4935
## 6   639.970   694.663   637.939    705.787      1 685.5393

# Per-column summary (quantiles, mean, NA counts) of the whole data frame
summary(object = dta)

##      CNT               CNTRYID        ST004D01T       ST150Q01IA 
##  Length:20002       Min.   :158.0   Min.   :1.000   Min.   :1.0  
##  Class :character   1st Qu.:158.0   1st Qu.:1.000   1st Qu.:1.0  
##  Mode  :character   Median :392.0   Median :2.000   Median :2.0  
##                     Mean   :313.2   Mean   :1.503   Mean   :2.4  
##                     3rd Qu.:410.0   3rd Qu.:2.000   3rd Qu.:3.0  
##                     Max.   :410.0   Max.   :2.000   Max.   :4.0  
##                                                     NA's   :256  
##    ST150Q02IA      ST150Q03IA      ST150Q04HA      ST168Q01HA   
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :3.000   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :2.565   Mean   :2.395   Mean   :2.195   Mean   :2.412  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##  NA's   :233     NA's   :258     NA's   :272     NA's   :239    
##    ST175Q01IA      PV1READ.1        PV2READ.1       PV3READ.1    
##  Min.   :1.000   Min.   : 89.43   Min.   :124.6   Min.   :130.6  
##  1st Qu.:1.000   1st Qu.:438.96   1st Qu.:438.7   1st Qu.:439.5  
##  Median :2.000   Median :510.78   Median :510.5   Median :511.0  
##  Mean   :2.409   Mean   :505.22   Mean   :505.3   Mean   :505.7  
##  3rd Qu.:3.000   3rd Qu.:576.88   3rd Qu.:576.8   3rd Qu.:576.8  
##  Max.   :5.000   Max.   :835.34   Max.   :815.6   Max.   :861.6  
##  NA's   :201                                                     
##    PV4READ.1       PV5READ.1       PV6READ.1       PV7READ.1    
##  Min.   :115.3   Min.   :148.1   Min.   :121.2   Min.   :141.0  
##  1st Qu.:439.9   1st Qu.:438.7   1st Qu.:439.2   1st Qu.:439.9  
##  Median :511.3   Median :512.1   Median :510.9   Median :511.7  
##  Mean   :505.9   Mean   :505.8   Mean   :505.5   Mean   :506.1  
##  3rd Qu.:577.2   3rd Qu.:576.6   3rd Qu.:576.8   3rd Qu.:577.6  
##  Max.   :863.6   Max.   :816.3   Max.   :833.2   Max.   :842.0  
##                                                                 
##    PV8READ.1        PV9READ.1        PV10READ.1        Gender      
##  Min.   : 96.43   Min.   : 92.92   Min.   :141.4   Min.   :0.0000  
##  1st Qu.:438.97   1st Qu.:439.14   1st Qu.:438.4   1st Qu.:0.0000  
##  Median :510.92   Median :510.59   Median :511.2   Median :1.0000  
##  Mean   :505.64   Mean   :505.47   Mean   :505.2   Mean   :0.5033  
##  3rd Qu.:577.89   3rd Qu.:576.72   3rd Qu.:577.1   3rd Qu.:1.0000  
##  Max.   :814.14   Max.   :841.22   Max.   :806.5   Max.   :1.0000  
##                                                                    
##     read_AVG    
##  Min.   :136.5  
##  1st Qu.:441.7  
##  Median :512.5  
##  Mean   :505.6  
##  3rd Qu.:576.1  
##  Max.   :785.0  
##

5 初步分析與研究背景

# Count how many times each reading-time response (ST175Q01IA) occurs and
# store the tabulation as a two-column data frame.
times.df <- setNames(
  data.frame(table(dta$ST175Q01IA)),
  c("ST175Q01IA", "Freq")
)
head(times.df)

##   ST175Q01IA Freq
## 1          1 6792
## 2          2 4465
## 3          3 4010
## 4          4 2712
## 5          5 1822

# Count how many rows belong to each country code (CNT) and store the
# tabulation as a two-column data frame.
CNT.df <- setNames(
  data.frame(table(dta$CNT)),
  c("CNT", "Freq")
)
head(CNT.df)

##   CNT Freq
## 1 JPN 6109
## 2 KOR 6650
## 3 TAP 7243

# Number of participating students in Japan, Korea and Taiwan, with each bar
# stacked by reading-time response. ST175Q01IA is read in as a number, so it is
# wrapped in factor() to make the fill a discrete grouping; a continuous fill
# does not split the bars, which left position = "stack" without any effect.
# labs(fill = ...) keeps the legend titled with the plain column name.
ggplot(dta, aes(x = CNT, fill = factor(ST175Q01IA))) +
  geom_bar(position = "stack") +
  labs(fill = "ST175Q01IA")

6 Data visualisation

# Multi-category comparison: how reading time is distributed, by country
ggplot(dta, mapping = aes(x = ST175Q01IA, fill = CNT)) +
  geom_bar() +
  labs(
    x = "閱讀時間",
    y = "次數",
    title = "各國閱讀時間差異"
  )

## Warning: Removed 201 rows containing non-finite values (stat_count).

由圖可知，日本學生花費較少時間閱讀(選擇1的次數最多，選擇5的次數最少)
韓國學生花費在閱讀的時間之情形與日本差不多(選擇1的次數最多，選擇5的次數最少)
然而，台灣的分布較為平均，雖然閱讀時間少的比率仍高，但閱讀時間多的比率相較於日韓屬高。
因此，以下擬探究在閱讀方面的得分是否與閱讀時間相關。

# Reading score against reading time: raw points plus a linear trend line
ggplot(dta, aes(x = ST175Q01IA, y = read_AVG)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    x = "閱讀時間",
    y = "閱讀得分",
    title = "閱讀時間與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 201 rows containing non-finite values (stat_smooth).

## Warning: Removed 201 rows containing missing values (geom_point).

分析這三個國家在閱讀時間與閱讀得分是否有相關。
根據圖示，僅可得知趨勢線為低度正相關，為進一步了解其分布情形，故以下圖表示。

# Same relationship as a jittered, semi-transparent scatter so that overlapping
# points become visible; a linear trend line and marginal rugs are added.
ggplot(dta, aes(x = ST175Q01IA, y = read_AVG)) +
  geom_jitter(size = 5, alpha = 0.3) +
  geom_smooth(method = "lm") +
  geom_rug(col = "steelblue", alpha = 0.1, size = 1.5) +
  labs(
    x = "閱讀時間",
    y = "閱讀得分",
    title = "閱讀時間與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 201 rows containing non-finite values (stat_smooth).

## Warning: Removed 201 rows containing missing values (geom_point).

# Pearson correlation test between mean reading score and reading time
cor.test(x = dta$read_AVG, y = dta$ST175Q01IA)

## 
##  Pearson's product-moment correlation
## 
## data:  dta$read_AVG and dta$ST175Q01IA
## t = 34.524, df = 19799, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2251111 0.2513869
## sample estimates:
##       cor 
## 0.2382926

根據圖示，閱讀時間較少的學生獲得高分的個數略少於其他組別(閱讀時間)，但差別不大。
然而，閱讀時間較少的學生獲得低分的次數卻明顯多於閱讀時間較多的學生。
由此可知，閱讀時間與閱讀成績呈正相關（相關分析本身無法直接推論因果）。
尤其，以各個國家為分組來個別探討國內情形時，其高低分差別會更明顯，如下圖示。

6.1 不同國家差異，閱讀時間與閱讀成績是否相關?

# Reading time vs. reading score, one panel per country (CNT)
ggplot(dta, aes(x = ST175Q01IA, y = read_AVG)) +
  geom_jitter(size = 5, alpha = 0.3) +
  geom_smooth(method = "lm") +
  geom_rug(col = "steelblue", alpha = 0.1, size = 1.5) +
  facet_grid(. ~ CNT) +
  labs(
    x = "閱讀時間",
    y = "閱讀得分",
    title = "閱讀時間與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 201 rows containing non-finite values (stat_smooth).

## Warning: Removed 201 rows containing missing values (geom_point).

# Reading score by country: jittered points, marginal rug, and box plots.
# The former geom_smooth(method = 'lm') layer is removed: CNT is a categorical
# country code, so every group sits at a single x position and no regression
# line can be fitted or drawn for it.
ggplot(data = dta, aes(x = CNT, y = read_AVG)) +
  geom_jitter(size = 5, alpha = 0.3) +
  geom_rug(col = "steelblue", alpha = 0.1, size = 1.5) +
  geom_boxplot(col = "steelblue", alpha = 0.1, size = 1.5) +
  xlab("國家") +
  ylab("閱讀得分") +
  ggtitle("國家閱讀成績差異")

## `geom_smooth()` using formula 'y ~ x'

特別的是，這三個國家的學生閱讀成績卻大致相同，因此推得國家差異對於學生閱讀成績解釋量小，較無直接相關(如上圖示)

6.2 性別差異與閱讀成績是否相關?

# Reading time vs. reading score, one panel per value of the Gender column
ggplot(dta, aes(x = ST175Q01IA, y = read_AVG)) +
  geom_jitter(size = 5, alpha = 0.3) +
  geom_smooth(method = "lm") +
  geom_rug(col = "steelblue", alpha = 0.1, size = 1.5) +
  facet_grid(. ~ Gender) +
  labs(
    x = "閱讀時間",
    y = "閱讀得分",
    title = "閱讀時間與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 201 rows containing non-finite values (stat_smooth).

## Warning: Removed 201 rows containing missing values (geom_point).

不同性別在閱讀時間與閱讀成績的關係上無明顯差異。

7 閱讀的類型與成績相關為何？

本研究以文章或資料的四種類型作為預測成績之假設因素，分別做回歸分析。
四種類型分別為包含:

1.圖表或地圖的文章
2.小說（例如：長篇小說、短篇故事）
3.包含表格或統計圖的文章
4.包含網站連結的數位文章

7.1 閱讀圖表或地圖文章之頻率對閱讀得分部分

# Reading score against how often charts/maps are read (ST150Q01IA):
# raw points plus a linear trend line
ggplot(dta, aes(x = ST150Q01IA, y = read_AVG)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    x = "閱讀圖表或地圖文章之頻率",
    y = "閱讀得分",
    title = "閱讀類型與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 256 rows containing non-finite values (stat_smooth).

## Warning: Removed 256 rows containing missing values (geom_point).

# Box plots of reading score for each ST150Q01IA response, one panel per value.
# The box-plot layer gets an explicit `group`: with a numeric x and no group,
# ggplot2 warns "Continuous x aesthetic -- did you forget aes(group=...)?".
# NOTE(review): each panel holds a single x value, so the lm smoother probably
# has nothing to fit here -- confirm whether that layer is still wanted.
ggplot(data = dta, aes(x = ST150Q01IA)) +
  geom_smooth(aes(y = read_AVG), method = "lm") +
  geom_boxplot(aes(y = read_AVG, group = ST150Q01IA)) +
  facet_grid(. ~ ST150Q01IA) +
  xlab("閱讀圖表或地圖文章之頻率") +
  ylab("閱讀得分") +
  ggtitle("閱讀類型與閱讀成績")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 256 rows containing non-finite values (stat_smooth).

## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

## Warning: Removed 256 rows containing missing values (stat_boxplot).

# Pearson correlation test between mean reading score and ST150Q01IA
cor.test(x = dta$read_AVG, y = dta$ST150Q01IA)

## 
##  Pearson's product-moment correlation
## 
## data:  dta$read_AVG and dta$ST150Q01IA
## t = 27.141, df = 19744, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1761696 0.2030625
## sample estimates:
##       cor 
## 0.1896516

# Simple linear regression: mean reading score on chart/map reading frequency
model001 <- lm(read_AVG ~ ST150Q01IA, data = dta)
summary(model001)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q01IA, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -382.70  -62.17    6.43   68.60  284.84 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 467.8176     1.5806  295.97   <2e-16 ***
## ST150Q01IA   16.1569     0.5953   27.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95.05 on 19744 degrees of freedom
##   (因為不存在，256 個觀察量被刪除了)
## Multiple R-squared:  0.03597,    Adjusted R-squared:  0.03592 
## F-statistic: 736.6 on 1 and 19744 DF,  p-value: < 2.2e-16

7.2 閱讀小說長文之頻率對閱讀得分部分

# Reading score against how often fiction is read (ST150Q02IA):
# raw points plus a linear trend line
ggplot(dta, aes(x = ST150Q02IA, y = read_AVG)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    x = "閱讀小說長文之頻率",
    y = "閱讀得分",
    title = "閱讀類型與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 233 rows containing non-finite values (stat_smooth).

## Warning: Removed 233 rows containing missing values (geom_point).

# Box plots of reading score for each ST150Q02IA response, one panel per value.
# The box-plot layer gets an explicit `group`: with a numeric x and no group,
# ggplot2 warns "Continuous x aesthetic -- did you forget aes(group=...)?".
# NOTE(review): each panel holds a single x value, so the lm smoother probably
# has nothing to fit here -- confirm whether that layer is still wanted.
ggplot(data = dta, aes(x = ST150Q02IA)) +
  geom_smooth(aes(y = read_AVG), method = "lm") +
  geom_boxplot(aes(y = read_AVG, group = ST150Q02IA)) +
  facet_grid(. ~ ST150Q02IA) +
  xlab("閱讀小說長文之頻率") +
  ylab("閱讀得分") +
  ggtitle("閱讀類型與閱讀成績")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 233 rows containing non-finite values (stat_smooth).

## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

## Warning: Removed 233 rows containing missing values (stat_boxplot).

# Pearson correlation test between mean reading score and ST150Q02IA
cor.test(x = dta$read_AVG, y = dta$ST150Q02IA)

## 
##  Pearson's product-moment correlation
## 
## data:  dta$read_AVG and dta$ST150Q02IA
## t = 27.456, df = 19767, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1782029 0.2050588
## sample estimates:
##       cor 
## 0.1916667

# Simple linear regression: mean reading score on fiction reading frequency
model002 <- lm(read_AVG ~ ST150Q02IA, data = dta)
summary(model002)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q02IA, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -381.30  -62.44    5.86   67.74  280.18 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 462.7831     1.7315  267.27   <2e-16 ***
## ST150Q02IA   17.0643     0.6215   27.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95 on 19767 degrees of freedom
##   (因為不存在，233 個觀察量被刪除了)
## Multiple R-squared:  0.03674,    Adjusted R-squared:  0.03669 
## F-statistic: 753.9 on 1 and 19767 DF,  p-value: < 2.2e-16

7.3 閱讀統計圖類型之頻率對閱讀得分部分

# Reading score against how often texts with tables/graphs are read
# (ST150Q03IA): raw points plus a linear trend line
ggplot(dta, aes(x = ST150Q03IA, y = read_AVG)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    x = "閱讀統計圖類型之頻率",
    y = "閱讀得分",
    title = "閱讀類型與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 258 rows containing non-finite values (stat_smooth).

## Warning: Removed 258 rows containing missing values (geom_point).

# Box plots of reading score for each ST150Q03IA response, one panel per value.
# The box-plot layer gets an explicit `group`: with a numeric x and no group,
# ggplot2 warns "Continuous x aesthetic -- did you forget aes(group=...)?".
# NOTE(review): each panel holds a single x value, so the lm smoother probably
# has nothing to fit here -- confirm whether that layer is still wanted.
ggplot(data = dta, aes(x = ST150Q03IA)) +
  geom_smooth(aes(y = read_AVG), method = "lm") +
  geom_boxplot(aes(y = read_AVG, group = ST150Q03IA)) +
  facet_grid(. ~ ST150Q03IA) +
  xlab("閱讀統計圖類型之頻率") +
  ylab("閱讀得分") +
  ggtitle("閱讀類型與閱讀成績")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 258 rows containing non-finite values (stat_smooth).

## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

## Warning: Removed 258 rows containing missing values (stat_boxplot).

# Pearson correlation test between mean reading score and ST150Q03IA
cor.test(x = dta$read_AVG, y = dta$ST150Q03IA)

## 
##  Pearson's product-moment correlation
## 
## data:  dta$read_AVG and dta$ST150Q03IA
## t = 30.315, df = 19742, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1975329 0.2241897
## sample estimates:
##       cor 
## 0.2109005

# Simple linear regression: mean reading score on table/graph reading frequency
model003 <- lm(read_AVG ~ ST150Q03IA, data = dta)
summary(model003)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q03IA, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -367.72  -62.18    6.28   67.77  278.35 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 463.7882     1.5656  296.24   <2e-16 ***
## ST150Q03IA   17.8897     0.5901   30.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 94.58 on 19742 degrees of freedom
##   (因為不存在，258 個觀察量被刪除了)
## Multiple R-squared:  0.04448,    Adjusted R-squared:  0.04443 
## F-statistic:   919 on 1 and 19742 DF,  p-value: < 2.2e-16

7.4 閱讀數位連結文章之頻率對閱讀得分部分

# Reading score against how often digital texts with links are read
# (ST150Q04HA): raw points plus a linear trend line
ggplot(dta, aes(x = ST150Q04HA, y = read_AVG)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    x = "閱讀數位連結文章之頻率",
    y = "閱讀得分",
    title = "閱讀類型與閱讀成績"
  )

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 272 rows containing non-finite values (stat_smooth).

## Warning: Removed 272 rows containing missing values (geom_point).

# Box plots of reading score for each ST150Q04HA response, one panel per value.
# The box-plot layer gets an explicit `group`: with a numeric x and no group,
# ggplot2 warns "Continuous x aesthetic -- did you forget aes(group=...)?".
# NOTE(review): each panel holds a single x value, so the lm smoother probably
# has nothing to fit here -- confirm whether that layer is still wanted.
ggplot(data = dta, aes(x = ST150Q04HA)) +
  geom_smooth(aes(y = read_AVG), method = "lm") +
  geom_boxplot(aes(y = read_AVG, group = ST150Q04HA)) +
  facet_grid(. ~ ST150Q04HA) +
  xlab("閱讀數位連結文章之頻率") +
  ylab("閱讀得分") +
  ggtitle("閱讀類型與閱讀成績")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 272 rows containing non-finite values (stat_smooth).

## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

## Warning: Removed 272 rows containing missing values (stat_boxplot).

# Pearson correlation test between mean reading score and ST150Q04HA
cor.test(x = dta$read_AVG, y = dta$ST150Q04HA)

## 
##  Pearson's product-moment correlation
## 
## data:  dta$read_AVG and dta$ST150Q04HA
## t = 10.693, df = 19728, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.06202087 0.08976750
## sample estimates:
##        cor 
## 0.07590888

# Simple linear regression: mean reading score on digital-link reading frequency
model004 <- lm(read_AVG ~ ST150Q04HA, data = dta)
summary(model004)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q04HA, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -375.28  -63.92    6.72   69.72  273.15 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 492.6490     1.4808  332.69   <2e-16 ***
## ST150Q04HA    6.3900     0.5976   10.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.46 on 19728 degrees of freedom
##   (因為不存在，272 個觀察量被刪除了)
## Multiple R-squared:  0.005762,   Adjusted R-squared:  0.005712 
## F-statistic: 114.3 on 1 and 19728 DF,  p-value: < 2.2e-16

整體看來，閱讀的類型與成績相關性不高，各分項目與成績的相關係數皆屬低相關。

8 使用何種方式閱讀（紙本or數位）與成績相關為何?

# Lattice scatter plot of reading score against reading mode (ST168Q01HA);
# type draws points ("p"), a background grid ("g") and a regression line ("r").
xyplot(
  read_AVG ~ ST168Q01HA,
  data = dta,
  type = c("p", "g", "r"),
  xlab = "以何種方式閱讀",
  ylab = "閱讀成績"
)

# Pearson correlation test between mean reading score and ST168Q01HA
cor.test(x = dta$read_AVG, y = dta$ST168Q01HA)

## 
##  Pearson's product-moment correlation
## 
## data:  dta$read_AVG and dta$ST168Q01HA
## t = 20.609, df = 19761, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1313804 0.1586779
## sample estimates:
##       cor 
## 0.1450568

整體看來，學生使用何種方式閱讀對成績的影響較低，且相關係數也顯示低相關。
以下進一步探究是否「國家」與「性別」間存在閱讀方式對成績的影響。

8.1 不同國家使用不同方式閱讀（紙本or數位）與成績相關是否有異?

# Reading score against reading mode (ST168Q01HA), one panel per country.
# The x-axis label now names the plotted variable; the previous label
# ("閱讀圖表或地圖") described a different question (chart/map reading).
xyplot(read_AVG ~ ST168Q01HA | CNT, data = dta,
       ylab = "閱讀成績",
       xlab = "以何種方式閱讀",
       type = c("p", "g", "r"))

圖表顯示國家間僅台灣稍有明顯相關，日韓關聯性低。

8.2 不同性別使用不同方式閱讀（紙本or數位）與成績相關是否有異?

# Reading score against reading mode (ST168Q01HA), one panel per Gender value.
# The x-axis label now names the plotted variable; the previous label
# ("閱讀圖表或地圖") described a different question (chart/map reading).
xyplot(read_AVG ~ ST168Q01HA | Gender, data = dta,
       ylab = "閱讀成績",
       xlab = "以何種方式閱讀",
       type = c("p", "g", "r"))

圖表顯示性別間無明顯相關。

9 linear models

# Baseline model: reading score explained by digital-link reading frequency only
model0 <- lm(read_AVG ~ ST150Q04HA, data = dta)
summary(model0)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q04HA, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -375.28  -63.92    6.72   69.72  273.15 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 492.6490     1.4808  332.69   <2e-16 ***
## ST150Q04HA    6.3900     0.5976   10.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 96.46 on 19728 degrees of freedom
##   (因為不存在，272 個觀察量被刪除了)
## Multiple R-squared:  0.005762,   Adjusted R-squared:  0.005712 
## F-statistic: 114.3 on 1 and 19728 DF,  p-value: < 2.2e-16

# Second model: adds the reading-mode item (ST168Q01HA) as a predictor
model1 <- lm(read_AVG ~ ST150Q04HA + ST168Q01HA, data = dta)
summary(model1)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q04HA + ST168Q01HA, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -355.54  -62.59    6.14   68.51  279.38 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 464.2468     2.1132 219.685  < 2e-16 ***
## ST150Q04HA    4.7728     0.5977   7.986 1.47e-15 ***
## ST168Q01HA   13.5148     0.7096  19.045  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95.2 on 19615 degrees of freedom
##   (因為不存在，384 個觀察量被刪除了)
## Multiple R-squared:  0.02391,    Adjusted R-squared:  0.02381 
## F-statistic: 240.3 on 2 and 19615 DF,  p-value: < 2.2e-16

# Third model: additionally includes reading time (ST175Q01IA)
model2 <- lm(
  read_AVG ~ ST150Q04HA + ST168Q01HA + ST175Q01IA,
  data = dta
)
summary(model2)

## 
## Call:
## lm(formula = read_AVG ~ ST150Q04HA + ST168Q01HA + ST175Q01IA, 
##     data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -381.81  -60.78    5.78   66.79  283.93 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 447.2539     2.1570 207.353  < 2e-16 ***
## ST150Q04HA    3.2779     0.5882   5.573 2.54e-08 ***
## ST168Q01HA    6.6160     0.7358   8.992  < 2e-16 ***
## ST175Q01IA   15.3629     0.5368  28.619  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 93.22 on 19567 degrees of freedom
##   (因為不存在，431 個觀察量被刪除了)
## Multiple R-squared:  0.06306,    Adjusted R-squared:  0.06291 
## F-statistic:   439 on 3 and 19567 DF,  p-value: < 2.2e-16

-在model0與model1中，發現R-squared解釋量小，故證實前述分析之「閱讀的類型」與「使用何種閱讀方式」對閱讀成績較無直接相關。

-但在加入「閱讀時間」後發現，解釋量增加幅度較大。

10 conclusion

1.閱讀的類型與成績相關為低相關。

2.使用何種方式閱讀（紙本or數位）與成績相關為低相關。

3.花多久時間閱讀與閱讀成績相關為低相關。

上述三種假設對於學生閱讀成績之預測效果並不明顯，其中僅「花多久時間閱讀」有稍微明顯之改變；此外，「國家」與「性別」的影響亦不彰。

Final-PISA 2018 Analysis

Queen Su

Thu Dec 23 23:53:31 2021