# VerbalIQ.utf8

#' ---
#' title: "Homework #4 IQ and language"
#' author: "Szu-Yu Chen"
#' date: 9 September 2020
#' ---

# Read the verbal IQ data set. The first row of the file holds the column
# names (school, pupil, viq, language, csize, ses).
# NOTE(review): this absolute Windows path only resolves on the author's
# machine -- consider a project-relative path before sharing the script.
dta <- read.table("C:/Users/ASUS/Desktop/data/verbalIQ.txt", header = TRUE)
# Preview the first rows to confirm the import worked.
head(dta)

##   school pupil  viq language csize ses
## 1      1 17001 15.0       46    29  23
## 2      1 17002 14.5       45    29  10
## 3      1 17003  9.5       33    29  15
## 4      1 17004 11.0       46    29  23
## 5      1 17005  8.0       20    29  10
## 6      1 17006  9.5       30    29  10

# plot 131 school-level regression lines of language score against verbal IQ
library(tidyverse)

## -- Attaching packages ------------------------------------- tidyverse 1.3.0 --

## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   1.0.2
## √ tidyr   1.1.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0

## -- Conflicts ---------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Scatter plot of language score against verbal IQ for all pupils, overlaid
# with one least-squares line per school (grouping comes from
# `group = school`). Confidence bands are switched off so the individual
# school lines stay readable.
# NOTE(review): `size` is kept for the line width because the attached
# ggplot2 is 3.3.2; from ggplot2 3.4.0 onwards `linewidth` is the preferred
# argument for lines.
ggplot(data = dta, aes(x = viq, y = language, group = school)) +
  geom_point() +
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "gray",
    linetype = "dashed",
    size = rel(.5)
  ) +
  labs(
    x = "Verbal IQ",
    y = "language"
  ) +
  theme_bw()

# School-level (aggregated) analysis: average the language score and the
# verbal IQ within each school, then regress the school means of language
# on the school means of verbal IQ.
g_lang <- with(dta, tapply(language, school, mean))
g_viq <- with(dta, tapply(viq, school, mean))
# NOTE(review): g_lang and g_viq are not columns of dta, so `data = dta` is
# effectively unused -- lm() resolves both from the formula's environment.
# The argument is kept so the recorded `Call:` stays the same; consider
# fitting on a data frame of school means instead.
model <- lm(formula = g_lang ~ g_viq, data = dta)
summary(model)

## 
## Call:
## lm(formula = g_lang ~ g_viq, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.4163 -1.7507  0.0461  2.2184  6.8885 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.269      3.431  -2.119    0.036 *  
## g_viq          4.049      0.292  13.866   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.346 on 129 degrees of freedom
## Multiple R-squared:  0.5985, Adjusted R-squared:  0.5953 
## F-statistic: 192.3 on 1 and 129 DF,  p-value: < 2.2e-16

# Pupil-level Pearson correlation between verbal IQ and language score,
# computed over all rows of dta without regard to school membership.
cor(dta[["viq"]], dta[["language"]], method = "pearson")

## [1] 0.6098195