ECN325 хичээлийн хүрээнд RStudio дээр энэхүү даалгаврыг гүйцэтгэлээ. Ингэхдээ Wooldridge-ийн сурах бичгийн өгөгдөл дээр ажилласан бөгөөд уг өгөгдөл нь католик ахлах сургуульд суралцсан эсэх зэрэг сурагчдын мэдээллийг агуулдаг.
Өгөгдлөө дуудах.
# Attach readr for read_csv(), then load the data set from the working
# directory into a tibble named `catholic`.
library(readr)
catholic <- read_csv(file = "catholic.csv")
## Rows: 7430 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (14): rownames, id, read12, math12, female, asian, hispan, black, mothed...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick look at the data: first six rows, then the column types/structure.
head(catholic, n = 6L)
str(object = catholic)
## spc_tbl_ [7,430 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ rownames: num [1:7430] 1 2 3 4 5 6 7 8 9 10 ...
## $ id : num [1:7430] 124902 124915 124916 124932 124944 ...
## $ read12 : num [1:7430] 61.4 58.3 59.3 49.6 57.6 ...
## $ math12 : num [1:7430] 49.8 59.8 50.4 45 54.3 ...
## $ female : num [1:7430] 0 0 1 1 1 1 1 0 0 0 ...
## $ asian : num [1:7430] 0 0 0 0 0 0 0 0 0 0 ...
## $ hispan : num [1:7430] 0 0 0 0 0 0 0 0 0 0 ...
## $ black : num [1:7430] 0 0 0 0 0 0 0 0 0 0 ...
## $ motheduc: num [1:7430] 14 14 14 12 12 12 14 14 14 14 ...
## $ fatheduc: num [1:7430] 12 14 11 14 12 11 14 14 14 12 ...
## $ lfaminc : num [1:7430] 10.3 10.3 10.3 10.3 10.7 ...
## $ hsgrad : num [1:7430] 1 1 1 1 1 1 1 1 1 1 ...
## $ cathhs : num [1:7430] 0 0 0 0 0 0 0 0 0 0 ...
## $ parcath : num [1:7430] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "spec")=
## .. cols(
## .. rownames = col_double(),
## .. id = col_double(),
## .. read12 = col_double(),
## .. math12 = col_double(),
## .. female = col_double(),
## .. asian = col_double(),
## .. hispan = col_double(),
## .. black = col_double(),
## .. motheduc = col_double(),
## .. fatheduc = col_double(),
## .. lfaminc = col_double(),
## .. hsgrad = col_double(),
## .. cathhs = col_double(),
## .. parcath = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column descriptive statistics (min, quartiles, mean, max, NA count).
summary(object = catholic)
## rownames id read12 math12
## Min. : 1 Min. : 124902 Min. :29.15 Min. :29.50
## 1st Qu.:1858 1st Qu.:2424049 1st Qu.:44.52 1st Qu.:45.02
## Median :3716 Median :4592442 Median :53.08 Median :52.53
## Mean :3716 Mean :4589838 Mean :51.77 Mean :52.13
## 3rd Qu.:5573 3rd Qu.:7241106 3rd Qu.:59.47 3rd Qu.:59.86
## Max. :7430 Max. :7979086 Max. :68.09 Max. :71.37
##
## female asian hispan black
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :1.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.5174 Mean :0.05168 Mean :0.1035 Mean :0.07066
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
##
## motheduc fatheduc lfaminc hsgrad
## Min. : 8.00 Min. : 8.00 Min. : 6.215 Min. :0.0000
## 1st Qu.:12.00 1st Qu.:12.00 1st Qu.:10.021 1st Qu.:1.0000
## Median :14.00 Median :14.00 Median :10.309 Median :1.0000
## Mean :13.36 Mean :13.67 Mean :10.353 Mean :0.9303
## 3rd Qu.:14.00 3rd Qu.:16.00 3rd Qu.:10.657 3rd Qu.:1.0000
## Max. :18.00 Max. :18.00 Max. :12.346 Max. :1.0000
## NA's :1460
## cathhs parcath
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000
## Mean :0.06083 Mean :0.3459
## 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000
##
Зарим үзүүлэлт нь тийм, үгүй (тийм бол 1, үгүй бол 0) гэсэн утгатай тул тэдгээрийн давтамжийг тоолъё.
# Frequency of each value of the 0/1 indicator `female`.
table(catholic[["female"]])
##
## 0 1
## 3586 3844
# Frequency of each value of the 0/1 indicator `hsgrad`.
# summary(catholic) reports 1460 missing values in this column, and table()
# drops NA by default, so the plain counts cover only 5970 of the 7430 rows.
# useNA = "ifany" keeps the missing values visible as their own category.
table(catholic$hsgrad, useNA = "ifany")
##
## 0 1 <NA>
## 416 5554 1460
# Frequency of each value of the 0/1 indicator `cathhs`.
table(catholic[["cathhs"]])
##
## 0 1
## 6978 452
# Frequency of each value of the 0/1 indicator `parcath`.
table(catholic[["parcath"]])
##
## 0 1
## 4860 2570
Тоон үзүүлэлтүүдийн хоорондын корреляцийг (статистик хамаарлыг) тооцох нь
# Correlation matrix of the two test scores, parental education and
# lfaminc; rows containing any missing value are excluded.
cor(
  catholic[c("read12", "math12", "motheduc", "fatheduc", "lfaminc")],
  use = "complete.obs"
)
## read12 math12 motheduc fatheduc lfaminc
## read12 1.0000000 0.7104134 0.2845934 0.3170248 0.2586439
## math12 0.7104134 1.0000000 0.3422983 0.3752797 0.3163052
## motheduc 0.2845934 0.3422983 1.0000000 0.5831647 0.4052282
## fatheduc 0.3170248 0.3752797 0.5831647 1.0000000 0.4461612
## lfaminc 0.2586439 0.3163052 0.4052282 0.4461612 1.0000000
Шалгалтын онооны тархалтыг хистограмм байгуулж харах нь
# Histograms of the reading and math scores, each with about 20 bins.
hist(
  catholic[["read12"]],
  breaks = 20,
  main = "Distribution of Reading Scores",
  xlab = "Reading Score"
)
hist(
  catholic[["math12"]],
  breaks = 20,
  main = "Distribution of Math Scores",
  xlab = "Math Score"
)
Шалгалтын оноог хүйсээр нь ялгаж хайрцган диаграммаар (boxplot) харах нь
# Box plots of each test score, split by the `female` indicator.
boxplot(
  read12 ~ female,
  data = catholic,
  main = "Reading Scores by Gender",
  xlab = "Female (1 = Yes, 0 = No)",
  ylab = "Reading Score"
)
boxplot(
  math12 ~ female,
  data = catholic,
  main = "Math Scores by Gender",
  xlab = "Female (1 = Yes, 0 = No)",
  ylab = "Math Score"
)
Шалгалтын оноог католик ахлах сургуульд суралцсан эсэхээр нь ялгаж дүрслэвэл
# Box plots of each test score, split by the `cathhs` indicator.
boxplot(
  read12 ~ cathhs,
  data = catholic,
  main = "Reading Scores by Catholic School Attendance",
  xlab = "Catholic HS (1 = Yes, 0 = No)",
  ylab = "Reading Score"
)
boxplot(
  math12 ~ cathhs,
  data = catholic,
  main = "Math Scores by Catholic School Attendance",
  xlab = "Catholic HS (1 = Yes, 0 = No)",
  ylab = "Math Score"
)
Шалгалтын оноо болон эцэг эхийн боловсролын (суралцсан жилийн) хамаарлыг цэгэн диаграммаар дүрслэвэл.
# Scatter plots: mother's education against the reading score, and
# father's education against the math score.
plot(
  x = catholic[["motheduc"]],
  y = catholic[["read12"]],
  main = "Mother's Education vs. Reading Score",
  xlab = "Mother's Years of Education",
  ylab = "Reading Score"
)
plot(
  x = catholic[["fatheduc"]],
  y = catholic[["math12"]],
  main = "Father's Education vs. Math Score",
  xlab = "Father's Years of Education",
  ylab = "Math Score"
)
Шалгалтын оноонд олон хувьсагчтай шугаман регресс (OLS) үнэлэх нь.
# Linear regression of the reading score (read12) on the Catholic
# high-school indicator, both parents' years of education and lfaminc
# (presumably log family income -- confirm against the data description).
model1 <- lm(
  formula = read12 ~ cathhs + motheduc + fatheduc + lfaminc,
  data = catholic
)
# Coefficient table, residual summary and fit statistics.
summary(model1)
##
## Call:
## lm(formula = read12 ~ cathhs + motheduc + fatheduc + lfaminc,
## data = catholic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.220 -6.254 1.240 6.732 21.976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.41178 1.34815 13.657 < 2e-16 ***
## cathhs 1.16370 0.42979 2.708 0.00679 **
## motheduc 0.57960 0.06384 9.079 < 2e-16 ***
## fatheduc 0.77901 0.05774 13.491 < 2e-16 ***
## lfaminc 1.43876 0.14667 9.809 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.786 on 7425 degrees of freedom
## Multiple R-squared: 0.1282, Adjusted R-squared: 0.1278
## F-statistic: 273.1 on 4 and 7425 DF, p-value: < 2.2e-16
# Linear regression of the math score (math12) on the Catholic
# high-school indicator, both parents' years of education and lfaminc
# (presumably log family income -- confirm against the data description).
model2 <- lm(
  formula = math12 ~ cathhs + motheduc + fatheduc + lfaminc,
  data = catholic
)
# Coefficient table, residual summary and fit statistics.
summary(model2)
##
## Call:
## lm(formula = math12 ~ cathhs + motheduc + fatheduc + lfaminc,
## data = catholic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.0824 -6.1371 0.4904 6.4524 26.7728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.14862 1.31098 8.504 < 2e-16 ***
## cathhs 1.47723 0.41794 3.535 0.000411 ***
## motheduc 0.71635 0.06208 11.539 < 2e-16 ***
## fatheduc 0.89125 0.05615 15.872 < 2e-16 ***
## lfaminc 1.84867 0.14263 12.961 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.544 on 7425 degrees of freedom
## Multiple R-squared: 0.1846, Adjusted R-squared: 0.1841
## F-statistic: 420.2 on 4 and 7425 DF, p-value: < 2.2e-16