#20211118
# All,
# The mid-term exam is as below:
#
# Describe, analyze, and compare the two attached data sets. After finishing your work, you should reply to me immediately with your answer sheet in a pdf file format or your preferred one plus r code.
#
# import data
# Read both .rda files; the objects ips.706 and ips.706b used below are
# expected to be restored into the workspace by these two calls.
load(file = "IPS_706_Midterm.rda")
load(file = "IPS_706_Midterm2.rda")
# Describe the data: structure, summaries, histograms, scatterplots
str(object = ips.706)
## Classes 'tbl_df', 'tbl' and 'data.frame': 142 obs. of 2 variables:
## $ x: num 58.2 58.2 58.7 57.3 58.1 ...
## $ y: num 91.9 92.2 90.3 89.9 92 ...
str(object = ips.706b)
## Classes 'tbl_df', 'tbl' and 'data.frame': 142 obs. of 2 variables:
## $ x: num 55.4 51.5 46.2 42.8 40.8 ...
## $ y: num 97.2 96 94.5 91.4 88.3 ...
# Pull each column out as a plain vector: suffix "a" = ips.706, "b" = ips.706b.
xa <- ips.706[["x"]]
ya <- ips.706[["y"]]
xb <- ips.706b[["x"]]
yb <- ips.706b[["y"]]
## Numeric summary and histogram for each of the four variables
summary(object = xa)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27.02 41.03 56.53 54.27 68.71 86.44
hist(x = xa)

summary(object = ya)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.37 20.37 50.11 47.84 63.55 92.21
hist(x = ya)

summary(object = xb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22.31 44.10 53.33 54.26 64.74 98.21
hist(x = xb)

summary(object = yb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.949 25.288 46.026 47.832 68.526 99.487
hist(x = yb)

## Scatterplot of data set ips.706 (y against x)
plot(xa, ya)

## Scatterplot of data set ips.706b (y against x)
plot(xb, yb)

## Note:
# 1. Plotted as scatterplots, the (x, y) points of the two data sets form a star and a dinosaur shape, respectively, so within each data set x and y initially appear to be unrelated, or at least not linearly related.
# 2. Therefore, the next step is to merge the two data sets and compare variables across them.
# Bind the four vectors column-wise into one matrix (columns xa, xb, ya, yb).
# NOTE(review): rows of the two data sets are paired purely by position;
# confirm that row i of ips.706 is meant to correspond to row i of ips.706b.
xyc <- cbind(xa, xb, ya, yb)
plot(xa, yb)

plot(xb, ya)

plot(ya, yb)

plot(xa, xb)
# Correlation between the same-named variables of the two data sets
cor(x = xa, y = xb)
## [1] 0.8523187
cor(x = ya, y = yb)
## [1] 0.9631865
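## Note: findings after merging
# 1. From the scatterplots, xa and yb initially appear to be related, possibly linearly.
# 2. xb and ya also initially appear to be related, possibly linearly.
#    (The correlations computed above are for the pairs xa-xb and ya-yb: 0.85 and 0.96.)
# 3. Therefore, the next step is to combine the 4 variables into a new data set.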
# Display every row of the merged matrix as a data frame for inspection.
# The converted data frame is not assigned, so xyc itself is left unchanged.
as.data.frame(xyc)
## xa xb ya yb
## 1 58.21361 55.3846 91.88189 97.1795
## 2 58.19605 51.5385 92.21499 96.0256
## 3 58.71823 46.1538 90.31053 94.4872
## 4 57.27837 42.8205 89.90761 91.4103
## 5 58.08202 40.7692 92.00815 88.3333
## 6 57.48945 38.7179 88.08529 84.8718
## 7 28.08874 35.6410 63.51079 79.8718
## 8 28.08547 33.0769 63.59020 77.5641
## 9 28.08727 28.9744 63.12328 74.4872
## 10 27.57803 26.1538 62.82104 71.4103
## 11 27.77992 23.0769 63.51815 66.4103
## 12 28.58900 22.3077 63.02408 61.7949
## 13 28.73914 22.3077 62.72086 57.1795
## 14 27.02460 23.3333 62.90186 52.9487
## 15 28.80134 25.8974 63.38904 51.0256
## 16 27.18646 29.4872 63.55873 51.0256
## 17 29.28515 32.8205 63.38361 51.0256
## 18 39.40295 35.3846 51.15086 51.4103
## 19 28.81133 40.2564 61.35785 51.4103
## 20 34.30396 44.1026 56.54213 52.9487
## 21 29.60276 46.6667 60.15735 54.1026
## 22 49.11616 50.0000 63.66000 55.2564
## 23 39.61755 53.0769 62.92519 55.6410
## 24 43.23308 56.6667 63.16522 56.0256
## 25 64.89279 59.2308 65.81418 57.9487
## 26 62.49015 61.2821 74.58429 62.1795
## 27 68.98808 61.5385 63.23215 66.4103
## 28 62.10562 61.7949 75.99087 69.1026
## 29 32.46185 57.4359 62.88190 55.2564
## 30 41.32720 54.8718 49.07025 49.8718
## 31 44.00715 52.5641 46.44967 46.0256
## 32 44.07406 48.2051 34.55320 38.3333
## 33 44.00132 49.4872 33.90421 42.1795
## 34 45.00630 51.0256 38.29902 44.1026
## 35 44.44384 45.3846 36.01908 36.4103
## 36 42.17871 42.8205 26.49212 32.5641
## 37 44.04457 38.7179 35.66224 31.4103
## 38 41.64045 35.1282 27.09310 30.2564
## 39 41.93833 32.5641 24.99152 32.1795
## 40 44.05393 30.0000 33.55639 36.7949
## 41 39.20672 33.5897 51.53372 41.4103
## 42 28.70445 36.6667 61.77753 45.6410
## 43 31.70866 38.2051 58.83775 49.1026
## 44 42.81171 29.7436 30.02045 36.0256
## 45 43.30061 29.7436 31.52643 32.1795
## 46 40.39863 30.0000 16.34701 29.1026
## 47 40.43569 32.0513 20.23267 26.7949
## 48 40.93655 35.8974 16.91300 25.2564
## 49 39.66157 41.0256 15.60936 25.2564
## 50 40.89926 44.1026 20.79853 25.6410
## 51 41.96862 47.1795 26.49707 28.7180
## 52 40.38341 49.4872 21.39123 31.4103
## 53 56.53813 51.5385 32.44425 34.8718
## 54 52.97069 53.5897 29.04020 37.5641
## 55 54.62095 55.1282 30.34452 40.6410
## 56 65.09904 56.6667 27.24156 42.1795
## 57 63.05599 59.2308 29.70910 44.4872
## 58 70.96014 62.3077 41.25950 46.0256
## 59 69.89582 64.8718 43.45376 46.7949
## 60 70.59589 67.9487 41.96474 47.9487
## 61 69.64702 70.5128 44.04445 53.7180
## 62 77.39298 71.5385 63.37146 60.6410
## 63 64.40079 71.5385 67.44872 64.4872
## 64 63.86896 69.4872 70.21374 69.4872
## 65 56.59442 46.9231 86.92701 79.8718
## 66 56.53134 48.2051 87.49981 84.1026
## 67 59.65216 50.0000 87.80946 85.2564
## 68 56.63651 53.0769 85.63750 85.2564
## 69 58.67229 55.3846 90.07716 86.0256
## 70 58.22161 56.6667 90.41102 86.0256
## 71 57.91466 56.1538 89.95380 82.9487
## 72 55.31551 53.8462 80.25186 80.6410
## 73 54.57573 51.2821 77.53629 78.7180
## 74 54.41309 50.0000 78.22909 78.7180
## 75 55.07451 47.9487 79.81755 77.5641
## 76 29.43296 29.7436 60.80178 59.8718
## 77 29.42269 29.7436 63.06846 62.1795
## 78 29.00561 31.2821 63.39075 62.5641
## 79 58.46184 57.9487 90.26533 99.4872
## 80 57.99780 61.7949 92.15991 99.1026
## 81 57.54947 64.8718 90.74891 97.5641
## 82 59.52993 68.4615 88.32727 94.1026
## 83 58.24939 70.7692 92.12968 91.0256
## 84 58.02451 72.0513 91.69442 86.4103
## 85 58.38212 73.8462 90.55348 83.3333
## 86 62.56676 75.1282 77.74393 79.1026
## 87 72.17582 76.6667 63.12893 75.2564
## 88 79.47276 77.6923 63.40869 71.4103
## 89 80.35770 79.7436 63.29544 66.7949
## 90 78.75724 81.7949 53.33262 60.2564
## 91 82.54024 83.3333 56.54105 55.2564
## 92 86.43590 85.1282 59.79276 51.4103
## 93 79.48868 86.4103 53.65167 47.5641
## 94 81.53042 87.9487 56.02536 46.0256
## 95 79.18679 89.4872 53.23479 42.5641
## 96 77.89906 93.3333 51.82246 39.8718
## 97 75.13071 95.3846 23.37244 36.7949
## 98 76.05801 98.2051 16.38375 33.7180
## 99 57.61467 56.6667 33.82245 40.6410
## 100 56.17140 59.2308 32.11799 38.3333
## 101 66.28789 60.7692 26.11711 33.7180
## 102 67.88172 63.0769 24.23602 29.1026
## 103 64.02808 64.1026 27.67269 25.2564
## 104 77.49665 64.3590 14.94852 24.1026
## 105 77.63465 74.3590 14.46185 22.9487
## 106 77.86373 71.2821 14.61068 22.9487
## 107 77.33816 67.9487 15.89005 22.1795
## 108 76.18042 65.8974 15.91257 20.2564
## 109 77.25265 63.0769 15.15152 19.1026
## 110 77.41338 61.2821 15.22193 19.1026
## 111 76.73185 58.7179 16.21685 18.3333
## 112 49.47111 55.1282 25.06302 18.3333
## 113 42.47654 52.3077 18.33847 18.3333
## 114 43.59512 49.7436 19.99420 17.5641
## 115 50.33997 47.4359 26.47140 16.0256
## 116 40.74898 44.8718 16.18214 13.7180
## 117 38.38653 48.7179 14.58022 14.8718
## 118 38.40402 51.2821 14.45195 14.8718
## 119 38.76428 54.1026 14.36559 14.8718
## 120 41.47014 56.1538 17.27803 14.1026
## 121 47.15540 52.0513 22.37793 12.5641
## 122 39.58257 48.7179 17.64845 11.0256
## 123 41.74024 47.1795 17.82932 9.8718
## 124 39.31187 46.1538 15.64072 6.0256
## 125 41.67985 50.5128 17.74592 9.4872
## 126 39.08746 53.8462 15.12230 10.2564
## 127 41.48150 57.4359 18.04744 10.2564
## 128 77.60609 60.0000 15.16287 10.6410
## 129 75.98266 64.1026 16.30692 10.6410
## 130 76.94576 66.9231 15.85848 10.6410
## 131 77.54372 71.2821 15.25395 10.6410
## 132 77.58474 74.3590 15.83004 10.6410
## 133 76.82230 78.2051 15.59517 10.6410
## 134 77.34857 67.9487 15.77453 8.7180
## 135 77.57315 68.4615 14.78065 5.2564
## 136 77.97261 68.2051 14.95570 2.9487
## 137 41.52892 37.6923 24.91643 25.7692
## 138 43.72255 39.4872 19.07733 25.3846
## 139 79.32608 91.2821 52.90039 41.5385
## 140 56.66397 50.0000 87.94013 95.7692
## 141 57.82179 47.9487 90.69317 95.0000
## 142 58.24317 44.1026 92.10433 92.6923
library(tibble)
# Turn the merged matrix into a tibble and set its four column names.
xyc <- setNames(as_tibble(xyc), c("xa", "xb", "ya", "yb"))
# Write the tibble to disk, then read it straight back under the same name.
save(xyc, file = "xyc.rda")
load(file = "xyc.rda")
# Linear-model analysis
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

# Simple regressions between the same-named variables of the two data sets.
# abline() only adds a line to whichever plot is currently active, so the
# matching scatterplot (response on the y-axis, predictor on the x-axis) is
# drawn immediately before each fitted line.
m1 <- lm(ya ~ yb, data = xyc)
plot(ya ~ yb, data = xyc)
abline(m1)

m2 <- lm(xa ~ xb, data = xyc)
plot(xa ~ xb, data = xyc)
abline(m2)

# Multiple regressions: each variable regressed on the other three.
# No abline() for these models: with four coefficients abline() uses only the
# first two (and warns), so the drawn line would not represent the fit.
m1.1 <- lm(ya ~ yb + xa + xb, data = xyc)
m1.2 <- lm(yb ~ ya + xa + xb, data = xyc)
m2.1 <- lm(xa ~ xb + ya + yb, data = xyc)
m2.2 <- lm(xb ~ xa + ya + yb, data = xyc)

# Write all six models side by side to an HTML regression table.
stargazer(m1, m1.1, m1.2, m2, m2.1, m2.2, type = "html", out = "xyc.html")
##
## <table style="text-align:center"><tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="2">ya</td><td>yb</td><td colspan="2">xa</td><td>xb</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">yb</td><td>0.963<sup>***</sup></td><td>0.964<sup>***</sup></td><td></td><td></td><td>0.182<sup>*</sup></td><td>-0.161</td></tr>
## <tr><td style="text-align:left"></td><td>(0.023)</td><td>(0.023)</td><td></td><td></td><td>(0.102)</td><td>(0.102)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">ya</td><td></td><td></td><td>0.964<sup>***</sup></td><td></td><td>-0.180<sup>*</sup></td><td>0.148</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.023)</td><td></td><td>(0.102)</td><td>(0.103)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">xa</td><td></td><td>-0.123<sup>*</sup></td><td>0.124<sup>*</sup></td><td></td><td></td><td>0.856<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.070)</td><td>(0.069)</td><td></td><td></td><td>(0.044)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">xb</td><td></td><td>0.101</td><td>-0.109</td><td>0.853<sup>***</sup></td><td>0.853<sup>***</sup></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.070)</td><td>(0.070)</td><td>(0.044)</td><td>(0.044)</td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>1.777</td><td>2.929</td><td>0.927</td><td>8.007<sup>***</sup></td><td>7.918<sup>***</sup></td><td>8.387<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.246)</td><td>(2.450)</td><td>(2.462)</td><td>(2.510)</td><td>(2.906)</td><td>(2.903)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>142</td><td>142</td><td>142</td><td>142</td><td>142</td><td>142</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.928</td><td>0.929</td><td>0.929</td><td>0.726</td><td>0.733</td><td>0.731</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.927</td><td>0.928</td><td>0.928</td><td>0.724</td><td>0.727</td><td>0.726</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>7.266 (df = 140)</td><td>7.237 (df = 138)</td><td>7.236 (df = 138)</td><td>8.802 (df = 140)</td><td>8.764 (df = 138)</td><td>8.783 (df = 138)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>1,797.132<sup>***</sup> (df = 1; 140)</td><td>604.889<sup>***</sup> (df = 3; 138)</td><td>605.163<sup>***</sup> (df = 3; 138)</td><td>371.784<sup>***</sup> (df = 1; 140)</td><td>126.062<sup>***</sup> (df = 3; 138)</td><td>125.234<sup>***</sup> (df = 3; 138)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
## Note: findings from the regression table (stars: * p<0.1, ** p<0.05, *** p<0.01)
# 1. When "ya" is the dependent variable (model 2):
# "yb" (p<0.01) and "xa" (only at p<0.1) are statistically significant for "ya"; the "yb" coefficient is positive and the "xa" coefficient is negative. "xb" is not significant.
# 2. When "yb" is the dependent variable (model 3):
# "ya" (p<0.01) and "xa" (only at p<0.1) are statistically significant for "yb"; both coefficients are positive. "xb" is not significant.
# 3. When "xa" is the dependent variable (model 5):
# "xb" (p<0.01), "yb" and "ya" (both only at p<0.1) are statistically significant for "xa"; the "yb" and "xb" coefficients are positive and the "ya" coefficient is negative.
# 4. When "xb" is the dependent variable (model 6):
# Only "xa" is statistically significant for "xb" (p<0.01, positive coefficient).