#20211118 
# All,

# The mid-term exam is as below:
#   
#   Describe, analyze, and compare the two attached data sets.  After finishing your work, you should reply to me immediately with your answer sheet in a pdf file format or your preferred one plus r code.  
# 
# import data

# Restore the objects stored in the two exam .rda files into the workspace.
# Presumably these provide ips.706 and ips.706b, which are inspected below.
load("IPS_706_Midterm.rda")
load("IPS_706_Midterm2.rda")

# Describe the data: structure first, then summaries, histograms and plots.
str(ips.706)
## Classes 'tbl_df', 'tbl' and 'data.frame':    142 obs. of  2 variables:
##  $ x: num  58.2 58.2 58.7 57.3 58.1 ...
##  $ y: num  91.9 92.2 90.3 89.9 92 ...
str(ips.706b)
## Classes 'tbl_df', 'tbl' and 'data.frame':    142 obs. of  2 variables:
##  $ x: num  55.4 51.5 46.2 42.8 40.8 ...
##  $ y: num  97.2 96 94.5 91.4 88.3 ...

# Pull each column out as a plain numeric vector:
# suffix "a" = ips.706, suffix "b" = ips.706b.
xa <- ips.706[["x"]]
ya <- ips.706[["y"]]
xb <- ips.706b[["x"]]
yb <- ips.706b[["y"]]

## Numeric summary and histogram for each of the four variables

# x of ips.706
summary(xa)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.02   41.03   56.53   54.27   68.71   86.44
hist(x = xa)

# y of ips.706
summary(ya)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.37   20.37   50.11   47.84   63.55   92.21
hist(x = ya)

# x of ips.706b
summary(xb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   22.31   44.10   53.33   54.26   64.74   98.21
hist(x = xb)

# y of ips.706b
summary(yb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.949  25.288  46.026  47.832  68.526  99.487
hist(x = yb)

## Scatterplot of data set ips.706 (x against y)
plot(xa, ya)

## Scatterplot of data set ips.706b (x against y)
plot(xb, yb)

## Note:
# 1. Plotted as x against y, the two data sets draw a star and a dinosaur, so within each data set x and y initially appear to be unrelated, or at least not linearly related.
# 2. Therefore, the next step is to try merging the two data sets.


# Bind the four vectors column-wise into one matrix (columns xa, xb, ya, yb)

xyc <- cbind(xa, xb, ya, yb)

# Scatterplots that pair variables across (and within) the two data sets
plot(xa, yb)

plot(xb, ya)

plot(ya, yb)

plot(xa, xb)

# Correlation between the two x variables and between the two y variables
cor(x = xa, y = xb)
## [1] 0.8523187
cor(x = ya, y = yb)
## [1] 0.9631865
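## Note: findings after merging
# 1. At first glance, xa and yb seem to be related, possibly linearly.
# 2. At first glance, xb and ya also seem to be related, possibly linearly.
# 3. Therefore, the next step is to merge the 4 variables into a new data set.
# NOTE(review): the correlations computed above are for (xa, xb) and (ya, yb), not for the pairs named in points 1 and 2 - confirm which pairs were intended.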

# Print the merged matrix as a data frame (full listing below). The result is
# not assigned, so xyc itself is still the matrix built by cbind().
as.data.frame(xyc)
##           xa      xb       ya      yb
## 1   58.21361 55.3846 91.88189 97.1795
## 2   58.19605 51.5385 92.21499 96.0256
## 3   58.71823 46.1538 90.31053 94.4872
## 4   57.27837 42.8205 89.90761 91.4103
## 5   58.08202 40.7692 92.00815 88.3333
## 6   57.48945 38.7179 88.08529 84.8718
## 7   28.08874 35.6410 63.51079 79.8718
## 8   28.08547 33.0769 63.59020 77.5641
## 9   28.08727 28.9744 63.12328 74.4872
## 10  27.57803 26.1538 62.82104 71.4103
## 11  27.77992 23.0769 63.51815 66.4103
## 12  28.58900 22.3077 63.02408 61.7949
## 13  28.73914 22.3077 62.72086 57.1795
## 14  27.02460 23.3333 62.90186 52.9487
## 15  28.80134 25.8974 63.38904 51.0256
## 16  27.18646 29.4872 63.55873 51.0256
## 17  29.28515 32.8205 63.38361 51.0256
## 18  39.40295 35.3846 51.15086 51.4103
## 19  28.81133 40.2564 61.35785 51.4103
## 20  34.30396 44.1026 56.54213 52.9487
## 21  29.60276 46.6667 60.15735 54.1026
## 22  49.11616 50.0000 63.66000 55.2564
## 23  39.61755 53.0769 62.92519 55.6410
## 24  43.23308 56.6667 63.16522 56.0256
## 25  64.89279 59.2308 65.81418 57.9487
## 26  62.49015 61.2821 74.58429 62.1795
## 27  68.98808 61.5385 63.23215 66.4103
## 28  62.10562 61.7949 75.99087 69.1026
## 29  32.46185 57.4359 62.88190 55.2564
## 30  41.32720 54.8718 49.07025 49.8718
## 31  44.00715 52.5641 46.44967 46.0256
## 32  44.07406 48.2051 34.55320 38.3333
## 33  44.00132 49.4872 33.90421 42.1795
## 34  45.00630 51.0256 38.29902 44.1026
## 35  44.44384 45.3846 36.01908 36.4103
## 36  42.17871 42.8205 26.49212 32.5641
## 37  44.04457 38.7179 35.66224 31.4103
## 38  41.64045 35.1282 27.09310 30.2564
## 39  41.93833 32.5641 24.99152 32.1795
## 40  44.05393 30.0000 33.55639 36.7949
## 41  39.20672 33.5897 51.53372 41.4103
## 42  28.70445 36.6667 61.77753 45.6410
## 43  31.70866 38.2051 58.83775 49.1026
## 44  42.81171 29.7436 30.02045 36.0256
## 45  43.30061 29.7436 31.52643 32.1795
## 46  40.39863 30.0000 16.34701 29.1026
## 47  40.43569 32.0513 20.23267 26.7949
## 48  40.93655 35.8974 16.91300 25.2564
## 49  39.66157 41.0256 15.60936 25.2564
## 50  40.89926 44.1026 20.79853 25.6410
## 51  41.96862 47.1795 26.49707 28.7180
## 52  40.38341 49.4872 21.39123 31.4103
## 53  56.53813 51.5385 32.44425 34.8718
## 54  52.97069 53.5897 29.04020 37.5641
## 55  54.62095 55.1282 30.34452 40.6410
## 56  65.09904 56.6667 27.24156 42.1795
## 57  63.05599 59.2308 29.70910 44.4872
## 58  70.96014 62.3077 41.25950 46.0256
## 59  69.89582 64.8718 43.45376 46.7949
## 60  70.59589 67.9487 41.96474 47.9487
## 61  69.64702 70.5128 44.04445 53.7180
## 62  77.39298 71.5385 63.37146 60.6410
## 63  64.40079 71.5385 67.44872 64.4872
## 64  63.86896 69.4872 70.21374 69.4872
## 65  56.59442 46.9231 86.92701 79.8718
## 66  56.53134 48.2051 87.49981 84.1026
## 67  59.65216 50.0000 87.80946 85.2564
## 68  56.63651 53.0769 85.63750 85.2564
## 69  58.67229 55.3846 90.07716 86.0256
## 70  58.22161 56.6667 90.41102 86.0256
## 71  57.91466 56.1538 89.95380 82.9487
## 72  55.31551 53.8462 80.25186 80.6410
## 73  54.57573 51.2821 77.53629 78.7180
## 74  54.41309 50.0000 78.22909 78.7180
## 75  55.07451 47.9487 79.81755 77.5641
## 76  29.43296 29.7436 60.80178 59.8718
## 77  29.42269 29.7436 63.06846 62.1795
## 78  29.00561 31.2821 63.39075 62.5641
## 79  58.46184 57.9487 90.26533 99.4872
## 80  57.99780 61.7949 92.15991 99.1026
## 81  57.54947 64.8718 90.74891 97.5641
## 82  59.52993 68.4615 88.32727 94.1026
## 83  58.24939 70.7692 92.12968 91.0256
## 84  58.02451 72.0513 91.69442 86.4103
## 85  58.38212 73.8462 90.55348 83.3333
## 86  62.56676 75.1282 77.74393 79.1026
## 87  72.17582 76.6667 63.12893 75.2564
## 88  79.47276 77.6923 63.40869 71.4103
## 89  80.35770 79.7436 63.29544 66.7949
## 90  78.75724 81.7949 53.33262 60.2564
## 91  82.54024 83.3333 56.54105 55.2564
## 92  86.43590 85.1282 59.79276 51.4103
## 93  79.48868 86.4103 53.65167 47.5641
## 94  81.53042 87.9487 56.02536 46.0256
## 95  79.18679 89.4872 53.23479 42.5641
## 96  77.89906 93.3333 51.82246 39.8718
## 97  75.13071 95.3846 23.37244 36.7949
## 98  76.05801 98.2051 16.38375 33.7180
## 99  57.61467 56.6667 33.82245 40.6410
## 100 56.17140 59.2308 32.11799 38.3333
## 101 66.28789 60.7692 26.11711 33.7180
## 102 67.88172 63.0769 24.23602 29.1026
## 103 64.02808 64.1026 27.67269 25.2564
## 104 77.49665 64.3590 14.94852 24.1026
## 105 77.63465 74.3590 14.46185 22.9487
## 106 77.86373 71.2821 14.61068 22.9487
## 107 77.33816 67.9487 15.89005 22.1795
## 108 76.18042 65.8974 15.91257 20.2564
## 109 77.25265 63.0769 15.15152 19.1026
## 110 77.41338 61.2821 15.22193 19.1026
## 111 76.73185 58.7179 16.21685 18.3333
## 112 49.47111 55.1282 25.06302 18.3333
## 113 42.47654 52.3077 18.33847 18.3333
## 114 43.59512 49.7436 19.99420 17.5641
## 115 50.33997 47.4359 26.47140 16.0256
## 116 40.74898 44.8718 16.18214 13.7180
## 117 38.38653 48.7179 14.58022 14.8718
## 118 38.40402 51.2821 14.45195 14.8718
## 119 38.76428 54.1026 14.36559 14.8718
## 120 41.47014 56.1538 17.27803 14.1026
## 121 47.15540 52.0513 22.37793 12.5641
## 122 39.58257 48.7179 17.64845 11.0256
## 123 41.74024 47.1795 17.82932  9.8718
## 124 39.31187 46.1538 15.64072  6.0256
## 125 41.67985 50.5128 17.74592  9.4872
## 126 39.08746 53.8462 15.12230 10.2564
## 127 41.48150 57.4359 18.04744 10.2564
## 128 77.60609 60.0000 15.16287 10.6410
## 129 75.98266 64.1026 16.30692 10.6410
## 130 76.94576 66.9231 15.85848 10.6410
## 131 77.54372 71.2821 15.25395 10.6410
## 132 77.58474 74.3590 15.83004 10.6410
## 133 76.82230 78.2051 15.59517 10.6410
## 134 77.34857 67.9487 15.77453  8.7180
## 135 77.57315 68.4615 14.78065  5.2564
## 136 77.97261 68.2051 14.95570  2.9487
## 137 41.52892 37.6923 24.91643 25.7692
## 138 43.72255 39.4872 19.07733 25.3846
## 139 79.32608 91.2821 52.90039 41.5385
## 140 56.66397 50.0000 87.94013 95.7692
## 141 57.82179 47.9487 90.69317 95.0000
## 142 58.24317 44.1026 92.10433 92.6923
library(tibble)
# Convert the merged matrix to a tibble and apply the four column names.
xyc <- setNames(as_tibble(xyc), c("xa", "xb", "ya", "yb"))

# Write the tibble to disk, then read it straight back into the workspace.
save(xyc, file = "xyc.rda")
load("xyc.rda")


# Linear-model analysis ----
# Fits two simple and four multiple regressions on the merged data set xyc
# and writes one comparison table to "xyc.html" (stargazer also prints the
# HTML to the console).

library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

# Simple regressions. abline() adds a line to whichever plot is currently
# open, so each fit is drawn on its own scatterplot with the predictor on the
# x axis and the response on the y axis.
m1 <- lm(ya ~ yb, data = xyc)
plot(ya ~ yb, data = xyc)
abline(m1)

m2 <- lm(xa ~ xb, data = xyc)
plot(xa ~ xb, data = xyc)
abline(m2)

# Multiple regressions. No abline() here: with four coefficients abline()
# uses only the intercept and the first slope (and warns about it), so the
# line would not represent the fitted model.
m1.1 <- lm(ya ~ yb + xa + xb, data = xyc)
m1.2 <- lm(yb ~ ya + xa + xb, data = xyc)
m2.1 <- lm(xa ~ xb + ya + yb, data = xyc)
m2.2 <- lm(xb ~ xa + ya + yb, data = xyc)

# Column order in the table: (1) m1, (2) m1.1, (3) m1.2, (4) m2, (5) m2.1,
# (6) m2.2.
stargazer(m1, m1.1, m1.2, m2, m2.1, m2.2, type = "html", out = "xyc.html")
## 
## <table style="text-align:center"><tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="2">ya</td><td>yb</td><td colspan="2">xa</td><td>xb</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">yb</td><td>0.963<sup>***</sup></td><td>0.964<sup>***</sup></td><td></td><td></td><td>0.182<sup>*</sup></td><td>-0.161</td></tr>
## <tr><td style="text-align:left"></td><td>(0.023)</td><td>(0.023)</td><td></td><td></td><td>(0.102)</td><td>(0.102)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">ya</td><td></td><td></td><td>0.964<sup>***</sup></td><td></td><td>-0.180<sup>*</sup></td><td>0.148</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.023)</td><td></td><td>(0.102)</td><td>(0.103)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">xa</td><td></td><td>-0.123<sup>*</sup></td><td>0.124<sup>*</sup></td><td></td><td></td><td>0.856<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.070)</td><td>(0.069)</td><td></td><td></td><td>(0.044)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">xb</td><td></td><td>0.101</td><td>-0.109</td><td>0.853<sup>***</sup></td><td>0.853<sup>***</sup></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.070)</td><td>(0.070)</td><td>(0.044)</td><td>(0.044)</td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>1.777</td><td>2.929</td><td>0.927</td><td>8.007<sup>***</sup></td><td>7.918<sup>***</sup></td><td>8.387<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.246)</td><td>(2.450)</td><td>(2.462)</td><td>(2.510)</td><td>(2.906)</td><td>(2.903)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>142</td><td>142</td><td>142</td><td>142</td><td>142</td><td>142</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.928</td><td>0.929</td><td>0.929</td><td>0.726</td><td>0.733</td><td>0.731</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.927</td><td>0.928</td><td>0.928</td><td>0.724</td><td>0.727</td><td>0.726</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>7.266 (df = 140)</td><td>7.237 (df = 138)</td><td>7.236 (df = 138)</td><td>8.802 (df = 140)</td><td>8.764 (df = 138)</td><td>8.783 (df = 138)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>1,797.132<sup>***</sup> (df = 1; 140)</td><td>604.889<sup>***</sup> (df = 3; 138)</td><td>605.163<sup>***</sup> (df = 3; 138)</td><td>371.784<sup>***</sup> (df = 1; 140)</td><td>126.062<sup>***</sup> (df = 3; 138)</td><td>125.234<sup>***</sup> (df = 3; 138)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
## Note: findings from the regression analysis (multiple-regression columns 2, 3, 5 and 6)
# 1. When "ya" is the dependent variable:
#   "yb" (positive, p<0.01) and "xa" (negative, p<0.1 only) are statistically significant for "ya"; "xb" is not.
# 2. When "yb" is the dependent variable:
#   "ya" (positive, p<0.01) and "xa" (positive, p<0.1 only) are statistically significant for "yb"; "xb" is not.
# 3. When "xa" is the dependent variable:
#   "xb", "yb" and "ya" are all statistically significant for "xa": "xb" (p<0.01) and "yb" (p<0.1 only) are positively related, and "ya" (p<0.1 only) is negatively related.
# 4. When "xb" is the dependent variable:
#   Only "xa" is statistically significant for "xb" (positive, p<0.01).