library(ggplot2)
library(emmeans)
library(knitr)

1 Criação do Banco de Dados

# Inputs of the simulation.
semente <- 130  # residence number (not referenced by the visible code; the seed is fixed below)
nr_mat <- 1525  # enrollment number
idade <- 19     # age
set.seed(2)

# Response Y: 180 draws centred on nr_mat / 2, then 20 draws centred on nr_mat
# with a larger spread. The order of the rnorm() calls is kept so the
# random stream is consumed exactly as before.
Y1 <- matrix(rnorm(180, mean = nr_mat / 2, sd = nr_mat * (20 / 100)), ncol = 1)
Y2 <- matrix(rnorm(20, mean = nr_mat, sd = nr_mat * (28 / 100)), ncol = 1)
Y <- rbind(Y1, Y2)

# Raw predictor: 180 draws with sd idade / 4, then 20 draws with sd 0.75 * idade.
X1 <- matrix(rnorm(180, mean = idade, sd = idade / 4), ncol = 1)
X2 <- matrix(rnorm(20, mean = idade, sd = idade * (75 / 100)), ncol = 1)
X <- rbind(X1, X2)

# Mix a fraction rho of Y into X so that the two columns become correlated.
rho <- 1 / 100
X <- rho * Y + sqrt(1 - rho^2) * X

cor(X, Y)
##           [,1]
## [1,] 0.5757991
plot(X, Y)

# Gather the simulated columns into a data frame. The first argument is left
# unnamed, so data.frame() derives the column name X1.200 from the expression.
dados <- data.frame(1:200, coll1 = as.vector(X), coll2 = as.vector(Y))

# Structure overview followed by the full listing.
str(dados)
## 'data.frame':    200 obs. of  3 variables:
##  $ X1.200: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ coll1 : num  25.3 22.3 45.1 24.2 21.8 ...
##  $ coll2 : num  489 819 1247 418 738 ...
print(dados)
##     X1.200    coll1      coll2
## 1        1 25.30381  488.94106
## 2        2 22.34521  818.87900
## 3        3 45.10306 1246.79283
## 4        4 24.21522  417.73542
## 5        5 21.78838  738.02321
## 6        6 28.85107  802.88819
## 7        7 28.20433  978.42619
## 8        8 24.22795  689.39210
## 9        9 35.52401 1367.76455
## 10      10 27.29925  720.16996
## 11      11 32.77231  889.88348
## 12      12 27.12739 1061.93460
## 13      13 33.95788  642.72792
## 14      14 16.64097  445.40096
## 15      15 32.71857 1306.07983
## 16      16 21.71963   57.62393
## 17      17 35.07368 1030.47440
## 18      18 20.44229  773.42105
## 19      19 24.30022 1071.41275
## 20      20 35.90139  894.34087
## 21      21 34.89215 1400.19986
## 22      22 26.36282  396.52263
## 23      23 27.70496 1247.33965
## 24      24 41.72358 1358.66875
## 25      25 26.97368  764.00602
## 26      26 21.37531   14.72955
## 27      27 27.65285  908.05738
## 28      28 29.05156  580.54976
## 29      29 27.26524 1004.12200
## 30      30 38.23360  850.83920
## 31      31 36.34967  987.87627
## 32      32 18.31432  859.78292
## 33      33 26.96472 1090.73013
## 34      34 25.61512  675.83190
## 35      35 16.07772  525.61404
## 36      36 19.80362  580.82355
## 37      37 18.64930  236.07617
## 38      38 26.16395  487.21173
## 39      39 25.37453  591.98612
## 40      40 23.08009  687.31367
## 41      41 25.06568  645.50620
## 42      42 34.36809  164.97353
## 43      43 29.62749  505.77996
## 44      44 35.04054 1343.08198
## 45      45 32.22322  952.36065
## 46      46 30.01212 1369.73073
## 47      47 26.18133  669.32746
## 48      48 27.68414  734.79251
## 49      49 23.80892  706.33076
## 50      50 20.79240  396.87583
## 51      51 26.44949  506.82242
## 52      52 31.00200 1392.72191
## 53      53 24.06666  591.01465
## 54      54 28.99722 1151.59323
## 55      55 27.43514  442.99035
## 56      56 22.92538  162.90714
## 57      57 24.19663  663.99382
## 58      58 28.12429 1047.93807
## 59      59 34.75554 1109.96509
## 60      60 30.90876 1272.34372
## 61      61 27.54890  217.08613
## 62      62 32.35216 1382.02897
## 63      63 22.95006  548.04098
## 64      64 18.01192  810.74025
## 65      65 25.09477  916.90161
## 66      66 17.47607  512.40149
## 67      67 29.70019  152.85167
## 68      68 27.35457  616.31576
## 69      69 29.38601  788.17487
## 70      70 23.71854  489.37658
## 71      71 10.89401  481.51092
## 72      72 27.40809  863.28710
## 73      73 26.21448  719.29345
## 74      74 22.13043  895.12857
## 75      75 27.65203  746.11460
## 76      76 31.48168  485.83134
## 77      77 30.43105 1160.07123
## 78      78 33.86750  997.89588
## 79      79 27.93414 1083.52031
## 80      80 22.64282  332.43831
## 81      81 39.33141 1066.27530
## 82      82 24.53703  245.29170
## 83      83 31.07388  599.82150
## 84      84 19.45483  343.95782
## 85      85 26.30365   89.08447
## 86      86 40.12593 1318.24737
## 87      87 30.20139  563.21501
## 88      88 25.78512  675.67223
## 89      89 31.64473  644.48037
## 90      90 27.36180  880.44197
## 91      91 26.14395 1250.61921
## 92      92 34.54133 1275.25226
## 93      93 23.43756  401.50005
## 94      94 21.37423  348.17054
## 95      95 29.07699  301.13541
## 96      96 21.12022  380.30301
## 97      97 34.58857 1360.10391
## 98      98 16.68505  764.83199
## 99      99 17.54462  505.50236
## 100    100 21.53948  579.14617
## 101    101 28.39016 1090.21012
## 102    102 25.92037  841.98234
## 103    103 29.86584  666.64705
## 104    104 15.38237  533.86282
## 105    105 27.47202  499.52951
## 106    106 36.62547 1387.15229
## 107    107 30.41631 1049.17562
## 108    108 37.12592 1375.14957
## 109    109 31.23548  633.98106
## 110    110 22.31861  655.49550
## 111    111 25.93824  449.14892
## 112    112 23.63608  686.09167
## 113    113 25.67857  906.41714
## 114    114 21.39282 1176.97665
## 115    115 25.64003  934.57142
## 116    116 30.27250  901.57393
## 117    117 36.15995 1137.94087
## 118    118 30.68181 1112.37674
## 119    119 27.36889  795.01240
## 120    120 23.28205  523.58842
## 121    121 28.04156 1141.06595
## 122    122 32.22323  804.85182
## 123    123 36.21900 1284.24263
## 124    124 27.04214  631.15450
## 125    125 24.84103  444.00998
## 126    126 30.78080  926.46175
## 127    127 29.18333  558.27627
## 128    128 37.63783  957.33571
## 129    129 28.50379  236.68310
## 130    130 19.82853  231.05883
## 131    131 33.23082  972.89027
## 132    132 27.21706  863.44377
## 133    133 28.36065 1028.17565
## 134    134 23.93822  147.54510
## 135    135 35.20710 1132.33663
## 136    136 28.82648 1128.65088
## 137    137 33.40895 1077.28084
## 138    138 28.92072 1002.35513
## 139    139 40.14893 1406.07242
## 140    140 24.31629  319.08800
## 141    141 25.67668  584.65333
## 142    142 29.71301  887.46581
## 143    143 18.04081  516.37060
## 144    144 27.98458  788.59288
## 145    145 32.32062  990.10417
## 146    146 23.43611  563.12972
## 147    147 27.11307  962.91732
## 148    148 32.97240  930.22232
## 149    149 28.09747  516.44755
## 150    150 25.40022  458.29919
## 151    151 36.51455 1060.14664
## 152    152 29.27284  710.82593
## 153    153 27.06929  982.76849
## 154    154 31.40240  504.95232
## 155    155 41.58325 1152.07457
## 156    156 29.17450  352.85128
## 157    157 24.48764  995.92890
## 158    158 31.99785  904.08178
## 159    159 30.93197  844.23795
## 160    160 29.24525  966.09442
## 161    161 35.22698  884.03252
## 162    162 31.43807  567.88834
## 163    163 24.10351  680.84756
## 164    164 25.35752  872.26327
## 165    165 13.66656  362.07584
## 166    166 18.51499  492.88927
## 167    167 30.78983 1396.01391
## 168    168 25.79447  122.23618
## 169    169 21.32579  384.75568
## 170    170 29.84281 1064.58209
## 171    171 29.10384 1094.54187
## 172    172 40.03756 1018.65494
## 173    173 22.98051  779.84188
## 174    174 33.45710  861.28281
## 175    175 23.70121  486.57606
## 176    176 21.48845  563.58393
## 177    177 24.63802  682.45134
## 178    178 35.80502  477.42783
## 179    179 29.87962 1012.95417
## 180    180 25.24207  267.10095
## 181    181 23.75446 1085.01763
## 182    182 18.33397  986.15618
## 183    183 37.97055 1692.46284
## 184    184 19.22926 1041.87586
## 185    185 55.33153 1757.34969
## 186    186 16.84098 2027.41202
## 187    187 35.01454 1535.77260
## 188    188 36.04194 1744.96186
## 189    189 51.59208 1245.69513
## 190    190 48.21501 1740.05513
## 191    191 31.42974  981.80509
## 192    192 53.00897 1492.21872
## 193    193 45.63687  950.54863
## 194    194 45.53491 1411.28240
## 195    195 37.23394 1989.38940
## 196    196 42.12554 1824.14245
## 197    197 18.31620 1335.94169
## 198    198 46.95115 1188.30197
## 199    199 33.86553 1159.15677
## 200    200 44.15082 1206.27908
# Per-column descriptive statistics.
print(summary(dados))
##      X1.200           coll1           coll2        
##  Min.   :  1.00   Min.   :10.89   Min.   :  14.73  
##  1st Qu.: 50.75   1st Qu.:24.17   1st Qu.: 531.80  
##  Median :100.50   Median :27.69   Median : 843.11  
##  Mean   :100.50   Mean   :28.70   Mean   : 835.52  
##  3rd Qu.:150.25   3rd Qu.:32.33   3rd Qu.:1078.84  
##  Max.   :200.00   Max.   :55.33   Max.   :2027.41
# First six rows.
head(dados, n = 6L)
##   X1.200    coll1     coll2
## 1      1 25.30381  488.9411
## 2      2 22.34521  818.8790
## 3      3 45.10306 1246.7928
## 4      4 24.21522  417.7354
## 5      5 21.78838  738.0232
## 6      6 28.85107  802.8882

1.1 Análise Gráfica: com Outliers

# Histogram of coll1 on the data as simulated (default binning).
ggplot(dados, aes(x = coll1)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2 Outliers

2.1 Análise de Outliers

# Collect, for each column, the values that boxplot.stats() reports in its
# `out` component (points lying beyond the boxplot whiskers).
out1 <- boxplot.stats(dados$coll1)$out
out2 <- boxplot.stats(dados$coll2)$out

# print() instead of View(): View() opens an interactive viewer, so it shows
# nothing in a rendered report and may fail without a GUI.
print(out1)
print(out2)

3 Retirar Outliers

# Replace every flagged outlier with the column mean.
# The earlier version compared the numeric columns against the string literals
# "out2" / "out1", which never match, so no value was replaced. Membership in
# the outlier vectors is what has to be tested. `%in%` is safe here because
# out1 / out2 hold values taken unchanged from these same columns.
# NOTE: the outputs recorded further down (correlation matrix, regression
# summary) were produced while this step had no effect; they will change
# when the script is re-run.
dados$coll2[dados$coll2 %in% out2] <- mean(dados$coll2)
dados$coll1[dados$coll1 %in% out1] <- mean(dados$coll1)

# Boxplots after the replacement, to check the result visually.
boxplot(dados$coll1)

boxplot(dados$coll2)

## Análise Gráfica: sem Outliers

# Histogram of coll1 after the outlier-treatment step (default binning).
ggplot(data = dados, mapping = aes(x = coll1)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

4 Gráfico de Dispersão

# Scatter plot of coll2 against coll1.
ggplot(dados, aes(x = coll1, y = coll2)) +
  geom_point()

## Valor da Correlação

# Drop the row-index column so that only coll1 and coll2 enter the
# correlation matrix.
dados[["X1.200"]] <- NULL

# Pearson correlation matrix computed on complete rows only.
cor(dados, use = "complete.obs", method = "pearson")
##           coll1     coll2
## coll1 1.0000000 0.5757991
## coll2 0.5757991 1.0000000

4.1 Ajustar Modelo de Regressão

 # Simple linear regression with coll1 as response and coll2 as predictor.
 regres <- lm(formula = coll1 ~ coll2, data = dados)
 summary(regres)
## 
## Call:
## lm(formula = coll1 ~ coll2, data = dados)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.6979  -3.1851  -0.4478   3.0673  18.4769 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.691589   1.001280   19.67   <2e-16 ***
## coll2        0.010776   0.001087    9.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.952 on 198 degrees of freedom
## Multiple R-squared:  0.3315, Adjusted R-squared:  0.3282 
## F-statistic: 98.21 on 1 and 198 DF,  p-value: < 2.2e-16

4.2 Análise de Resíduos

# Residual plot for the fitted model `regres`.
# The earlier version plotted coll1 against coll2 from `dados` while labelling
# the axes "Índice" / "Resíduos", so it just repeated the scatter plot.
# Here the model residuals are plotted against the observation index, with a
# horizontal reference line at zero.
residuos <- data.frame(
  indice = seq_along(residuals(regres)),
  residuo = residuals(regres)
)
ggplot(residuos, aes(x = indice, y = residuo)) +
  geom_point() +
  geom_hline(yintercept = 0) +
  labs(x = "Índice", y = "Resíduos")