library(rio)
# URL of the Excel workbook that holds the data set
linkToData <- "https://github.com/JhazminRios29/clase1/raw/master/DATA%20(3).xlsx"
# Read the workbook from the URL
data <- import(linkToData)
# Visualisation setup: install ggplot2 only when it is missing, so re-running
# the script does not reinstall the package every time
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Print the full data set
data
## Departamento
## 1 Amazonas
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## 7 <NA>
## 8 Áncash
## 9 <NA>
## 10 <NA>
## 11 <NA>
## 12 <NA>
## 13 <NA>
## 14 <NA>
## 15 <NA>
## 16 <NA>
## 17 <NA>
## 18 <NA>
## 19 <NA>
## 20 <NA>
## 21 <NA>
## 22 <NA>
## 23 <NA>
## 24 <NA>
## 25 <NA>
## 26 <NA>
## 27 <NA>
## 28 Apurímac
## 29 <NA>
## 30 <NA>
## 31 <NA>
## 32 <NA>
## 33 <NA>
## 34 <NA>
## 35 Arequipa
## 36 <NA>
## 37 <NA>
## 38 <NA>
## 39 <NA>
## 40 <NA>
## 41 <NA>
## 42 <NA>
## 43 Ayacucho
## 44 <NA>
## 45 <NA>
## 46 <NA>
## 47 <NA>
## 48 <NA>
## 49 <NA>
## 50 <NA>
## 51 <NA>
## 52 <NA>
## 53 <NA>
## 54 Cajamarca
## 55 <NA>
## 56 <NA>
## 57 <NA>
## 58 <NA>
## 59 <NA>
## 60 <NA>
## 61 <NA>
## 62 <NA>
## 63 <NA>
## 64 <NA>
## 65 <NA>
## 66 <NA>
## 67 Cusco
## 68 <NA>
## 69 <NA>
## 70 <NA>
## 71 <NA>
## 72 <NA>
## 73 <NA>
## 74 <NA>
## 75 <NA>
## 76 <NA>
## 77 <NA>
## 78 <NA>
## 79 <NA>
## 80 Huancavelica
## 81 <NA>
## 82 <NA>
## 83 <NA>
## 84 <NA>
## 85 <NA>
## 86 <NA>
## 87 Huánuco
## 88 <NA>
## 89 <NA>
## 90 <NA>
## 91 <NA>
## 92 <NA>
## 93 <NA>
## 94 <NA>
## 95 <NA>
## 96 <NA>
## 97 <NA>
## 98 Ica
## 99 <NA>
## 100 <NA>
## 101 <NA>
## 102 <NA>
## 103 Junín
## 104 <NA>
## 105 <NA>
## 106 <NA>
## 107 <NA>
## 108 <NA>
## 109 <NA>
## 110 <NA>
## 111 <NA>
## 112 La Libertad
## 113 <NA>
## 114 <NA>
## 115 <NA>
## 116 <NA>
## 117 <NA>
## 118 <NA>
## 119 <NA>
## 120 <NA>
## 121 <NA>
## 122 <NA>
## 123 <NA>
## 124 Lambayeque
## 125 <NA>
## 126 <NA>
## 127 Lima
## 128 <NA>
## 129 <NA>
## 130 <NA>
## 131 <NA>
## 132 <NA>
## 133 <NA>
## 134 <NA>
## 135 <NA>
## 136 <NA>
## 137 Loreto
## 138 <NA>
## 139 <NA>
## 140 <NA>
## 141 <NA>
## 142 <NA>
## 143 <NA>
## 144 Madre de Dios
## 145 <NA>
## 146 <NA>
## 147 Moquegua
## 148 <NA>
## 149 <NA>
## 150 Pasco
## 151 <NA>
## 152 <NA>
## 153 Piura
## 154 <NA>
## 155 <NA>
## 156 <NA>
## 157 <NA>
## 158 <NA>
## 159 <NA>
## 160 <NA>
## 161 Provincia Constitucional del Callao
## 162 Puno
## 163 <NA>
## 164 <NA>
## 165 <NA>
## 166 <NA>
## 167 <NA>
## 168 <NA>
## 169 <NA>
## 170 <NA>
## 171 <NA>
## 172 <NA>
## 173 <NA>
## 174 <NA>
## 175 San Martín
## 176 <NA>
## 177 <NA>
## 178 <NA>
## 179 <NA>
## 180 <NA>
## 181 <NA>
## 182 <NA>
## 183 <NA>
## 184 <NA>
## 185 Tacna
## 186 <NA>
## 187 <NA>
## 188 <NA>
## 189 Tumbes
## 190 <NA>
## 191 <NA>
## 192 Ucayali
## 193 <NA>
## 194 <NA>
## 195 <NA>
## Provincia MOR EV IDIO VIV
## 1 Bagua 16.9 12991 0.2454885630 0.7820969
## 2 Bongará 21.4 6410 0.0020556180 0.8425926
## 3 Chachapoyas 20.4 9427 0.0036320906 0.7907919
## 4 Condorcanqui 28.5 8281 0.9058878797 0.4505415
## 5 Luya 24.5 10576 0.0010222449 0.8763681
## 6 Rodríguez de Mendoza 14.3 6090 0.0010505051 0.8586932
## 7 Utcubamba 19.4 18253 0.0032615979 0.8273204
## 8 Aija 41.9 1144 0.3779717931 0.6075424
## 9 Antonio Raymondi 32.0 1875 0.7643605605 0.8699090
## 10 Asunción 32.6 1126 0.8087682181 0.8779180
## 11 Bolognesi 27.1 4850 0.1581143527 0.6994278
## 12 Carhuaz 22.8 7925 0.7340990252 0.8701681
## 13 Carlos Fermín Fitzcarrald 28.3 2312 0.9096199585 0.8851282
## 14 Casma 16.4 5469 0.1393454946 0.7758907
## 15 Corongo 20.7 1398 0.0946888072 0.7865223
## 16 Huaraz 23.0 17941 0.3640127137 0.7869267
## 17 Huari 24.9 6102 0.7821222391 0.8082139
## 18 Huarmey 13.6 3990 0.0652927508 0.8252869
## 19 Huaylas 19.1 9182 0.5725788814 0.7175274
## 20 Mariscal Luzuriaga 26.4 2352 0.9105105105 0.7826762
## 21 Ocros 12.9 1918 0.0851945660 0.6956188
## 22 Pallasca 37.6 3912 0.0051202711 0.6301375
## 23 Pomabamba 17.5 2899 0.8489637405 0.8483965
## 24 Recuay 26.6 2653 0.3650740987 0.7335432
## 25 Santa 12.9 40518 0.0250912287 0.8683386
## 26 Sihuas 25.6 5010 0.3689259207 0.9001767
## 27 Yungay 37.4 10704 0.7325770482 0.6822133
## 28 Abancay 17.5 13126 0.4831680758 0.6761962
## 29 Andahuaylas 18.3 22026 0.7375944608 0.8666699
## 30 Antabamba 41.2 2163 0.7750813330 0.7811671
## 31 Aymaraes 27.2 5544 0.7123125067 0.8014204
## 32 Chincheros 26.4 6506 0.8057715966 0.7471992
## 33 Cotabambas 24.3 4197 0.9037901565 0.9275256
## 34 Grau 30.2 3384 0.8156405419 0.8447617
## 35 Arequipa 16.1 72673 0.1412686418 0.7810000
## 36 Camaná 13.2 6420 0.1590370562 0.7933962
## 37 Caravelí 12.9 5472 0.1557947508 0.7403431
## 38 Castilla 23.9 5288 0.2012825474 0.7358333
## 39 Caylloma 23.4 13070 0.3542947885 0.6846604
## 40 Condesuyos 22.0 3220 0.2687614909 0.7354485
## 41 Islay 21.2 5145 0.1406876906 0.7931824
## 42 La Unión 25.1 3632 0.5969304229 0.7818935
## 43 Cangallo 22.4 6949 0.9026007922 0.9262534
## 44 Huamanga 25.7 29322 0.5054988954 0.8039439
## 45 Huanca Sancos 26.9 1976 0.8109442709 0.8330626
## 46 Huanta 24.8 17510 0.6742124171 0.8089886
## 47 La Mar 22.9 18895 0.8328085401 0.8376204
## 48 Lucanas 25.3 13127 0.4270301793 0.7431646
## 49 Parinacochas 43.4 5038 0.5528798411 0.7736300
## 50 Páucar del Sara Sara 19.0 2111 0.5048972537 0.7308979
## 51 Sucre 28.5 2009 0.7668411278 0.7486461
## 52 Víctor Fajardo 21.4 4803 0.8642361981 0.8678534
## 53 Vilcas Huamán 26.4 3763 0.8973769605 0.8804958
## 54 Cajabamba 16.9 14148 0.0008598703 0.8745239
## 55 Cajamarca 22.6 40794 0.0118220618 0.7919398
## 56 Celendín 31.3 15567 0.0008832105 0.8571364
## 57 Chota 19.6 28763 0.0032494662 0.9059354
## 58 Contumazá 22.1 4789 0.0010506694 0.8264287
## 59 Cutervo 18.5 25564 0.0008845849 0.8874671
## 60 Hualgayoc 18.6 33521 0.0011612475 0.9142621
## 61 Jaén 20.5 31288 0.0079981373 0.7721503
## 62 San Ignacio 21.5 30051 0.0087058416 0.8780383
## 63 San Marcos 31.8 9242 0.0033237175 0.8967720
## 64 San Miguel 19.0 10011 0.0012633405 0.8868788
## 65 San Pablo 34.8 3669 0.0013518553 0.8683527
## 66 Santa Cruz 21.0 7265 0.0005315037 0.8922003
## 67 Acomayo 44.0 5228 0.8754764431 0.8715876
## 68 Anta 19.5 11357 0.7038206140 0.8302409
## 69 Calca 23.5 14680 0.7027782745 0.8207821
## 70 Canas 45.1 6984 0.9175701352 0.9326131
## 71 Canchis 28.3 20764 0.5872454659 0.7773632
## 72 Chumbivílcas 30.9 18579 0.9123323101 0.9397103
## 73 Cusco 15.6 45726 0.1855929124 0.6414359
## 74 Espinar 30.6 14624 0.6912044266 0.8712463
## 75 La Convención 18.1 41311 0.4774174944 0.7750523
## 76 Paruro 37.5 6635 0.9229336919 0.8037265
## 77 Paucartambo 39.8 12553 0.8631771613 0.9016574
## 78 Quispicanchi 39.1 17269 0.7557503598 0.7749434
## 79 Urubamba 17.0 10234 0.5156139238 0.7145397
## 80 Acobamba 23.9 11726 0.8604281387 0.7919411
## 81 Angaraes 31.4 6918 0.7877539973 0.7805251
## 82 Castrovirreyna 27.5 3328 0.2240082825 0.7040927
## 83 Churcampa 28.8 8710 0.7922724986 0.6714034
## 84 Huancavelica 34.9 29363 0.5517743594 0.8102975
## 85 Huaytará 21.7 4460 0.2593673079 0.6407428
## 86 Tayacaja 24.8 19303 0.6523894850 0.8117733
## 87 Ambo 19.3 11297 0.2530078867 0.8516018
## 88 Dos de Mayo 36.3 6654 0.3995737269 0.8832404
## 89 Huacaybamba 27.1 3118 0.7898409007 0.9068943
## 90 Huamalíes 23.0 10215 0.5339549214 0.8395686
## 91 Huánuco 21.2 43889 0.2386471946 0.7637156
## 92 Lauricocha 24.7 5561 0.1045283477 0.8446608
## 93 Leoncio Prado 17.3 22959 0.0839951689 0.7658068
## 94 Marañón 36.8 4636 0.2482376431 0.9042024
## 95 Pachitea 27.8 12545 0.4712161920 0.8300414
## 96 Puerto Inca 21.6 7160 0.1029746680 0.7910094
## 97 Yarowilca 29.4 7333 0.6109631011 0.9328365
## 98 Chincha 10.3 19813 0.0254217703 0.8015418
## 99 Ica 10.8 26455 0.0536116152 0.8027519
## 100 Nazca 9.5 6844 0.0790800888 0.6350561
## 101 Palpa 10.3 1475 0.0745813052 0.8186654
## 102 Pisco 12.3 12991 0.0441888825 0.8039913
## 103 Chanchamayo 19.1 31021 0.1360817450 0.7317907
## 104 Chupaca 25.4 9664 0.0677872748 0.8066284
## 105 Concepción 26.8 9953 0.0718788737 0.7573213
## 106 Huancayo 21.5 62014 0.1220196990 0.6967929
## 107 Jauja 25.8 12803 0.0225185833 0.6880273
## 108 Junín 30.6 4851 0.1549227127 0.7165609
## 109 Satipo 25.8 41467 0.3122761060 0.7078938
## 110 Tarma 21.2 23438 0.0541062165 0.7267595
## 111 Yauli 17.4 4186 0.0401873138 0.4263194
## 112 Ascope 15.7 10365 0.0017166680 0.8201242
## 113 Bolívar 20.7 2665 0.0064419573 0.8456246
## 114 Chepén 14.7 7532 0.0016604342 0.7749433
## 115 Gran Chimú 20.8 5551 0.0009848059 0.8473810
## 116 Julcán 26.4 6484 0.0004594683 0.8718416
## 117 Otuzco 19.6 21457 0.0017051639 0.8742680
## 118 Pacasmayo 11.4 7278 0.0021413756 0.7984786
## 119 Pataz 28.8 11330 0.0099415448 0.8418966
## 120 Sánchez Carrión 29.2 23826 0.0007537729 0.8863797
## 121 Santiago de Chuco 19.6 7878 0.0039664887 0.7726470
## 122 Trujillo 12.9 79437 0.0036853646 0.7902482
## 123 Virú 14.6 8276 0.0039097235 0.7693884
## 124 Chiclayo 12.9 83813 0.0042001782 0.7536106
## 125 Ferreñafe 32.2 9828 0.2293893599 0.8732049
## 126 Lambayeque 21.4 24554 0.0049908075 0.8728926
## 127 Barranca 10.1 16800 0.0342238381 0.7189611
## 128 Cajatambo 25.6 1464 0.1389277983 0.6654389
## 129 Cañete 11.1 22462 0.0156617612 0.7618227
## 130 Canta 15.7 2417 0.0408067174 0.6646653
## 131 Huaral 10.8 21958 0.0362907629 0.7462452
## 132 Huarochirí 24.1 13879 0.0182021558 0.6474198
## 133 Huaura 14.0 23991 0.0212710280 0.7398813
## 134 Lima 10.9 712535 0.0675540589 0.7534620
## 135 Oyón 22.4 2400 0.0762067360 0.5778852
## 136 Yauyos 28.2 6432 0.0638053267 0.7346670
## 137 Alto Amazonas 26.8 18258 0.1386189527 0.8893598
## 138 Datem del Marañón 31.3 11400 0.4931814063 0.6593053
## 139 Loreto 32.6 10813 0.0924578116 0.9272697
## 140 Mariscal Ramón Castilla 31.7 11722 0.1008021826 0.9164586
## 141 Maynas 24.6 75219 0.0160466139 0.8906006
## 142 Requena 31.0 13933 0.0253699789 0.9288669
## 143 Ucayali 35.3 11989 0.1055518706 0.9209401
## 144 Manu 24.9 3417 0.4015916253 0.5874367
## 145 Tahuamanu 9.2 1519 0.1130099773 0.7265023
## 146 Tambopata 24.3 10675 0.1546845439 0.6611566
## 147 General Sánchez Cerro 11.8 4784 0.3953576385 0.7896082
## 148 Ilo 14.7 5936 0.1321813471 0.8324410
## 149 Mariscal Nieto 11.4 8625 0.2036139205 0.8044019
## 150 Daniel Alcides Carrión 29.1 7740 0.0898774899 0.5153672
## 151 Oxapampa 23.2 18198 0.1840493217 0.7471018
## 152 Pasco 19.4 17422 0.0509023209 0.6673730
## 153 Ayabaca 33.9 27718 0.0011256772 0.8468006
## 154 Huancabamba 41.3 23130 0.0014108279 0.9129753
## 155 Morropón 27.1 26194 0.0022834384 0.8980590
## 156 Paita 17.2 11520 0.0018055376 0.9020664
## 157 Piura 19.0 72998 0.0026703598 0.8783113
## 158 Sechura 28.3 8767 0.0015283804 0.9174438
## 159 Sullana 14.2 35192 0.0044356186 0.8710447
## 160 Talara 17.7 10933 0.0022111117 0.8094868
## 161 Provincia Constitucional del Callao 10.1 68672 0.0491503158 0.7803990
## 162 Azángaro 45.3 29045 0.8143051084 0.8824321
## 163 Carabaya 52.1 17722 0.8478336929 0.8383293
## 164 Chucuito 25.3 33382 0.7308936955 0.9222909
## 165 El Collao 25.0 25228 0.7733056583 0.9459967
## 166 Huancané 30.1 18352 0.8414976882 0.9359649
## 167 Lampa 32.6 8716 0.7497265608 0.8144226
## 168 Melgar 39.4 15420 0.7079639732 0.6830981
## 169 Moho 32.1 9309 0.8614209551 0.9267793
## 170 Puno 36.6 49731 0.5423945932 0.8165708
## 171 San Antonio de Putina 40.8 6486 0.6967688834 0.7798417
## 172 San Román 26.5 43853 0.3838350512 0.7510167
## 173 Sandia 35.3 20650 0.6438410210 0.8182663
## 174 Yunguyo 24.1 14742 0.6856776532 0.9506660
## 175 Bellavista 27.6 11889 0.0067929688 0.8646030
## 176 El Dorado 26.4 10078 0.0176345197 0.9048115
## 177 Huallaga 14.0 6206 0.0016223089 0.8867886
## 178 Lamas 21.2 22339 0.0523782907 0.9031284
## 179 Mariscal Cáceres 22.3 11402 0.0040200580 0.8243683
## 180 Moyobamba 18.5 23111 0.0112437198 0.8338765
## 181 Picota 20.9 8505 0.0030233022 0.8811598
## 182 Rioja 16.1 21100 0.0210254935 0.8017022
## 183 San Martín 21.1 27793 0.0057288225 0.8233855
## 184 Tocache 22.7 15621 0.0421823021 0.7458411
## 185 Candarave 28.2 2410 0.2174076393 0.8134683
## 186 Jorge Basadre 18.3 1118 0.2008483563 0.5631562
## 187 Tacna 14.7 37505 0.1902530347 0.8409904
## 188 Tarata 19.9 2449 0.1758477416 0.8082227
## 189 Contralmirante Villar 11.9 2355 0.0009486466 0.8924382
## 190 Tumbes 12.0 18498 0.0024576836 0.8463211
## 191 Zarumilla 15.0 7433 0.0034027850 0.8440782
## 192 Atalaya 47.1 6320 0.5077498664 0.7849683
## 193 Coronel Portillo 22.4 51278 0.0704480353 0.8644931
## 194 Padre Abad 13.2 11454 0.1047972220 0.8179934
## 195 Purús 31.3 522 0.6933174224 0.9089878
## Cap
## 1 0
## 2 0
## 3 1
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 0
## 14 0
## 15 0
## 16 1
## 17 0
## 18 0
## 19 0
## 20 0
## 21 0
## 22 0
## 23 0
## 24 0
## 25 0
## 26 0
## 27 0
## 28 1
## 29 0
## 30 0
## 31 0
## 32 0
## 33 0
## 34 0
## 35 1
## 36 0
## 37 0
## 38 0
## 39 0
## 40 0
## 41 0
## 42 0
## 43 0
## 44 1
## 45 0
## 46 0
## 47 0
## 48 0
## 49 0
## 50 0
## 51 0
## 52 0
## 53 0
## 54 0
## 55 1
## 56 0
## 57 0
## 58 0
## 59 0
## 60 0
## 61 0
## 62 0
## 63 0
## 64 0
## 65 0
## 66 0
## 67 0
## 68 0
## 69 0
## 70 0
## 71 0
## 72 0
## 73 1
## 74 0
## 75 0
## 76 0
## 77 0
## 78 0
## 79 0
## 80 0
## 81 0
## 82 0
## 83 0
## 84 1
## 85 0
## 86 0
## 87 0
## 88 0
## 89 0
## 90 0
## 91 1
## 92 0
## 93 0
## 94 0
## 95 0
## 96 0
## 97 0
## 98 0
## 99 1
## 100 0
## 101 0
## 102 0
## 103 0
## 104 0
## 105 0
## 106 1
## 107 0
## 108 0
## 109 0
## 110 0
## 111 0
## 112 0
## 113 0
## 114 0
## 115 0
## 116 0
## 117 0
## 118 0
## 119 0
## 120 0
## 121 0
## 122 1
## 123 0
## 124 1
## 125 0
## 126 0
## 127 0
## 128 0
## 129 0
## 130 0
## 131 0
## 132 0
## 133 0
## 134 1
## 135 0
## 136 0
## 137 0
## 138 0
## 139 0
## 140 0
## 141 1
## 142 0
## 143 0
## 144 0
## 145 0
## 146 1
## 147 0
## 148 0
## 149 1
## 150 0
## 151 0
## 152 1
## 153 0
## 154 0
## 155 0
## 156 0
## 157 1
## 158 0
## 159 0
## 160 0
## 161 1
## 162 0
## 163 0
## 164 0
## 165 0
## 166 0
## 167 0
## 168 0
## 169 0
## 170 1
## 171 0
## 172 0
## 173 0
## 174 0
## 175 0
## 176 0
## 177 0
## 178 0
## 179 0
## 180 1
## 181 0
## 182 0
## 183 0
## 184 0
## 185 0
## 186 0
## 187 1
## 188 0
## 189 0
## 190 1
## 191 0
## 192 0
## 193 1
## 194 0
## 195 0
# Scatter plot of MOR (y axis) against EV (x axis)
base <- ggplot(data = data, mapping = aes(x = EV, y = MOR))
scatter <- base +
  geom_point()
scatter
# Correlation between MOR and EV
f1 <- formula(~ MOR + EV)
# Parametric route: Pearson correlation (estimate and p-value only)
pearsonf1 <- cor.test(f1, data = data)[c("estimate", "p.value")]
pearsonf1
## $estimate
## cor
## -0.1586011
##
## $p.value
## [1] 0.02679163
# Non-parametric route: Spearman rank correlation.
# The data contain ties, so an exact p-value cannot be computed; asking for
# the asymptotic p-value explicitly (exact = FALSE) avoids the
# "Cannot compute exact p-value with ties" warning of the original call.
spearmanf1 <- cor.test(
  f1,
  data = data, method = "spearman", exact = FALSE
)[c("estimate", "p.value")]
spearmanf1
## $estimate
## rho
## -0.1517232
##
## $p.value
## [1] 0.03423011
# Adding a third variable (IDIO) to the picture
# Install scatterplot3d only when it is missing instead of on every run
if (!requireNamespace("scatterplot3d", quietly = TRUE)) {
  install.packages("scatterplot3d")
}
library(scatterplot3d)
# 3D scatter of the IDIO, EV and MOR columns, in that order
scatterplot3d(data[, c("IDIO", "EV", "MOR")])
# Same EV-vs-MOR scatter as before, with points coloured by IDIO
base <- ggplot(data = data, aes(x = EV, y = MOR))
base + geom_point(aes(color = IDIO))
# Correlation between MOR and IDIO
f2 <- formula(~ MOR + IDIO)
# Parametric route: Pearson correlation (estimate and p-value only)
pearsonf2 <- cor.test(f2, data = data)[c("estimate", "p.value")]
pearsonf2
## $estimate
## cor
## 0.5019944
##
## $p.value
## [1] 7.640369e-14
# Non-parametric route: Spearman rank correlation.
# The data contain ties, so an exact p-value cannot be computed; asking for
# the asymptotic p-value explicitly (exact = FALSE) avoids the
# "Cannot compute exact p-value with ties" warning of the original call.
spearmanf2 <- cor.test(
  f2,
  data = data, method = "spearman", exact = FALSE
)[c("estimate", "p.value")]
spearmanf2
## $estimate
## rho
## 0.4472877
##
## $p.value
## [1] 5.549205e-11
# Correlation between MOR and VIV
f3 <- formula(~ MOR + VIV)
# Parametric route: Pearson correlation (estimate and p-value only)
pearsonf3 <- cor.test(f3, data = data)[c("estimate", "p.value")]
pearsonf3
## $estimate
## cor
## 0.1347842
##
## $p.value
## [1] 0.06029368
# Non-parametric route: Spearman rank correlation.
# The data contain ties, so an exact p-value cannot be computed; asking for
# the asymptotic p-value explicitly (exact = FALSE) avoids the
# "Cannot compute exact p-value with ties" warning of the original call.
spearmanf3 <- cor.test(
  f3,
  data = data, method = "spearman", exact = FALSE
)[c("estimate", "p.value")]
spearmanf3
## $estimate
## rho
## 0.1940656
##
## $p.value
## [1] 0.006558888
# Adding the capital indicator: distribution of MOR for each value of Cap.
# Cap is stored as a number (0/1); mapping it as-is made ggplot2 warn
# "Continuous x aesthetic -- did you forget aes(group=...)?". Converting it to
# a factor gives one box per group.
base <- ggplot(data = data, aes(x = factor(Cap), y = MOR))
base + geom_boxplot(notch = TRUE) + labs(x = "Cap")
# Error plot of MOR by Cap
# Install ggpubr only when it is missing instead of on every run
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library(ggpubr)
## Loading required package: magrittr
ggerrorplot(data = data, x = "Cap", y = "MOR")
# Visual check of the normality of MOR, split by Cap (capital or not)
library(ggplot2)
# Density histogram per Cap group with a normal curve on top. The curve uses
# the mean and sd of MOR over the whole data set, so it is the same reference
# curve in every facet.
ggplot(data, aes(x = MOR)) +
  geom_histogram(aes(y = ..density..), bins = 20, fill = "green") +
  stat_function(
    fun = dnorm,
    colour = "red",
    args = list(
      mean = mean(data$MOR, na.rm = TRUE),
      sd = sd(data$MOR, na.rm = TRUE)
    )
  ) +
  facet_grid(. ~ Cap) +
  coord_flip()
# QQ plot per Cap group: normality is suggested when the points stay close to
# the diagonal
library(ggpubr)
ggqqplot(data = data, x = "MOR") +
  facet_grid(. ~ Cap)
# Shapiro-Wilk normality test of MOR within each Cap group
library(knitr)
library(magrittr)
library(kableExtra)
f4 <- formula(MOR ~ Cap)
# For every Cap group, keep the W statistic and the p-value of the test
tablag <- aggregate(f4, data,
  FUN = function(x) {
    res <- shapiro.test(x)
    c(res$statistic, res$p.value)
  }
)
# The two values per group come back together in the second column of tablag;
# turn that column into its own data frame
shapiroTest <- as.data.frame(tablag[, 2])
shapiroTest
## W V2
## 1 0.9749568 0.003614951
## 2 0.9204432 0.052438890
names(shapiroTest) <- c("W", "Prob")
kable(cbind(tablag[1], shapiroTest)) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE, position = "left"
  )
# Rendered table, kept as commented output so the script still parses
## | Cap | W | Prob |
## |---|---|---|
## | 0 | 0.9749568 | 0.0036150 |
## | 1 | 0.9204432 | 0.0524389 |
# Shapiro-Wilk hypothesis
# H0: the sample comes from a normal distribution.
# When the p-value is above 0.05, H0 is not rejected.
# Here the two groups disagree (Cap = 0: p = 0.0036, Cap = 1: p = 0.0524), so
# normality cannot be assumed for both groups.
# Consequently the Mann-Whitney (Wilcoxon rank-sum) test is used to assess the
# relationship between MOR and Cap.
# The t-test result is stored as well, but only the Wilcoxon p-value is printed.
tf4 <- t.test(f4, data = data)[c("estimate", "p.value")]
wilcoxf4 <- wilcox.test(f4, data = data)["p.value"]
wilcoxf4
## $p.value
## [1] 0.001322102
# The non-parametric test above (p = 0.0013) rejects the hypothesis that MOR
# is the same in capital and non-capital provinces, so Cap is treated as
# relevant for the model.
# Visual check
library(ggplot2)
base <- ggplot(data = data, mapping = aes(x = EV, y = MOR))
# Points coloured by Cap
base +
  geom_point(aes(color = Cap))
# Points coloured by IDIO, one panel per value of Cap
base +
  geom_point(aes(color = IDIO)) +
  facet_grid(. ~ Cap)
# Nested model specifications, each adding one predictor of MOR
modelo1 <- formula(MOR ~ EV)
modelo2 <- formula(MOR ~ EV + IDIO)
modelo3 <- formula(MOR ~ EV + IDIO + VIV)
# The original line read `modelo4= formula`, which stored the `formula`
# function itself instead of a model.
# NOTE(review): completed by adding Cap, the remaining variable analysed above
# and the last label ("Capital") in the summary table — confirm this is the
# intended fourth model.
modelo4 <- formula(MOR ~ EV + IDIO + VIV + Cap)
# First regression: fit modelo1 and print it as a text table
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
reg1 <- lm(formula = modelo1, data = data)
# intercept.bottom = FALSE lists the constant first
stargazer(reg1, type = "text", intercept.bottom = FALSE)
##
## ===============================================
## Dependent variable:
## ---------------------------
## MOR
## -----------------------------------------------
## Constant 24.383***
## (0.640)
##
## EV -0.00003**
## (0.00001)
##
## -----------------------------------------------
## Observations 195
## R2 0.025
## Adjusted R2 0.020
## Residual Std. Error 8.400 (df = 193)
## F Statistic 4.980** (df = 1; 193)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Draw the fitted straight line of MOR on EV over the scatter plot
library(ggplot2)
ggplot(data, aes(x = EV, y = MOR)) +
  geom_point() +
  geom_smooth(method = lm)
# Second regression: modelo2 adds IDIO
reg2 <- lm(formula = modelo2, data = data)
stargazer(reg2, type = "text", intercept.bottom = FALSE)
##
## ===============================================
## Dependent variable:
## ---------------------------
## MOR
## -----------------------------------------------
## Constant 20.333***
## (0.759)
##
## EV -0.00002*
## (0.00001)
##
## IDIO 12.865***
## (1.636)
##
## -----------------------------------------------
## Observations 195
## R2 0.263
## Adjusted R2 0.255
## Residual Std. Error 7.325 (df = 192)
## F Statistic 34.173*** (df = 2; 192)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Third regression: modelo3 adds VIV
reg3 <- lm(formula = modelo3, data = data)
stargazer(reg3, type = "text", intercept.bottom = FALSE)
##
## ===============================================
## Dependent variable:
## ---------------------------
## MOR
## -----------------------------------------------
## Constant 13.711***
## (4.608)
##
## EV -0.00002*
## (0.00001)
##
## IDIO 12.653***
## (1.638)
##
## VIV 8.331
## (5.719)
##
## -----------------------------------------------
## Observations 195
## R2 0.271
## Adjusted R2 0.259
## Residual Std. Error 7.303 (df = 191)
## F Statistic 23.623*** (df = 3; 191)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Check whether the residual error drops significantly from reg1 to reg2
tanova <- anova(reg1, reg2)
stargazer(
  tanova,
  type = "text",
  summary = FALSE,
  title = "Table de Análisis de Varianza"
)
##
## Table de Análisis de Varianza
## ===============================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -----------------------------------------------
## 1 193 13,616.900
## 2 192 10,301.320 1 3,315.571 61.797 0
## -----------------------------------------------
# The anova H0 is that the two models do not differ.
# Pr(>F) is printed as 0 in the table above, so H0 is rejected.
# Next comparison: reg2 against reg3
tanova2 <- anova(reg2, reg3)
stargazer(
  tanova2,
  type = "text",
  summary = FALSE,
  title = "Table de Análisis de Varianza 2"
)
##
## Table de Análisis de Varianza 2
## ==============================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## ----------------------------------------------
## 1 192 10,301.320
## 2 191 10,188.120 1 113.209 2.122 0.147
## ----------------------------------------------
# Compare the three nested models in a single table (overwrites tanova)
tanova <- anova(reg1, reg2, reg3)
stargazer(
  tanova,
  type = "text",
  summary = FALSE,
  title = "Table de Análisis de Varianza"
)
##
## Table de Análisis de Varianza
## ===============================================
## Res.Df RSS Df Sum of Sq F Pr(> F)
## -----------------------------------------------
## 1 193 13,616.900
## 2 192 10,301.320 1 3,315.571 62.158 0
## 3 191 10,188.120 1 113.209 2.122 0.147
## -----------------------------------------------
# Summary table with the three fitted models side by side
library(stargazer)
stargazer(
  reg1, reg2, reg3,
  type = "text",
  title = "Modelos planteadas",
  digits = 2,
  single.row = FALSE,
  no.space = FALSE,
  intercept.bottom = FALSE,
  dep.var.caption = "Variable dependiente:",
  dep.var.labels = "Tasa de Mortalidad Infantil",
  # Labels are applied in row order; the constant comes first because
  # intercept.bottom = FALSE
  covariate.labels = c(
    "Constante", "Empleo Vulnerable", "Idioma",
    "Vivienda propia", "Capital"
  ),
  # Show only the number of observations, adjusted R2 and residual std. error
  keep.stat = c("n", "adj.rsq", "ser"),
  df = FALSE,
  notes.label = "Notas:"
)
##
## Modelos planteadas
## =================================================
## Variable dependiente:
## -----------------------------
## Tasa de Mortalidad Infantil
## (1) (2) (3)
## -------------------------------------------------
## Constante 24.38*** 20.33*** 13.71***
## (0.64) (0.76) (4.61)
##
## Empleo Vulnerable -0.0000** -0.0000* -0.0000*
## (0.0000) (0.0000) (0.0000)
##
## Idioma 12.86*** 12.65***
## (1.64) (1.64)
##
## Vivienda propia 8.33
## (5.72)
##
## -------------------------------------------------
## Observations 195 195 195
## Adjusted R2 0.02 0.25 0.26
## Residual Std. Error 8.40 7.32 7.30
## =================================================
## Notas: *p<0.1; **p<0.05; ***p<0.01
# Graphical comparison of the coefficients of the three models
library(ggplot2)
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
plot_models(
  reg1, reg2, reg3,
  vline.color = "grey",
  m.labels = c("Modelo 1", "Modelo 2", "Modelo 3")
)
# Once a model is chosen, its validity has to be checked with several
# diagnostics. The second model (reg2) is the one examined here.
# Linearity: the line in this plot should be roughly horizontal
plot(reg2, which = 1)
# Homoscedasticity: the red line should tend to be horizontal
plot(reg2, which = 3)
# Breusch-Pagan test
# Install lmtest only when it is missing instead of on every run
if (!requireNamespace("lmtest", quietly = TRUE)) {
  install.packages("lmtest")
}
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Null hypothesis: the model is homoscedastic
bptest(reg2)
##
## studentized Breusch-Pagan test
##
## data: reg2
## BP = 2.1281, df = 2, p-value = 0.3451
# Breusch-Pagan result above: p-value = 0.3451 > 0.05, so the null hypothesis
# of homoscedasticity is not rejected for reg2.
# NOTE(review): the original comment added that this is "a first indication
# that the regression has errors"; that does not follow from this result —
# confirm what was meant.
# Normality of the residuals: points should lie close to the diagonal
plot(reg2, which = 2)
# Shapiro-Wilk test on the residuals
shapiro.test(reg2$residuals)
##
## Shapiro-Wilk normality test
##
## data: reg2$residuals
## W = 0.969, p-value = 0.0002617
# Shapiro-Wilk on the residuals above gives p = 0.0002617, so normality of the
# residuals is rejected.
# Multicollinearity check
# Install DescTools only when it is missing instead of on every run
if (!requireNamespace("DescTools", quietly = TRUE)) {
  install.packages("DescTools")
}
library(DescTools)
VIF(reg2) # values above 5 are considered problematic
## EV IDIO
## 1.012917 1.012917
# Influential observations
plot(reg2, which = 5)
# Collect the logical flags marking each case as influential, one column per
# influence measure
checkReg2 <- as.data.frame(influence.measures(reg2)$is.inf)
head(checkReg2)
## dfb.1_ dfb.EV dfb.IDIO dffit cov.r cook.d hat
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Keep only the cases flagged by Cook's distance or by the hat value
checkReg2[with(checkReg2, cook.d | hat), ]
## dfb.1_ dfb.EV dfb.IDIO dffit cov.r cook.d hat
## 134 FALSE TRUE FALSE TRUE TRUE TRUE TRUE