Leyendo la data(URL)

link1 <- "https://raw.githubusercontent.com/Hurtado20161224/Proyectoaplicativo/master/dataVictimizacion2.csv"
bd = read.csv(link1,header=TRUE,na.strings = "-",quote = "",stringsAsFactors = FALSE,row.names =NULL,encoding = "utf-08")

bd$DEPARTAMENTO <- as.factor(bd$DEPARTAMENTO)

levels(bd$DEPARTAMENTO) <- c("ANCASH","AMAZONAS","APURIMAC","AREQUIPA","AYACUCHO","CAJAMARCA","CUSCO","HUANUCO","HUANCAVELICA","ICA","JUNIN","LA LIBERTAD","LAMBAYEQUE","LIMA","LORETO","MADRE DE DIOS","MOQUEGUA","PASCO","PIURA","CALLAO","PUNO","SAN MARTIN","TACNA","TUMBES","UCAYALI")

head(bd)

##   Ubigeo DEPARTAMENTO    Distrito numero.de.victimizacion poblacion2016
## 1  10101     AMAZONAS CHACHAPOYAS                    4313         28937
## 2  10109     AMAZONAS    LA JALCA                     213          5503
## 3  10201     AMAZONAS       BAGUA                    2114         26005
## 4  10202     AMAZONAS    ARAMANGO                     246         10969
## 5  10205     AMAZONAS       IMAZA                     123         24201
## 6  10206     AMAZONAS     LA PECA                     111          8019
##   victimas.en.su.distrito victimas.en.su.barrio vigilancia.zona
## 1                     867                   401            4228
## 2                      25                     9             213
## 3                     432                   175            2078
## 4                      34                    18             245
## 5                      26                    13             123
## 6                      12                    12             108
##   vigilancia.policia.nacional presencia.de.policia.nacional
## 1                        1596                          1306
## 2                          60                            69
## 3                         583                           613
## 4                         114                            56
## 5                          32                            46
## 6                          34                            77
##   confianza.segurirdad.municipal confianza.en.poder.judicial
## 1                           4229                        4115
## 2                            209                         191
## 3                           2067                        1968
## 4                            243                         227
## 5                            122                         112
## 6                            109                          95
##   confianza.en.ministerio.publico
## 1                            4121
## 2                             195
## 3                            1973
## 4                             227
## 5                             112
## 6                             100

Eliminacion de variables que presentan problemas de multicolinealidad

tail(bd)

##     Ubigeo DEPARTAMENTO   Distrito numero.de.victimizacion poblacion2016
## 656 250107      UCAYALI   MANANTAY                    3146         81150
## 657 250201      UCAYALI   RAYMONDI                    1059         34213
## 658 250202      UCAYALI    SEPAHUA                     305          8939
## 659 250301      UCAYALI PADRE ABAD                    1028         26132
## 660 250302      UCAYALI    IRAZOLA                     716         10527
## 661 250303      UCAYALI   CURIMANA                      37          8697
##     victimas.en.su.distrito victimas.en.su.barrio vigilancia.zona
## 656                     906                   402            3110
## 657                     276                    86            1043
## 658                      69                    23             302
## 659                     315                    98            1019
## 660                     189                    50             716
## 661                      11                    11              37
##     vigilancia.policia.nacional presencia.de.policia.nacional
## 656                         707                          1239
## 657                         222                           370
## 658                          90                            93
## 659                         165                           318
## 660                         210                           203
## 661                          10                            27
##     confianza.segurirdad.municipal confianza.en.poder.judicial
## 656                           3087                        2995
## 657                           1032                         994
## 658                            290                         273
## 659                           1008                         964
## 660                            692                         671
## 661                             34                          35
##     confianza.en.ministerio.publico
## 656                            2983
## 657                             991
## 658                             267
## 659                             959
## 660                             662
## 661                              34

colnames(bd)

##  [1] "Ubigeo"                          "DEPARTAMENTO"                   
##  [3] "Distrito"                        "numero.de.victimizacion"        
##  [5] "poblacion2016"                   "victimas.en.su.distrito"        
##  [7] "victimas.en.su.barrio"           "vigilancia.zona"                
##  [9] "vigilancia.policia.nacional"     "presencia.de.policia.nacional"  
## [11] "confianza.segurirdad.municipal"  "confianza.en.poder.judicial"    
## [13] "confianza.en.ministerio.publico"

bdtemp <- bd
bd <- bd[,-c(1,2,3,
             #4,
             5,8,13)] # Elimina ubigeo

Imputacion por la mediana

library(mlr)

## Loading required package: ParamHelpers

data_parametrica <- impute(bd, classes = list(factor = imputeMode(), 
                                                             integer = imputeMedian(),
                                                             numeric = imputeMedian()),
                           dummy.classes = c("integer","factor"), dummy.type = "numeric")
bd1=data_parametrica$data[,1:min(dim(bd))]
summary(bd1)

##  numero.de.victimizacion victimas.en.su.distrito victimas.en.su.barrio
##  Min.   :   15.0         Min.   :   2.0          Min.   :   1.00      
##  1st Qu.:   99.0         1st Qu.:  22.0          1st Qu.:  12.00      
##  Median :  236.0         Median :  54.0          Median :  26.00      
##  Mean   :  691.8         Mean   : 220.2          Mean   :  89.25      
##  3rd Qu.:  591.0         3rd Qu.: 170.0          3rd Qu.:  64.00      
##  Max.   :14334.0         Max.   :5143.0          Max.   :2115.00      
##  vigilancia.policia.nacional presencia.de.policia.nacional
##  Min.   :   1.0              Min.   :   8.0               
##  1st Qu.:  17.0              1st Qu.:  51.0               
##  Median :  52.5              Median :  93.0               
##  Mean   : 162.1              Mean   : 244.7               
##  3rd Qu.: 135.0              3rd Qu.: 196.0               
##  Max.   :3596.0              Max.   :5244.0               
##  confianza.segurirdad.municipal confianza.en.poder.judicial
##  Min.   :   15.0                Min.   :   13.0            
##  1st Qu.:   95.0                1st Qu.:   89.0            
##  Median :  228.0                Median :  214.0            
##  Mean   :  677.4                Mean   :  642.7            
##  3rd Qu.:  582.0                3rd Qu.:  549.0            
##  Max.   :14208.0                Max.   :13902.0

dim(bd1)

## [1] 661   7

tem.num.vict <- bd1$numero.de.victimizacion
bd1 <- bd1[,-1]

Reduccion de variables para formar latentes

library(psych)
#
# Matriz de correlaciÃ³n
corrMatrix<-lowerCor(bd1)

##                                vctms.n.s.d vctms.n.s.b vgl.. pr... cnf..
## victimas.en.su.distrito        1.00                                     
## victimas.en.su.barrio          0.99        1.00                         
## vigilancia.policia.nacional    0.85        0.84        1.00             
## presencia.de.policia.nacional  0.98        0.98        0.83  1.00       
## confianza.segurirdad.municipal 0.98        0.97        0.90  0.98  1.00 
## confianza.en.poder.judicial    0.98        0.97        0.90  0.98  1.00 
##                                cn...
## victimas.en.su.distrito             
## victimas.en.su.barrio               
## vigilancia.policia.nacional         
## presencia.de.policia.nacional       
## confianza.segurirdad.municipal      
## confianza.en.poder.judicial    1.00

#
cat("NÂº de variables que no correlacionan bien: ",
    sum( sapply(as.data.frame(corrMatrix), function(x) all(abs(x) < 0.3)) ) )

## NÂº de variables que no correlacionan bien:  0

#
cat("NÂº de variables que podrian causar multicolinealidad: ",
    sum( sapply(as.data.frame(corrMatrix), function(x) any(abs(x) >= 0.9 & abs(x) != 1)) ) )

## NÂº de variables que podrian causar multicolinealidad:  5

Realizacion del analisis factorial que nos sugiere formar dos latentes

library(usdm)

## Loading required package: sp

## Loading required package: raster

## 
## Attaching package: 'raster'

## The following object is masked from 'package:mlr':
## 
##     resample

## The following object is masked from 'package:ParamHelpers':
## 
##     getValues

vifstep(bd1,th=10)

## 4 variables from the 6 input variables have collinearity problem: 
##  
## confianza.segurirdad.municipal confianza.en.poder.judicial victimas.en.su.barrio victimas.en.su.distrito 
## 
## After excluding the collinear variables, the linear correlation coefficients ranges between: 
## min correlation ( presencia.de.policia.nacional ~ vigilancia.policia.nacional ):  0.8287628 
## max correlation ( presencia.de.policia.nacional ~ vigilancia.policia.nacional ):  0.8287628 
## 
## ---------- VIFs of the remained variables -------- 
##                       Variables      VIF
## 1   vigilancia.policia.nacional 3.193335
## 2 presencia.de.policia.nacional 3.193335

# Prueba de KMO
KMO(bd1)[1:2]

## $MSA
## [1] 0.7569126
## 
## $MSAi
##        victimas.en.su.distrito          victimas.en.su.barrio 
##                      0.7325947                      0.7247999 
##    vigilancia.policia.nacional  presencia.de.policia.nacional 
##                      0.7261371                      0.7326029 
## confianza.segurirdad.municipal    confianza.en.poder.judicial 
##                      0.7807750                      0.8512290

# Prueba de esfericidad de Bartlett  
psych::mardia(bd1,na.rm = TRUE,plot=TRUE)

## Call: psych::mardia(x = bd1, na.rm = TRUE, plot = TRUE)
## 
## Mardia tests of multivariate skew and kurtosis
## Use describe(x) the to get univariate tests
## n.obs = 661   num.vars =  6 
## b1p =  188.75   skew =  20794.43  with probability =  0
##  small sample skew =  20915.89  with probability =  0
## b2p =  426.32   kurtosis =  496.36  with probability =  0

cortest.bartlett(bd1)$p.value<0.05

## R was not square, finding R from data

## [1] TRUE

sedimentacion= princomp(bd1,score= TRUE, cor= TRUE)
plot(sedimentacion,type="lines")

Por medio de la rotacion varimax,evidenciamos la formacion de dos variables latente.En este caso,se usa este medio de rotacion para un mejor reparto de la proporcion de variacion explicada

# factorial
mod1 = fa(bd1,2,rotate="varimax", scores=T)

## The estimated weights for the factor scores are probably incorrect.  Try a different factor extraction method.

print(mod1,digits=2,cut = 0.2)

## Factor Analysis using method =  minres
## Call: fa(r = bd1, nfactors = 2, rotate = "varimax", scores = T)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                 MR1  MR2   h2     u2 com
## victimas.en.su.distrito        0.83 0.54 0.98 0.0169 1.7
## victimas.en.su.barrio          0.85 0.51 0.98 0.0208 1.7
## vigilancia.policia.nacional    0.50 0.81 0.89 0.1056 1.7
## presencia.de.policia.nacional  0.85 0.51 0.99 0.0143 1.6
## confianza.segurirdad.municipal 0.76 0.65 1.00 0.0024 2.0
## confianza.en.poder.judicial    0.76 0.65 1.00 0.0037 2.0
## 
##                        MR1  MR2
## SS loadings           3.53 2.31
## Proportion Var        0.59 0.38
## Cumulative Var        0.59 0.97
## Proportion Explained  0.60 0.40
## Cumulative Proportion 0.60 1.00
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  20.7 with Chi Square of  13606.5
## The degrees of freedom for the model are 4  and the objective function was  2.38 
## 
## The root mean square of the residuals (RMSR) is  0 
## The df corrected root mean square of the residuals is  0.01 
## 
## The harmonic number of observations is  661 with the empirical chi square  0.39  with prob <  0.98 
## The total number of observations was  661  with Likelihood Chi Square =  1559.43  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.57
## RMSEA index =  0.77  and the 90 % confidence intervals are  0.736 0.8
## BIC =  1533.46
## Fit based upon off diagonal values = 1

puntaje <-mod1$scores

colnames(puntaje) <- c("victconf","cappol")

ormacion d elos clusters por medio Kmeans,basado en los scores

library(factoextra)

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ

mod2  = eclust(puntaje,
                         FUNcluster ="kmeans",
                         k = 2, #nÃºmero de clusters -ESTO CAMBIA-
                         graph = F)

DEPARTAMENTO.scaled <- data.frame(puntaje, kmeans=mod2$cluster)

table(DEPARTAMENTO.scaled$kmeans)

## 
##   1   2 
## 624  37

aggregate(cbind(victconf,cappol) ~ kmeans, data=DEPARTAMENTO.scaled,FUN=mean)

##   kmeans    victconf     cappol
## 1      1 -0.09051543 -0.1889171
## 2      2  1.52653041  3.1860613

siluetas = mod2$silinfo$widths
siluetas[siluetas$sil_width<0,]

##     cluster neighbor   sil_width
## 21        2        1 -0.05908159
## 3         2        1 -0.06743154
## 46        2        1 -0.07247878
## 462       2        1 -0.11838391
## 236       2        1 -0.14698598
## 464       2        1 -0.16536916
## 43        2        1 -0.23447023
## 382       2        1 -0.23855867
## 216       2        1 -0.25531841
## 463       2        1 -0.32130389

Descargando el link del mapa en shapefile para ploteralo despues.Asimismo,la coloracion del mapa se baso en el porcentaje de alto riesgo,el cual el 5% a mÃ s sea asigna el color rojo para los depertamentos que tengan mayor inseguridad,ya que mÃ s del 5% de su poblacion ha sido vulnerada.En caso contrario,se asigno con color azul aquellos que no superasen en el 5%,pero esto significa que estos departamentos sean seguros,sino que son mÃ s seguros proporcionalmente si se les compara con otros departementos

URL <- "https://desarrollo.oefa.gob.pe/deam/stec/vigilancia/Limite_departamental.zip"

library(downloader)

tempdir = tempdir()
temp <- tempfile(fileext=".zip", tmpdir = tempdir)
downloader::download(url = URL, destfile = temp)
unzip(temp, exdir = tempdir)

x <- chartr("\\", "/", tempdir)

library(rgdal)

## rgdal: version: 1.3-6, (SVN revision 773)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/Marti/OneDrive/Documentos/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/Marti/OneDrive/Documentos/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1

folder = tempdir
fileName = "BAS_LIM_DEPARTAMENTO.shp"
fileToRead = file.path(folder,fileName)
m <- rgdal::readOGR(dsn = fileToRead, stringsAsFactors=FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Marti\AppData\Local\Temp\RtmpC0EQEw\BAS_LIM_DEPARTAMENTO.shp", layer: "BAS_LIM_DEPARTAMENTO"
## with 25 features
## It has 4 fields
## Integer64 fields read as strings:  COUNT

plot(m)

z <- data.frame(NOMBDEP = bdtemp$DEPARTAMENTO , clus = mod2$cluster)
head(z)

##    NOMBDEP clus
## 1 AMAZONAS    2
## 2 AMAZONAS    1
## 3 AMAZONAS    2
## 4 AMAZONAS    1
## 5 AMAZONAS    1
## 6 AMAZONAS    1

tail(z)

##     NOMBDEP clus
## 656 UCAYALI    1
## 657 UCAYALI    1
## 658 UCAYALI    1
## 659 UCAYALI    1
## 660 UCAYALI    1
## 661 UCAYALI    1

resul <- data.frame(bajo = table(z[z$clus==1,1]),
regular.alto = table(z[z$clus==2,1]))
n <- resul$bajo.Freq+resul$regular.alto.Freq
porc <- resul$regular.alto.Freq/n
porc <- round(porc,3)*100
resul$porc <- porc
resul

##        bajo.Var1 bajo.Freq regular.alto.Var1 regular.alto.Freq porc
## 1         ANCASH        22            ANCASH                 4 15.4
## 2       AMAZONAS        16          AMAZONAS                 3 15.8
## 3       APURIMAC        14          APURIMAC                 1  6.7
## 4       AREQUIPA        43          AREQUIPA                 0  0.0
## 5       AYACUCHO        29          AYACUCHO                 1  3.3
## 6      CAJAMARCA        26         CAJAMARCA                 1  3.7
## 7          CUSCO        35             CUSCO                 0  0.0
## 8        HUANUCO        19           HUANUCO                 1  5.0
## 9   HUANCAVELICA        12      HUANCAVELICA                 1  7.7
## 10           ICA        28               ICA                 1  3.4
## 11         JUNIN        47             JUNIN                 0  0.0
## 12   LA LIBERTAD        34       LA LIBERTAD                 3  8.1
## 13    LAMBAYEQUE        27        LAMBAYEQUE                 3 10.0
## 14          LIMA        76              LIMA                 2  2.6
## 15        LORETO        21            LORETO                 4 16.0
## 16 MADRE DE DIOS         5     MADRE DE DIOS                 1 16.7
## 17      MOQUEGUA         4          MOQUEGUA                 2 33.3
## 18         PASCO        22             PASCO                 0  0.0
## 19         PIURA        39             PIURA                 1  2.5
## 20        CALLAO         5            CALLAO                 2 28.6
## 21          PUNO        37              PUNO                 0  0.0
## 22    SAN MARTIN        39        SAN MARTIN                 2  4.9
## 23         TACNA         5             TACNA                 2 28.6
## 24        TUMBES        10            TUMBES                 1  9.1
## 25       UCAYALI         9           UCAYALI                 1 10.0

id <- rep(0,length(porc))
id[porc>=5]<-1
id

##  [1] 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1

resul$id <- id
resul

##        bajo.Var1 bajo.Freq regular.alto.Var1 regular.alto.Freq porc id
## 1         ANCASH        22            ANCASH                 4 15.4  1
## 2       AMAZONAS        16          AMAZONAS                 3 15.8  1
## 3       APURIMAC        14          APURIMAC                 1  6.7  1
## 4       AREQUIPA        43          AREQUIPA                 0  0.0  0
## 5       AYACUCHO        29          AYACUCHO                 1  3.3  0
## 6      CAJAMARCA        26         CAJAMARCA                 1  3.7  0
## 7          CUSCO        35             CUSCO                 0  0.0  0
## 8        HUANUCO        19           HUANUCO                 1  5.0  1
## 9   HUANCAVELICA        12      HUANCAVELICA                 1  7.7  1
## 10           ICA        28               ICA                 1  3.4  0
## 11         JUNIN        47             JUNIN                 0  0.0  0
## 12   LA LIBERTAD        34       LA LIBERTAD                 3  8.1  1
## 13    LAMBAYEQUE        27        LAMBAYEQUE                 3 10.0  1
## 14          LIMA        76              LIMA                 2  2.6  0
## 15        LORETO        21            LORETO                 4 16.0  1
## 16 MADRE DE DIOS         5     MADRE DE DIOS                 1 16.7  1
## 17      MOQUEGUA         4          MOQUEGUA                 2 33.3  1
## 18         PASCO        22             PASCO                 0  0.0  0
## 19         PIURA        39             PIURA                 1  2.5  0
## 20        CALLAO         5            CALLAO                 2 28.6  1
## 21          PUNO        37              PUNO                 0  0.0  0
## 22    SAN MARTIN        39        SAN MARTIN                 2  4.9  0
## 23         TACNA         5             TACNA                 2 28.6  1
## 24        TUMBES        10            TUMBES                 1  9.1  1
## 25       UCAYALI         9           UCAYALI                 1 10.0  1

miniData = as.data.frame(resul[c(1,6)])

colnames(miniData) <- c("NOMBDEP","cluster")

id <- c(1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,0,0,1,1,1)
m@data$cluster <- id
m

## class       : SpatialPolygonsDataFrame 
## features    : 25 
## extent      : -203260.8, 1190991, 7964769, 9995733  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=18 +south +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## variables   : 5
## names       :  NOMBDEP, COUNT, FIRST_IDDP,    HECTARES, cluster 
## min values  : AMAZONAS,   108,         01,    14140.95,       0 
## max values  :  UCAYALI,    94,         25, 37511598.87,       1

paleta=c("blue","red") #colores


color <- rep("blue",25)
color[m$cluster==0]<- "red"

plot(m,col='white',border=NA) 
plot(m,
     col=color,
     border='gray',add=T)

Aplicacion regresion lineal

subdata <- cbind(puntaje,tem.num.vict)

head(x = subdata, n = 30)

##          victconf       cappol tem.num.vict
##  [1,] -1.15035516  5.604313092         4313
##  [2,] -0.47018052 -0.008816447          213
##  [3,] -1.01392325  2.925062745         2114
##  [4,] -0.70044123  0.365144853          246
##  [5,] -0.40775879 -0.158570772          123
##  [6,] -0.13517816 -0.567812016          111
##  [7,] -0.47767106  0.020191398          179
##  [8,] -0.42464575  0.388547427          619
##  [9,] -0.48361093 -0.049547523          165
## [10,] -0.20338068 -0.370714001          183
## [11,]  0.09393278 -0.847816157          187
## [12,] -0.38724388 -0.076190089          262
## [13,] -0.31855450 -0.112014623          304
## [14,] -0.60401968  0.067797969          108
## [15,] -0.50669779  0.208570019          334
## [16,] -1.55935232  4.218679292         2623
## [17,] -0.66909385  0.811032617          698
## [18,] -0.42706478  0.119688246          346
## [19,] -0.24135224 -0.149123085          361
## [20,]  1.03319633  2.556108218         3292
## [21,]  1.04615399  2.179289636         3119
## [22,] -0.47063002 -0.056273214          134
## [23,] -0.44851881 -0.063498943          135
## [24,] -0.52495990  0.050336435          160
## [25,] -0.16751657 -0.627511301           33
## [26,] -0.46609834  0.191566774          388
## [27,] -0.52387977  0.045761694          173
## [28,] -0.32594542 -0.272256876          117
## [29,] -0.34603033 -0.226487090          145
## [30,]  0.03704827 -0.796320543           77

colnames(subdata)[3]="numero.de.victimizacion"

modelo1 <- glm(formula = numero.de.victimizacion ~ ., family = "gaussian", 
               data = as.data.frame(subdata))
modelo1

## 
## Call:  glm(formula = numero.de.victimizacion ~ ., family = "gaussian", 
##     data = as.data.frame(subdata))
## 
## Coefficients:
## (Intercept)     victconf       cappol  
##       691.8       1018.4        821.9  
## 
## Degrees of Freedom: 660 Total (i.e. Null);  658 Residual
## Null Deviance:       1.137e+09 
## Residual Deviance: 5447000   AIC: 7844

Residios cercanos acero: Cumple este requisito

mean(modelo1$residuals)

## [1] -1.218486e-12

Puntos influyentes y heteroceasticidad:Se observa la formacion de un embudo en las graficas de la parte izquierda.AdemÃ s,en la derecha se observan distribuciones parece no haber un comportamiento normal

par(mfrow=c(2,2))
plot(modelo1)

Baja correlacion de las variables independientes con los residuos

corr <- rep(0,2)
bd <- subdata
for(i in 2){
  x1 <- modelo1$residuals
  y1 <- bd[,i]

  t<-cor.test(x1,y1)
  corr[i]<-t$p.value
}
corr

## [1] 0 1

Existe una correlcion entre los residuos con la primera latente

No multicolinealidad

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:usdm':
## 
##     vif

## The following object is masked from 'package:psych':
## 
##     logit

car::vif(modelo1)

## victconf   cappol 
##  1.00271  1.00271

No se presenta la multicolinealidad

Normalidad de los residuos(Shapiro-Wilk)

shapiro.test(modelo1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  modelo1$residuals
## W = 0.74156, p-value < 2.2e-16

x <- modelo1$residuals 
h<-hist(x) 
xfit<-seq(min(x),max(x),length=40) 
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x)) 
yfit <- yfit*diff(h$mids[1:2])*length(x) 
lines(xfit, yfit, col="blue", lwd=2)

Se verifica graficamente no hay normalidad de los residuos

Creacio de una subdata y recodificacion de la variable dependiente para aplicar una regresion logistica ordinal

bd4=subdata
colnames(bd4)[3]<-"seguridad"
head(bd4)

##        victconf       cappol seguridad
## [1,] -1.1503552  5.604313092      4313
## [2,] -0.4701805 -0.008816447       213
## [3,] -1.0139233  2.925062745      2114
## [4,] -0.7004412  0.365144853       246
## [5,] -0.4077588 -0.158570772       123
## [6,] -0.1351782 -0.567812016       111

bd4 <- as.data.frame(bd4)

bp=c(-Inf,99,236,591,Inf)
names <-c(" Muy inseguro", "inseguro", "seguro","Muy seguro")
bd4$seguridadcod <- cut(bd4$seguridad ,breaks =bp,labels = names)
head(bd4)

##     victconf       cappol seguridad seguridadcod
## 1 -1.1503552  5.604313092      4313   Muy seguro
## 2 -0.4701805 -0.008816447       213     inseguro
## 3 -1.0139233  2.925062745      2114   Muy seguro
## 4 -0.7004412  0.365144853       246       seguro
## 5 -0.4077588 -0.158570772       123     inseguro
## 6 -0.1351782 -0.567812016       111     inseguro

library("oglmx")

## Loading required package: maxLik

## Loading required package: miscTools

## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/

model2=ologit.reg(seguridadcod ~ .,
                  data=bd4)

## Warning in model.response(mf, "numeric"): using type = "numeric" with a
## factor response will be ignored

summary(model2)

## Ordered Logit Regression 
## Log-Likelihood: -0.006012488 
## No. Iterations: 300 
## McFadden's R2: 0.9999934 
## AIC: 12.01202 
##           Estimate Std. error t value Pr(>|t|)
## victconf  -112.835   2340.435 -0.0482   0.9615
## cappol     -86.456   1813.638 -0.0477   0.9620
## seguridad   13.398     25.860  0.5181   0.6044
## ----- Threshold Parameters -----
##                                     Estimate Std. error t value Pr(>|t|)
## Threshold ( Muy inseguro->inseguro)   1398.8     2846.9  0.4913   0.6232
## Threshold (inseguro->seguro)          3226.8     6188.3  0.5214   0.6021
## Threshold (seguro->Muy seguro)        7938.6    15284.6  0.5194   0.6035

Latentes epxlican en un 99% la variabilidad de la variable dependiente,pero no presenta una buen ajuste.Esto lo podemos verificar con los odds-ratio.

odds=exp(coef(model2))
ci=exp(confint(model2))
cbind(odds,ci)

##                                             odds        2.5 %       97.5 %
## victconf                            9.919101e-50 0.000000e+00          Inf
## cappol                              2.836430e-38 0.000000e+00          Inf
## seguridad                           6.589956e+05 6.409823e-17 6.775152e+27
## Threshold ( Muy inseguro->inseguro)          Inf 0.000000e+00          Inf
## Threshold (inseguro->seguro)                 Inf 0.000000e+00          Inf
## Threshold (seguro->Muy seguro)               Inf 0.000000e+00          Inf