BASE DE DATOS ATLANTIC

En este Rpbs se trata la base de datos Atlantics.

1. Cargar libreria y la base de datos

Cargamos la libreria sp, disponible para el tratamiento de datos espaciales y la libreria readr para la carga de la base de datos con extención .CSV

library(sp)   #Libraria para el tratamiento de datos espaciales
library(readr)

Cargamos la base de datos “data_atlantic”, la observamos y vemos sus dimensiones

data_atlantic<- read_csv("data_atlantic_1998_2012.csv")
View(data_atlantic) # Observar la base de datos
dim(data_atlantic)    #Observar las dimensiones de la base de datos
## [1] 666   9
names(data_atlantic)  # Da el nombre de las variables de la base de datos
## [1] "FIPS" "x"    "y"    "Rate" "POV"  "SMOK" "PM25" "NO2"  "SO2"
class(data_atlantic)  #Tipo de datos de la base de datos ("data.frame")
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"

Observamos los primeros seis datos y los ultimos dos

head(data_atlantic) # Permite observar las seis primeras filas de la base de datos
tail(data_atlantic) # Permite observar las seis ultimas filas de la base de datos

Observamos el resumen estadistico de la base de datos

summary(data_atlantic) # Resumen de datos estadisticos
##       FIPS             x                 y                Rate       
##  Min.   :10001   Min.   : 949877   Min.   : 932261   Min.   : 39.00  
##  1st Qu.:24010   1st Qu.:1248238   1st Qu.:1332531   1st Qu.: 62.00  
##  Median :37126   Median :1425055   Median :1675880   Median : 69.00  
##  Mean   :35728   Mean   :1426696   Mean   :1669654   Mean   : 69.97  
##  3rd Qu.:51033   3rd Qu.:1617599   3rd Qu.:1970044   3rd Qu.: 77.00  
##  Max.   :54109   Max.   :1916559   Max.   :2623675   Max.   :120.00  
##       POV             SMOK            PM25             NO2         
##  Min.   : 3.26   Min.   :13.51   Min.   : 7.323   Min.   : 0.5954  
##  1st Qu.:11.35   1st Qu.:24.57   1st Qu.:10.508   1st Qu.: 1.2608  
##  Median :14.54   Median :26.52   Median :11.652   Median : 1.7766  
##  Mean   :15.21   Mean   :26.06   Mean   :11.394   Mean   : 2.2015  
##  3rd Qu.:18.91   3rd Qu.:28.18   3rd Qu.:12.384   3rd Qu.: 2.6475  
##  Max.   :35.26   Max.   :34.96   Max.   :14.133   Max.   :14.6895  
##       SO2          
##  Min.   :0.001209  
##  1st Qu.:0.027947  
##  Median :0.049337  
##  Mean   :0.072520  
##  3rd Qu.:0.091953  
##  Max.   :0.420847

2. Conversión a datos espaciales

Convertimos las columnas (x,y) de la base de datos data_atlantic en un objeto espacial

coordinates(data_atlantic) = c("x", "y") # Convierte el data.frame a un objeto espacial de tipo SPACIAPOINTDATAFRAME
class(data_atlantic)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"

Diagramamos la base de datos data_atlantic

plot(data_atlantic)  # Diagrama la base de datos 

plot(data_atlantic, pch=1, main="Dato espacial: Punto") # Diagrama la base de datos con forma de puntos

3. Tratamiento de los datos espaciales

Para unir los datos en forma de linea ejecutamos las siguientes lineas

# Union de datos en forma de linea
cc = coordinates(data_atlantic)
m.sl = SpatialLines(list(Lines(list(Line(cc)), "line1")))
plot(m.sl,main="Dato espacial: Línea" )

Diagramamos la distrbución espacial de la variable NO2, por medio de la libreria “laticce”

#DISTRIBUCI?N ESPACIAL DE LA VARIABLE ZINC 
library(lattice)
spplot(data_atlantic, c("NO2"))

Diagramamos la distrbución espacial conlas coordenadas (x,y)

#Distribuci?n espacial con coordenadas (x,y)
library(ggplot2)
methods(fortify)
##  [1] fortify.cld*                      fortify.confint.glht*            
##  [3] fortify.data.frame*               fortify.default*                 
##  [5] fortify.formula*                  fortify.function*                
##  [7] fortify.glht*                     fortify.grouped_df*              
##  [9] fortify.Line*                     fortify.Lines*                   
## [11] fortify.lm*                       fortify.map*                     
## [13] fortify.NULL*                     fortify.Polygon*                 
## [15] fortify.Polygons*                 fortify.sfc*                     
## [17] fortify.sfg*                      fortify.SpatialLinesDataFrame*   
## [19] fortify.SpatialPolygons*          fortify.SpatialPolygonsDataFrame*
## [21] fortify.summary.glht*             fortify.tbl*                     
## [23] fortify.tbl_df*                  
## see '?methods' for accessing help and source code
m = as(data_atlantic, "data.frame")
ggplot(m, aes(x, y)) + geom_point() + coord_equal()

Diagramamos la caja de bloques de la variable NO2

boxplot(x = data_atlantic$NO2)# Caja de bigotes de la variable NO2

4. Crear los centroides de un poligono

library(rgdal) # Incluimos la libreria 
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/asus/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/asus/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/asus/Documents/R/win-library/4.0/rgdal/proj

Abrimos un archivo .shape, creamos y ploteamos los centroides

Y =  readOGR(file.choose()) # abrir un archivo shp
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\asus\Documents\DOCTORADO\MATERIAS\PROFUNDIZACION_I\R\ANTLANTIC\COUNTY_ATLANTIC.shp", layer: "COUNTY_ATLANTIC"
## with 666 features
## It has 12 fields
## Integer64 fields read as strings:  ID
class(Y)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
plot(Y)

centroides = coordinates(Y)
class(centroides)
## [1] "matrix" "array"
cent=as.data.frame(centroides)
class(cent)
## [1] "data.frame"
head(cent)
coordinates(cent) = c("V1","V2") 
class(cent)
## [1] "SpatialPoints"
## attr(,"package")
## [1] "sp"
plot(cent,pch=1,add=T)

5. Crea y guardar archivos espaciales

crear un archivo espacial a partir de los centroides

fg=SpatialPointsDataFrame(cent,data_atlantic)

ds=“C:/Users/asus/Documents/DOCTORADO/MATERIAS/PROFUNDIZACION_I/R/ANTLANTIC”

writeOGR(fg, dsn=ds, driver=“ESRI Shapefile”,“puntos”)

Guardar archivos en tipo .RData

save(centroides,cent,file=“Prueba.RData”)

Cargar archivos en tipo .RData

load(“Prueba.RData”)

6. Libreria GWmodel

#Cargamos el archivo County Atlantic

Condado = readOGR(file.choose()) 
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\asus\Documents\DOCTORADO\MATERIAS\PROFUNDIZACION_I\R\ANTLANTIC\COUNTY_ATLANTIC.shp", layer: "COUNTY_ATLANTIC"
## with 666 features
## It has 12 fields
## Integer64 fields read as strings:  ID

Ploteamos el archivo condado

plot(Condado)

class(Condado)  #Observamos el Tipo de datos de condado ("SpatialPolygonsDataFrame")
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

Cargamos el archivo State Atlantic

Estado = readOGR(file.choose()) 
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\asus\Documents\DOCTORADO\MATERIAS\PROFUNDIZACION_I\R\ANTLANTIC\STATE_ATLANTIC.shp", layer: "STATE_ATLANTIC"
## with 11 features
## It has 9 fields
## Integer64 fields read as strings:  ID ID2
plot(Estado)

Ploteamos el archivo Estado

class(Estado)  #Observamos el Tipo de datos de Estado ("SpatialPolygonsDataFrame")
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
Datos = read.csv(file.choose(), header=T) # Cargamos la base de datos

SPDF = merge(Condado, Datos, by = "FIPS") # unir base de datos con el SpatialPolygonsDataFrame "Condado"

names (SPDF) # Observamos los nombres de la base de datos creada
##  [1] "FIPS"       "ID"         "x.x"        "y.x"        "REGION_ID" 
##  [6] "DIVISION_I" "STATE_ID"   "COUNTY_ID"  "REGION"     "DIVISION"  
## [11] "STATE"      "COUNTY"     "x.y"        "y.y"        "Rate"      
## [16] "POV"        "SMOK"       "PM25"       "NO2"        "SO2"

Creamos un lista para los graficos e incluimos la libreria GWmodel

polys = list("sp.lines", as(Estado, "SpatialLines"), col = "grey", lwd=0.8, lty=1)

library(GWmodel)
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: robustbase
## Loading required package: Rcpp
## Loading required package: spatialreg
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## 
## Attaching package: 'spData'
## The following object is masked _by_ '.GlobalEnv':
## 
##     polys
## Loading required package: Matrix
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
## Welcome to GWmodel version 2.2-3.
## The new version of GWmodel 2.2-4 now is ready
## 
## Attaching package: 'GWmodel'
## The following objects are masked from 'package:stats':
## 
##     BIC, fitted

Creando una paleta de colores

col.palette = colorRampPalette(c("blue", "sky blue", "green", "yellow", "red"), space = "rgb", interpolate = "linear")

la funci?n gwss Calcula medidas descriptivas del conjunto de datos geogr?ficamente ponderadas
incluye variables: Rate, PM2.5, el kernel utilizado es bisquare y ancho de banda =48

gwss.rate.pm25 = gwss(SPDF, vars = c("Rate","PM25"), kernel = "bisquare", adaptive = TRUE, bw = 99)
## Warning in proj4string(data): CRS object has comment, which is lost in output
gwss.rate.pm25
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
## 
##    ***********************Calibration information*************************
## 
##    Local summary statistics calculated for variables:
##     Rate PM25
##    Number of summary points: 666
##    Kernel function: bisquare 
##    Summary points: the same locations as observations are used.
##    Adaptive bandwidth: 99 (number of nearest neighbours)
##    Distance metric: Euclidean distance metric is used.
## 
##    ************************Local Summary Statistics:**********************
##    Summary information for Local means:
##              Min. 1st Qu. Median 3rd Qu.   Max.
##    Rate_LM 63.601  67.120 70.595  72.102 75.026
##    PM25_LM 10.556  11.259 11.436  11.805 12.198
##    Summary information for local standard deviation :
##                 Min.  1st Qu.   Median  3rd Qu.    Max.
##    Rate_LSD 10.18465 11.13278 11.38412 11.86204 13.8873
##    PM25_LSD  0.73052  1.05446  1.15478  1.26483  1.6269
##    Summary information for local variance :
##                   Min.   1st Qu.    Median   3rd Qu.     Max.
##    Rate_LVar 103.72716 123.93877 129.59810 140.70788 192.8575
##    PM25_LVar   0.53365   1.11189   1.33352   1.59979   2.6467
##    Summary information for Local skewness:
##                  Min.  1st Qu.   Median  3rd Qu.   Max.
##    Rate_LSKe  0.14786  0.31226  0.45736  0.59853 0.7895
##    PM25_LSKe -1.43289 -0.85131 -0.39649 -0.20993 0.1126
##    Summary information for localized coefficient of variation:
##                 Min.  1st Qu.   Median  3rd Qu.   Max.
##    Rate_LCV 0.147086 0.156275 0.164858 0.175433 0.1913
##    PM25_LCV 0.060252 0.088918 0.101306 0.111091 0.1533
##    Summary information for localized Covariance and Correlation between these variables:
##                                 Min.    1st Qu.     Median    3rd Qu.   Max.
##    Cov_Rate.PM25          -2.2003666 -0.8874850 -0.3896257 -0.0545890 0.5514
##    Corr_Rate.PM25         -0.1408757 -0.0638106 -0.0291851 -0.0039810 0.0457
##    Spearman_rho_Rate.PM25 -0.1137968 -0.0305331  0.0055412  0.0501639 0.0890
## 
##    ************************************************************************

#visualizamos las diferentes medidas, por ejemplo, para la media geograficamente ponderada de la variable Lung rate seria:

Mean.Rate = spplot(gwss.rate.pm25$SDF, "Rate_LM", main = "Media Local de tasa cancer", sp.layout = list(polys), col = "transparent", col.regions = col.palette(100))
Mean.Rate

#visualizamos la media local de pobresa de la base de datos:

col.palette = colorRampPalette(c("blue", "sky blue", "green", "purple", "black"), space = "rgb", interpolate = "linear")

gwss.pov.pm25 = gwss(SPDF, vars = c("POV","PM25"), kernel = "bisquare", adaptive = TRUE, bw = 48)
## Warning in proj4string(data): CRS object has comment, which is lost in output
Mean.Rate_F = spplot(gwss.pov.pm25$SDF, "POV_LM", main = "Media Local de Pobresa", sp.layout = list(polys), col = "transparent", col.regions = col.palette(100))
Mean.Rate_F

#visualizamos la correlación entre la pobresa y el material particulado, tamaño 2.5 micrómetros:

###########################################################################33

col.palette = colorRampPalette(c("Red", "yellow", "green", "blue", "black"), space = "rgb", interpolate = "linear")

gwss.pov.pm25 = gwss(SPDF, vars = c("POV","PM25"), kernel = "bisquare", adaptive = TRUE, bw = 48)
## Warning in proj4string(data): CRS object has comment, which is lost in output
Mean.Rate_corr = spplot(gwss.pov.pm25$SDF, "Corr_POV.PM25", main = "Correlación Pobresa vs PM25", sp.layout = list(polys), col = "transparent", col.regions = col.palette(100))
Mean.Rate_corr