Spencer Leo

Lab 6

Contents:

Part 1:

Produce a map using tmap. In this case, show the zip codes where more people gave cash than in-kind contributions.

# Washington 2016 campaign-contribution dataset (course repository)
link <- "https://github.com/EvansDataScience/DataDriven_ManagementAndPolicy/raw/master/Session6/contriWA_2016.RData"
# Read the .RData file straight from the URL; it provides `contriWA_2016`
load(file = url(link))

# Compact overview of the columns, each line cut at 60 characters
str(contriWA_2016, width = 60, strict.width = "cut")

## 'data.frame':    374584 obs. of  10 variables:
##  $ id                  : chr  "3982630.rcpt" "3982631.rcp"..
##  $ contributor_state   : chr  "WA" "WA" "WA" "WA" ...
##  $ contributor_zip     : num  98683 98683 98683 98168 9850..
##  $ amount              : num  50 50 50 500 900 900 50 225 ..
##  $ election_year       : int  2016 2016 2016 2016 2016 201..
##  $ party               : Factor w/ 9 levels "","CONSTITUT"..
##  $ cash_or_in_kind     : Factor w/ 2 levels "Cash","In ki"..
##  $ contributor_location: chr  "(45.60817, -122.51972)" "("..
##  $ Lat                 : num  45.6 45.6 45.6 47.5 47 ...
##  $ Lon                 : num  -123 -123 -123 -122 -123 ...

# Importing Washington Map data
zippedSHP <- "https://github.com/EvansDataScience/data/raw/master/WAzips.zip"

library(utils)
# Download the zipped shapefile to a temporary file. mode = "wb" requests a
# binary transfer explicitly so the archive cannot be altered by text-mode
# line-ending translation.
temp <- tempfile(fileext = ".zip")
download.file(zippedSHP, temp, mode = "wb")
# Extract into the working directory; the shapefile is read from there later
unzip(temp)
# The downloaded archive is no longer needed once its contents are extracted
unlink(temp)

library(rgdal)

## Loading required package: sp

## rgdal: version: 1.4-3, (SVN revision 828)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/leoto/Documents/R/win-library/3.6/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/leoto/Documents/R/win-library/3.6/rgdal/proj
##  Linking to sp version: 1.3-1

library(tmap)

# Read the zip code polygons from the shapefile in the working directory.
# stringsAsFactors is spelled out as FALSE (not the reassignable shorthand F)
wazipMap <- readOGR("SAEP_ZIP_Code_Tabulation_Areas.shp", stringsAsFactors = FALSE)

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\leoto\OneDrive\Documents\598\Week 6\Session6\SAEP_ZIP_Code_Tabulation_Areas.shp", layer: "SAEP_ZIP_Code_Tabulation_Areas"
## with 598 features
## It has 101 fields
## Integer64 fields read as strings:  OBJECTID POP2010 HHP2010 GQ2010 HU2010 OHU2010

# Plain polygon map of every zip code area
waZips <- tm_shape(wazipMap) +
  tm_polygons()

library(rmapshaper)
# Dissolve the zip polygons (no grouping field given) to get a base shape,
# then draw it in white as the background layer
baseMap <- ms_dissolve(wazipMap)
waBorder <- tm_shape(baseMap) +
  tm_polygons(col = "white", lwd = 1)

library(raster)

# Projection of the zip code map
mapCRS <- crs(wazipMap)

# Build spatial points from the contributions: Lon/Lat are the coordinates and
# the full original table is kept as the attribute data.
# NOTE(review): the points are labelled with the map's CRS without any
# transformation; this is only valid if the map itself uses lon/lat degrees
# -- confirm.
contriWA_geo <- SpatialPointsDataFrame(
  coords = contriWA_2016[, c("Lon", "Lat")],
  data = contriWA_2016,
  proj4string = mapCRS
)

library(dplyr)

## Registered S3 method overwritten by 'dplyr':
##   method         from     
##   print.location geojsonio

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:raster':
## 
##     intersect, select, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(fastDummies)

# Flag cash contributions: dummy_cols() adds indicator columns for
# cash_or_in_kind, and `cash` is 1 where the "Cash" indicator equals 1, else 0
cashdon <- fastDummies::dummy_cols(contriWA_2016,
                                   select_columns = "cash_or_in_kind")

cashdon$cash <- as.numeric(cashdon$cash_or_in_kind_Cash == 1)

# Number of cash contributions per contributor zip code.
# NOTE(review): only cash contributions are counted here; in-kind
# contributions are not tallied or compared against them.
WA_zip_cash <- cashdon %>%
  group_by(contributor_zip) %>%
  summarize(Cash = sum(cash))

head(WA_zip_cash)

## # A tibble: 6 x 2
##   contributor_zip  Cash
##             <dbl> <dbl>
## 1           98001  1043
## 2           98002   680
## 3           98003  1644
## 4           98004  3459
## 5           98005  1658
## 6           98006  2433

# Create cash layer: attach the per-zip cash counts to the zip code polygons,
# matching the map's ZCTA5CE10 field against contributor_zip.
# all.x is written as FALSE (not the reassignable shorthand F).
layerCash <- merge(wazipMap, WA_zip_cash,
                   by.x = "ZCTA5CE10",
                   by.y = "contributor_zip",
                   all.x = FALSE)



library(RColorBrewer)
library(classInt)

# Palette name passed to tm_polygons() for the choropleth classes
colorForPalette <- "YlGnBu"

# Base layer underneath; zip codes shaded by their cash count in 5 quantile
# classes
layer1 <- waBorder +
  tm_shape(layerCash) +
  tm_polygons(
    "Cash",
    style = "quantile",
    n = 5,
    title = "Cash Contributions",
    palette = colorForPalette
  )

# Add a compass arrow and a scale bar
fullMap <- layer1 +
  tm_compass(position = c("left", "TOP"), type = "arrow") +
  tm_scale_bar(position = c("RIGHT", "BOTTOM"), width = 0.2)

# Draw the map with its title and legend placement.
# inner.margins are given in the order bottom, left, top, right.
fullMap +
  tm_layout(
    main.title = "Cash Contributions by WA Zip Codes",
    main.title.position = "center",
    legend.position = c("RIGHT", "center"),
    inner.margins = c(0.1, 0, 0.1, 0.3)
  )

## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3

Part 2:

Remake the plot of dimensionality reduction (where the multidimensional scaling plot cases were colored according to the k-means output). This time, use only the variables that represent input.

library(rio)
# Safe Cities Index spreadsheet from the course data repository
link <- "https://github.com/EvansDataScience/data/raw/master/safeCitiesIndexAll.xlsx"
safe <- import(link)

# List the available columns
names(safe)

##  [1] "city"                         "D_In_PrivacyPolicy"          
##  [3] "D_In_AwarenessDigitalThreats" "D_In_PubPrivPartnerships"    
##  [5] "D_In_TechnologyEmployed"      "D_In_CyberSecurity"          
##  [7] "D_Out_IdentityTheft"          "D_Out_CompInfected"          
##  [9] "D_Out_InternetAccess"         "H_In_EnvironmentPolicies"    
## [11] "H_In_AccessHealthcare"        "H_In_Beds_1000"              
## [13] "H_In_Doctors_1000"            "H_In_AccessFood"             
## [15] "H_In_QualityHealthServ"       "H_Out_AirQuality"            
## [17] "H_Out_WaterQuality"           "H_Out_LifeExpectY"           
## [19] "H_Out_InfMortality"           "H_Out_CancerMortality"       
## [21] "H_Out_AttacksBioChemRad"      "I_In_EnforceTransportSafety" 
## [23] "I_In_PedestrianFriendliness"  "I_In_QualityRoad"            
## [25] "I_In_QualityElectricity"      "I_In_DisasterManagement"     
## [27] "I_Out_DeathsDisaster"         "I_Out_VehicularAccidents"    
## [29] "I_Out_PedestrianDeath"        "I_Out_LiveSlums"             
## [31] "I_Out_AttacksInfrastructure"  "P_In_PoliceEngage"           
## [33] "P_In_CommunityPatrol"         "P_In_StreetCrimeData"        
## [35] "P_In_TechForCrime"            "P_In_PrivateSecurity"        
## [37] "P_In_GunRegulation"           "P_In_PoliticalStability"     
## [39] "P_Out_PettyCrime"             "P_Out_ViolentCrime"          
## [41] "P_Out_OrganisedCrime"         "P_Out_Corruption"            
## [43] "P_Out_DrugUse"                "P_Out_TerroristAttacks"      
## [45] "P_Out_SeverityTerrorist"      "P_Out_GenderSafety"          
## [47] "P_Out_PerceptionSafety"       "P_Out_ThreaTerrorism"        
## [49] "P_Out_ThreatMilitaryConf"     "P_Out_ThreatCivUnrest"

# Keep the city name plus every input indicator. Input columns are selected by
# their "<letter>_In_" name prefix instead of hard-coded positions, so the
# selection still holds if the spreadsheet columns are reordered.
safeIn <- safe[c("city", grep("^[A-Z]_In_", names(safe), value = TRUE))]
names(safeIn)

##  [1] "city"                         "D_In_PrivacyPolicy"          
##  [3] "D_In_AwarenessDigitalThreats" "D_In_PubPrivPartnerships"    
##  [5] "D_In_TechnologyEmployed"      "D_In_CyberSecurity"          
##  [7] "H_In_EnvironmentPolicies"     "H_In_AccessHealthcare"       
##  [9] "H_In_Beds_1000"               "H_In_Doctors_1000"           
## [11] "H_In_AccessFood"              "H_In_QualityHealthServ"      
## [13] "I_In_EnforceTransportSafety"  "I_In_PedestrianFriendliness" 
## [15] "I_In_QualityRoad"             "I_In_QualityElectricity"     
## [17] "I_In_DisasterManagement"      "P_In_PoliceEngage"           
## [19] "P_In_CommunityPatrol"         "P_In_StreetCrimeData"        
## [21] "P_In_TechForCrime"            "P_In_PrivateSecurity"        
## [23] "P_In_GunRegulation"           "P_In_PoliticalStability"

library(reshape2)
library(ggiraph)

## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang

library(ggiraphExtra)
library(tidyverse)

## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2

## -- Attaching packages ------------------------------ tidyverse 1.2.1 --

## v ggplot2 3.1.1     v readr   1.3.1
## v tibble  2.1.1     v purrr   0.3.2
## v tidyr   0.8.3     v stringr 1.4.0
## v ggplot2 3.1.1     v forcats 0.4.0

## -- Conflicts --------------------------------- tidyverse_conflicts() --
## x tidyr::extract() masks raster::extract()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## x dplyr::select()  masks raster::select()

# Reshape to long format: one row per city / indicator pair
safeA <- melt(safeIn, id.vars = "city")
head(safeA)

##        city           variable value
## 1 Abu Dhabi D_In_PrivacyPolicy    50
## 2 Amsterdam D_In_PrivacyPolicy   100
## 3    Athens D_In_PrivacyPolicy    75
## 4   Bangkok D_In_PrivacyPolicy    25
## 5 Barcelona D_In_PrivacyPolicy   100
## 6   Beijing D_In_PrivacyPolicy    75

# Heat map: indicators along x, cities along y, tile fill = indicator value
base <- ggplot(safeA, aes(x = variable, y = city))

heat1 <- base +
  geom_tile(aes(fill = value))
heat1

# Pairwise distances between cities over the input indicators
# (first column, the city name, is dropped)
distanceAmong <- dist(safeIn[, -1])

# Classical multidimensional scaling down to two dimensions
resultMDS <- cmdscale(distanceAmong, k = 2, eig = TRUE)

# Pull out the two MDS dimensions
dim1 <- resultMDS$points[, 1]
dim2 <- resultMDS$points[, 2]

# One row per city: its two MDS coordinates plus its name
coordinates <- data.frame(dim1, dim2, city = safeIn$city)

head(coordinates)

##          dim1       dim2      city
## 1  -5.2101475  41.537274 Abu Dhabi
## 2 -88.8291715  -0.430404 Amsterdam
## 3   4.7336749  53.260067    Athens
## 4  96.8490858 -22.620966   Bangkok
## 5 -83.2920752  -1.105904 Barcelona
## 6  -0.2189441   7.133945   Beijing

library(cluster)
# Fix the RNG seed so the k-means result is reproducible
set.seed(123)

# Cluster on the input variables only (safeIn without the city column), i.e.
# the same columns the MDS distances were computed from. Clustering on `safe`
# would also include the output indicators.
resultKM <- kmeans(safeIn[, -1],
                   centers = 3)

# Store each city's cluster label as a factor so it maps to discrete colours
coordinates$cluster <- as.factor(resultKM$cluster)

# MDS plot of the cities: labelled by name, coloured by k-means cluster
base <- ggplot(coordinates,
               aes(x = dim1, y = dim2, label = city, color = cluster))
base + geom_text(size = 2)

Part 3 (For final project):

I am still collecting data for my final project and will be finished by 5/17. So far, I have collected data on hours slept each day and daily water intake. I intend to use this data to examine its causal effect on my daily mood, which I use Daylio to track.