Contents:
Produce a map using tmap. In this case, show the zip codes where more people gave cash than in-kind contributions.
# Washington contribution dataset, stored as an .RData file on GitHub
link <- "https://github.com/EvansDataScience/DataDriven_ManagementAndPolicy/raw/master/Session6/contriWA_2016.RData"
# load() restores the objects saved in the remote file into the workspace;
# the code below relies on it providing `contriWA_2016`
load(file = url(link))
# Compact overview of the columns; output lines are cut at 60 characters
str(contriWA_2016, strict.width = "cut", width = 60)
## 'data.frame': 374584 obs. of 10 variables:
## $ id : chr "3982630.rcpt" "3982631.rcp"..
## $ contributor_state : chr "WA" "WA" "WA" "WA" ...
## $ contributor_zip : num 98683 98683 98683 98168 9850..
## $ amount : num 50 50 50 500 900 900 50 225 ..
## $ election_year : int 2016 2016 2016 2016 2016 201..
## $ party : Factor w/ 9 levels "","CONSTITUT"..
## $ cash_or_in_kind : Factor w/ 2 levels "Cash","In ki"..
## $ contributor_location: chr "(45.60817, -122.51972)" "("..
## $ Lat : num 45.6 45.6 45.6 47.5 47 ...
## $ Lon : num -123 -123 -123 -122 -123 ...
# Importing Washington Map data
# The shapefile is published as a zip archive on GitHub.
zippedSHP <- "https://github.com/EvansDataScience/data/raw/master/WAzips.zip"
library(utils)
temp <- tempfile()
# mode = "wb" forces a binary transfer so the archive cannot be damaged by
# text-mode line-ending translation on Windows
download.file(zippedSHP, temp, mode = "wb")
# Extract into the working directory; the shapefile is read from there by
# its relative file name
unzip(temp)
# The downloaded archive is not needed once its contents are extracted
unlink(temp)
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-3, (SVN revision 828)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/leoto/Documents/R/win-library/3.6/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/leoto/Documents/R/win-library/3.6/rgdal/proj
## Linking to sp version: 1.3-1
library(tmap)
# Read the zip-code polygons, keeping character fields as plain strings.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently flip this option.
wazipMap <- readOGR("SAEP_ZIP_Code_Tabulation_Areas.shp",
                    stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\leoto\OneDrive\Documents\598\Week 6\Session6\SAEP_ZIP_Code_Tabulation_Areas.shp", layer: "SAEP_ZIP_Code_Tabulation_Areas"
## with 598 features
## It has 101 fields
## Integer64 fields read as strings: OBJECTID POP2010 HHP2010 GQ2010 HU2010 OHU2010
# Plain polygon layer with every zip-code area
waZips <- tm_shape(wazipMap) + tm_polygons()
# ms_dissolve() merges the zip-code polygons; the result is drawn in white
# as the background outline of the state
library(rmapshaper)
baseMap <- ms_dissolve(wazipMap)
waBorder <- tm_shape(baseMap) + tm_polygons(col = "white", lwd = 1)
# Turn the contribution records into spatial points tagged with the map's CRS
library(raster)
mapCRS <- crs(wazipMap)
# NOTE(review): this labels the Lon/Lat values with the map's CRS without
# reprojecting them - confirm the map itself is in longitude/latitude
contriWA_geo <- SpatialPointsDataFrame(
  coords = contriWA_2016[, c("Lon", "Lat")], # x = longitude, y = latitude
  data = contriWA_2016,                      # the original data frame
  proj4string = mapCRS
)
library(dplyr)
## Registered S3 method overwritten by 'dplyr':
## method from
## print.location geojsonio
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(fastDummies)
# Flag cash contributions: dummy_cols() adds one indicator column per level
# of cash_or_in_kind, and `cash` is 1 where the "Cash" indicator is 1
cashdon <- fastDummies::dummy_cols(contriWA_2016,
                                   select_columns = "cash_or_in_kind")
cashdon <- mutate(cashdon, cash = ifelse(cash_or_in_kind_Cash == 1, 1, 0))
# Number of cash contributions per contributor zip code
# NOTE(review): only cash is counted here; the exercise asks for zip codes
# where cash outnumbers in-kind - confirm whether a comparison is needed
WA_zip_cash <- summarise(group_by(cashdon, contributor_zip),
                         Cash = sum(cash))
head(WA_zip_cash)
## # A tibble: 6 x 2
## contributor_zip Cash
## <dbl> <dbl>
## 1 98001 1043
## 2 98002 680
## 3 98003 1644
## 4 98004 3459
## 5 98005 1658
## 6 98006 2433
# Create cash layer: attach the per-zip cash counts to the zip-code polygons.
# all.x = FALSE keeps only polygons with a matching contributor zip code
# (spelled out because `F` is a reassignable variable, not a constant).
layerCash <- merge(wazipMap, WA_zip_cash,
                   by.x = "ZCTA5CE10",
                   by.y = "contributor_zip",
                   all.x = FALSE)
library(RColorBrewer)
library(classInt)
colorForPalette <- "YlGnBu"
# Choropleth of the cash counts in five quantile classes, drawn over the
# white state outline
layer1 <- waBorder +
  tm_shape(layerCash) +
  tm_polygons("Cash",
              style = "quantile",
              n = 5,
              title = "Cash Contributions",
              palette = colorForPalette)
# Add a north arrow and a scale bar
fullMap <- layer1 +
  tm_compass(position = c("left", "TOP"), type = "arrow") +
  tm_scale_bar(position = c("RIGHT", "BOTTOM"), width = 0.2)
# Title and legend placement; the unassigned result is auto-printed,
# which draws the map
fullMap + tm_layout(main.title = "Cash Contributions by WA Zip Codes",
                    main.title.position = "center",
                    legend.position = c("RIGHT", "center"),
                    # inner margins are given as bottom, left, top, right
                    inner.margins = c(0.1, 0, 0.1, 0.3))
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
Remake the plot of dimensionality reduction (where the multidimensional scaling plot cases were colored according to the k-means output). This time, use only the variables that represent inputs.
library(rio)
# Safe Cities Index spreadsheet hosted on GitHub
link <- "https://github.com/EvansDataScience/data/raw/master/safeCitiesIndexAll.xlsx"
safe <- import(link)
# List the column names to see which indicators are inputs (*_In_*)
# and which are outputs (*_Out_*)
names(safe)
## [1] "city" "D_In_PrivacyPolicy"
## [3] "D_In_AwarenessDigitalThreats" "D_In_PubPrivPartnerships"
## [5] "D_In_TechnologyEmployed" "D_In_CyberSecurity"
## [7] "D_Out_IdentityTheft" "D_Out_CompInfected"
## [9] "D_Out_InternetAccess" "H_In_EnvironmentPolicies"
## [11] "H_In_AccessHealthcare" "H_In_Beds_1000"
## [13] "H_In_Doctors_1000" "H_In_AccessFood"
## [15] "H_In_QualityHealthServ" "H_Out_AirQuality"
## [17] "H_Out_WaterQuality" "H_Out_LifeExpectY"
## [19] "H_Out_InfMortality" "H_Out_CancerMortality"
## [21] "H_Out_AttacksBioChemRad" "I_In_EnforceTransportSafety"
## [23] "I_In_PedestrianFriendliness" "I_In_QualityRoad"
## [25] "I_In_QualityElectricity" "I_In_DisasterManagement"
## [27] "I_Out_DeathsDisaster" "I_Out_VehicularAccidents"
## [29] "I_Out_PedestrianDeath" "I_Out_LiveSlums"
## [31] "I_Out_AttacksInfrastructure" "P_In_PoliceEngage"
## [33] "P_In_CommunityPatrol" "P_In_StreetCrimeData"
## [35] "P_In_TechForCrime" "P_In_PrivateSecurity"
## [37] "P_In_GunRegulation" "P_In_PoliticalStability"
## [39] "P_Out_PettyCrime" "P_Out_ViolentCrime"
## [41] "P_Out_OrganisedCrime" "P_Out_Corruption"
## [43] "P_Out_DrugUse" "P_Out_TerroristAttacks"
## [45] "P_Out_SeverityTerrorist" "P_Out_GenderSafety"
## [47] "P_Out_PerceptionSafety" "P_Out_ThreaTerrorism"
## [49] "P_Out_ThreatMilitaryConf" "P_Out_ThreatCivUnrest"
# Keep the city identifier plus every input indicator, i.e. the columns
# whose name contains "_In_"
safeIn <- safe[c("city", grep("_In_", names(safe), fixed = TRUE, value = TRUE))]
names(safeIn)
## [1] "city" "D_In_PrivacyPolicy"
## [3] "D_In_AwarenessDigitalThreats" "D_In_PubPrivPartnerships"
## [5] "D_In_TechnologyEmployed" "D_In_CyberSecurity"
## [7] "H_In_EnvironmentPolicies" "H_In_AccessHealthcare"
## [9] "H_In_Beds_1000" "H_In_Doctors_1000"
## [11] "H_In_AccessFood" "H_In_QualityHealthServ"
## [13] "I_In_EnforceTransportSafety" "I_In_PedestrianFriendliness"
## [15] "I_In_QualityRoad" "I_In_QualityElectricity"
## [17] "I_In_DisasterManagement" "P_In_PoliceEngage"
## [19] "P_In_CommunityPatrol" "P_In_StreetCrimeData"
## [21] "P_In_TechForCrime" "P_In_PrivateSecurity"
## [23] "P_In_GunRegulation" "P_In_PoliticalStability"
library(reshape2)
library(ggiraph)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(ggiraphExtra)
library(tidyverse)
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## -- Attaching packages ------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.1.1 v readr 1.3.1
## v tibble 2.1.1 v purrr 0.3.2
## v tidyr 0.8.3 v stringr 1.4.0
## v ggplot2 3.1.1 v forcats 0.4.0
## -- Conflicts --------------------------------- tidyverse_conflicts() --
## x tidyr::extract() masks raster::extract()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks raster::select()
# Reshape to long format: one row per city/indicator pair, with the
# indicator name in `variable` and its score in `value`
safeA <- melt(safeIn, id.vars = "city")
head(safeA)
## city variable value
## 1 Abu Dhabi D_In_PrivacyPolicy 50
## 2 Amsterdam D_In_PrivacyPolicy 100
## 3 Athens D_In_PrivacyPolicy 75
## 4 Bangkok D_In_PrivacyPolicy 25
## 5 Barcelona D_In_PrivacyPolicy 100
## 6 Beijing D_In_PrivacyPolicy 75
# Heat map: indicators on the x axis, cities on the y axis,
# tile fill given by the indicator value
base <- ggplot(safeA, aes(x = variable, y = city))
heat1 <- base + geom_tile(aes(fill = value))
heat1
# Distances between cities over the input indicators (city column dropped)
distanceAmong <- dist(safeIn[, -1])
# Classical multidimensional scaling down to two dimensions
resultMDS <- cmdscale(distanceAmong, k = 2, eig = TRUE)
# Pull out the two coordinates and label every point with its city
dim1 <- resultMDS$points[, 1]
dim2 <- resultMDS$points[, 2]
coordinates <- data.frame(dim1, dim2, city = safeIn$city)
head(coordinates)
## dim1 dim2 city
## 1 -5.2101475 41.537274 Abu Dhabi
## 2 -88.8291715 -0.430404 Amsterdam
## 3 4.7336749 53.260067 Athens
## 4 96.8490858 -22.620966 Bangkok
## 5 -83.2920752 -1.105904 Barcelona
## 6 -0.2189441 7.133945 Beijing
library(cluster)
# Fix the seed so the k-means result is reproducible
set.seed(123)
# Cluster on the input indicators only (safeIn), the same columns the MDS
# distances were computed from, rather than on every column of `safe`
resultKM <- kmeans(safeIn[, -1], centers = 3)
coordinates$cluster <- as.factor(resultKM$cluster)
# MDS scatter of city names, coloured by k-means cluster
base <- ggplot(coordinates,
               aes(x = dim1, y = dim2, label = city, color = cluster))
base + geom_text(size = 2)
I am still collecting data for my final project and will be finished by 5/17. During this process, I have collected data on hours slept each day and daily water intake. I intend to use this data to explore their causal effect on my daily mood, which I use Daylio to track.