I downloaded the “1976-2020-president” dataset from the Harvard Dataverse portal. The dataset is in .csv format. I could load it into R directly with read.table, but I prefer to convert it to .xlsx format in Excel so that I can use read_excel, as shown below:
# Loading packages
# Chunk defaults: echo and evaluate the code, hide messages and warnings,
# and use a figure height of 4.
knitr::opts_chunk$set(
  echo = TRUE, eval = TRUE, message = FALSE, warning = FALSE, fig.height = 4
)
necessaryPackages <- c(
  "foreign", "reshape", "rvest", "tidyverse", "dplyr", "stringr", "ggplot2",
  "stargazer", "readr", "haven", "readxl"
)
# Packages from the list that are not installed yet; install only those.
new.packages <- setdiff(necessaryPackages, installed.packages()[, "Package"])
if (length(new.packages)) {
  install.packages(new.packages)
}
# Attach every package; the printed list holds one TRUE/FALSE per package.
lapply(necessaryPackages, require, character.only = TRUE)
## Loading required package: foreign
## Loading required package: reshape
## Loading required package: rvest
## Loading required package: xml2
## Loading required package: tidyverse
## ── Attaching packages ───────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
## ✓ tibble 3.0.0 ✓ dplyr 1.0.2
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::expand() masks reshape::expand()
## x dplyr::filter() masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag() masks stats::lag()
## x purrr::pluck() masks rvest::pluck()
## x dplyr::rename() masks reshape::rename()
## Loading required package: stargazer
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
## Loading required package: haven
## Loading required package: readxl
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
##
## [[7]]
## [1] TRUE
##
## [[8]]
## [1] TRUE
##
## [[9]]
## [1] TRUE
##
## [[10]]
## [1] TRUE
##
## [[11]]
## [1] TRUE
# Load dataset: read the first sheet of the converted workbook.
# NOTE(review): the workbook stores missing values as the literal text "NA"
# (see the warnings and the `notes` column below); passing na = "NA" to
# read_excel() would import them as real missing values.
presidentData <- read_excel(
  path = "/Users/twinkleroy/Downloads/dataverse_files/1976-2020-president.xlsx",
  sheet = 1
)
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Expecting logical in J4013 / R4013C10: got 'NA'
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Expecting logical in J4014 / R4014C10: got 'NA'
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Expecting logical in J4015 / R4015C10: got 'NA'
# Inspect the structure of the imported data: column names, types and first values.
str(presidentData)
## tibble [4,287 × 15] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:4287] 1976 1976 1976 1976 1976 ...
## $ state : chr [1:4287] "ALABAMA" "ALABAMA" "ALABAMA" "ALABAMA" ...
## $ state_po : chr [1:4287] "AL" "AL" "AL" "AL" ...
## $ state_fips : num [1:4287] 1 1 1 1 1 1 1 2 2 2 ...
## $ state_cen : num [1:4287] 63 63 63 63 63 63 63 94 94 94 ...
## $ state_ic : num [1:4287] 41 41 41 41 41 41 41 81 81 81 ...
## $ office : chr [1:4287] "US PRESIDENT" "US PRESIDENT" "US PRESIDENT" "US PRESIDENT" ...
## $ candidate : chr [1:4287] "CARTER, JIMMY" "FORD, GERALD" "MADDOX, LESTER" "BUBAR, BENJAMIN \"\"BEN\"\"" ...
## $ party_detailed: chr [1:4287] "DEMOCRAT" "REPUBLICAN" "AMERICAN INDEPENDENT PARTY" "PROHIBITION" ...
## $ writein : logi [1:4287] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ candidatevotes: num [1:4287] 659170 504070 9198 6669 1954 ...
## $ totalvotes : num [1:4287] 1182850 1182850 1182850 1182850 1182850 ...
## $ version : num [1:4287] 20210113 20210113 20210113 20210113 20210113 ...
## $ notes : chr [1:4287] "NA" "NA" "NA" "NA" ...
## $ party : chr [1:4287] "DEMOCRAT" "REPUBLICAN" "OTHER" "OTHER" ...
# Distribution of the per-candidate vote counts (min, quartiles, mean, max).
summary(presidentData$candidatevotes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1177 7499 311908 199242 11110250
# NOTE(review): summarize() is called without any summary expressions, so the
# result has one row and no columns (the 1 x 0 tibble printed below);
# summary(presidentData) may have been intended - confirm.
summarize(presidentData)
## # A tibble: 1 x 0
You should aim to make a dataset at the state*year level, with information on the party that won in that state and that year. The other goal is to define the color of each party (blue for Democrats, red for Republicans, purple for other parties) and select a subset of the data: the six variables (year, state, state_fips, party, candidate, color).
# Winner per state and year: within each state/year group keep the row(s)
# with the highest candidatevotes.
groupwinner <- top_n(group_by(presidentData, state, year), 1, candidatevotes)
# Keep four variables (year, state, state_fips, party) and add a colour per
# party: Blue for DEMOCRAT, Red for REPUBLICAN, Purple for OTHER, white for
# anything else.
# NOTE(review): `candidate` is not selected here although the task text lists
# it among the variables to keep.
NewpresidentData <- groupwinner %>%
  select(year, state, state_fips, party) %>%
  mutate(
    color = recode(
      as.character(party),
      "DEMOCRAT" = "Blue",
      "REPUBLICAN" = "Red",
      "OTHER" = "Purple",
      .default = "white"
    )
  )
#View(NewpresidentData)
#str(NewpresidentData$party)
head(NewpresidentData)
## # A tibble: 6 x 5
## # Groups: state, year [6]
## year state state_fips party color
## <dbl> <chr> <dbl> <chr> <chr>
## 1 1976 ALABAMA 1 DEMOCRAT Blue
## 2 1976 ALASKA 2 REPUBLICAN Red
## 3 1976 ARIZONA 4 REPUBLICAN Red
## 4 1976 ARKANSAS 5 DEMOCRAT Blue
## 5 1976 CALIFORNIA 6 REPUBLICAN Red
## 6 1976 COLORADO 8 REPUBLICAN Red
library(dplyr)
library(maps)
# Winning party colour per state for the 2000 election.
candidatevotes <- NewpresidentData[NewpresidentData$year == 2000, c("state_fips", "color")]
# Reorder the rows to follow state.fips (from the maps package), which is the
# order map("state") uses when it recycles `col` over its polygons.
# match() takes only the lookup values and the table: the colour vector that
# used to be passed as a third argument was silently used as `nomatch`.
candidatevotes <- candidatevotes[match(state.fips$fips, candidatevotes$state_fips), ]
# Leave two lines of top margin so the title has room to be drawn.
par(mfrow = c(1, 1), mar = c(0, 0, 2, 0))
map("state", col = candidatevotes$color, fill = TRUE)
title("Presidential election results by State in 2000",
  adj = 0.25, line = 1
)
# One filled square (pch 22) per party, laid out horizontally.
legend("bottom",
  legend = c("Democratic", "Republican", "Other"),
  col = "black", pch = 22, pt.cex = 1.5,
  pt.bg = c("blue", "red", "purple"),
  horiz = TRUE, cex = 0.75, box.lty = 1
)
Making map loop for presidential candidates
# Map looping for year 1976 to 2020
library(dplyr)
library(maps)
years <- seq(1976, 2020, by = 4)
# 4 x 3 grid (12 elections). Each panel keeps two lines of top margin for its
# year label, and the outer top margin holds the overall title.
par(mfrow = c(4, 3), mar = c(0, 0, 2, 0), oma = c(0, 0, 2, 0))
for (i in years) {
  candidatevotes <- NewpresidentData[NewpresidentData$year == i, c("state_fips", "party", "color")]
  # Align rows with the polygon order of map("state") via state.fips; match()
  # only needs the lookup values and the table (a third argument is `nomatch`).
  candidatevotes <- candidatevotes[match(state.fips$fips, candidatevotes$state_fips), ]
  # map() does not draw a `main` title, so the overall title is added once
  # after the loop instead.
  map("state", col = candidatevotes$color, fill = TRUE)
  mtext(i, side = 3, line = 1)
}
mtext("Presidential election results by state from 1976 to 2020",
  side = 3, line = 0.5, outer = TRUE
)
library(leaflet)
# Empty leaflet widget with the OpenStreetMap provider tiles as base layer.
myMap <- addProviderTiles(map = leaflet(), provider = providers$OpenStreetMap)
myMap
Go to data.humdata.org and download the shapefile for Rohingya refugee camps.
library(sf)
# Read the camp block outlines from the downloaded shapefile.
sp <- read_sf("/Users/twinkleroy/Downloads/200908_RRC_Outline_Block_AL2/200908_RRC_Outline_Block_AL2.shp")
# Reproject to CRS 4326 before handing the layer to leaflet.
campShapeFile <- st_transform(sp, crs = 4326)
head(campShapeFile)
## Simple feature collection with 6 features and 10 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 92.13366 ymin: 21.2007 xmax: 92.13933 ymax: 21.21256
## CRS: 4326
## # A tibble: 6 x 11
## Block_Let Camp_SSID Block_Name Block_SSID SMSD_Cname Camp_Alias NPM_Cname
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 I CXB-232 C04X_I CXB-232_I… Camp 04X Camp 4 Ex… Camp 04 …
## 2 B CXB-232 C04X_B CXB-232_B… Camp 04X Camp 4 Ex… Camp 04 …
## 3 F CXB-232 C04X_F CXB-232_F… Camp 04X Camp 4 Ex… Camp 04 …
## 4 C CXB-232 C04X_C CXB-232_C… Camp 04X Camp 4 Ex… Camp 04 …
## 5 E CXB-232 C04X_E CXB-232_E… Camp 04X Camp 4 Ex… Camp 04 …
## 6 H CXB-232 C04X_H CXB-232_H… Camp 04X Camp 4 Ex… Camp 04 …
## # … with 4 more variables: Area_Acres <dbl>, CampName <chr>, Area_SqM <chr>,
## # geometry <MULTIPOLYGON [°]>
Zoom the map onto South-Eastern Bangladesh (setView(92.14871, 21.18780, zoom = 12)) and add the shapefile to the map as follows:
library(leaflet)
# OpenStreetMap base layer centred on the camps, with the block outlines on top.
leaflet() %>%
  addProviderTiles(provider = providers$OpenStreetMap) %>%
  setView(lng = 92.14871, lat = 21.18780, zoom = 12) %>%
  addPolygons(
    data = campShapeFile,
    stroke = TRUE,
    weight = 1,
    fill = TRUE
  )
Add some highlights and labels
# Same camp map on Esri tiles, with hover highlighting and a label per block.
leaflet() %>%
  addProviderTiles(providers$Esri) %>%
  setView(92.14871, 21.18780, zoom = 12) %>%
  addPolygons(
    data = campShapeFile, fill = TRUE, stroke = TRUE, weight = 1,
    # Spell out the argument name instead of relying on partial matching.
    highlightOptions = highlightOptions(fillOpacity = 0.7),
    # The shapefile has no `Block_No` column, so the old label was NULL;
    # `Block_Name` (e.g. "C04X_I") is the block identifier it does contain.
    label = ~Block_Name
  )
Adding tiles on a new leaflet map of the USA.
# State outlines from the maps package, returned as data without plotting.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
mapUSA <- map("state", fill = TRUE, plot = FALSE)
# Draw the state polygons, then overlay the WMS radar layer as transparent PNG tiles.
leaflet() %>%
  addPolygons(data = mapUSA, fill = TRUE, stroke = TRUE, weight = 1) %>%
  addWMSTiles(
    "http://mesonet.agron.iastate.edu/cgi-bin/wms/nexrad/n0r.cgi",
    layers = "nexrad-n0r-900913",
    options = WMSTileOptions(format = "image/png", transparent = TRUE),
    attribution = "Weather data © 2012 IEM Nexrad"
  )
Go to [this page](https://rstudio.github.io/leaflet/choropleths.html)