# Plot ME
# - individual data and venue data
# - match based on:
# - D (individual file, venueLocation) and F (venue file, venueLocationOld) columns, and
# - C (individual file, venueName) and B (venue file, venueNameOld) columns
# - from the venue file we need the new name, the new location, and the confidence flags (nameConfident, locationConfident) in the final set
# - from the individual file we need everything
# working folder location R:/ISGMH/EDIT/Projects/PLoT ME/Data/PLoT ME Network Data/NetworkData/2modeData
# output file: plotMe2modeData.csv
# (1) packages
library(car)
library(data.table)
library(dplyr)
## Warning: Installed Rcpp (0.12.10) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# (2) wd
# Clear the global environment before the run.
# NOTE: this removes every object in the session that sources this script.
rm(list = ls())
# Optional extra path component inserted directly after the drive letter;
# leave it as "" to use the project folder straight under R:/.
whoareyou <- ""
# Build the working folder from its non-empty parts only, so an empty
# `whoareyou` gives "R:/ISGMH/..." instead of "R://ISGMH/...".
root_parts <- c(
  "R:",
  whoareyou,
  "ISGMH/EDIT/Projects/PLoT ME/Data/PLoT ME Network Data/NetworkData/2modeData"
)
root <- paste(root_parts[nzchar(root_parts)], collapse = "/")
# The input and output files below are referenced relative to this folder.
setwd(root)
# getwd()
# list.files()
# (3) data
# venue data
# Venue file: old/new venue names and locations plus the confidence flags.
dv <- read.csv("PLoT ME - unique venues - CLEAN - 08.18.17.csv")
# Keep only the merge keys (old name/location) and the columns wanted in the
# final set (new name/location and the two confidence flags).
dv <- dplyr::select(
  dv,
  venueNameOld, venueNameNew,
  venueLocationOld, venueLocationNew,
  nameConfident, locationConfident
)
# individual (and venue) data
di <- read.csv("venueEdgeList.csv")
# Count missing cells in the individual file.
naI <- is.na(di)
sum(naI)
## [1] 0
# Count missing cells in the venue file. This check previously re-tested `di`,
# so the venue table was never actually inspected.
naV <- is.na(dv)
sum(naV)
## NOTE(review): the "[1] 0" recorded here earlier was computed from `di`;
## re-run to record the count for `dv`.
# (4) merge
# complete merge
# Full outer join: keep every row of the individual file and every row of the
# venue file, matching (venueName, venueLocation) in `di` against
# (venueNameOld, venueLocationOld) in `dv`. Rows without a partner get NA in
# the other table's columns.
ddAll <- merge(
  di, dv,
  by.x = c("venueName", "venueLocation"),
  by.y = c("venueNameOld", "venueLocationOld"),
  all = TRUE
)
# some data structure check
# Sort by radarId, breaking ties by venueLocationNew. This is the same row
# order the two consecutive stable sorts (location first, then radarId) gave.
ddAll <- ddAll[with(ddAll, order(radarId, venueLocationNew)), ]
naAll <- is.na(ddAll)
sum(naAll)
## [1] 0
# some recoding
# Recode both confidence flags from "Y"/"N" to "1"/"0". The columns end up as
# character vectors; any other value (including NA) is left untouched.
flag_cols <- c("nameConfident", "locationConfident")
ddAll[flag_cols] <- lapply(ddAll[flag_cols], function(flag) {
  flag <- as.character(flag)
  flag[flag %in% "Y"] <- "1"
  flag[flag %in% "N"] <- "0"
  flag
})
# (5) output
# Save the merged and recoded table into the working folder. Row names are
# written as well (the write.csv default, spelled out here), so the file
# starts with an unnamed column holding the data frame's row names.
write.csv(
  x = ddAll,
  file = "plotMe2modeData.csv",
  row.names = TRUE
)
# this was exported for additional cleaning
# dToClean <- ddAll[4567:4598,]
# write.csv(dToClean, file = "missingVenues.csv")