path <- "C:/users/Steven/Desktop/2016_presidential_election_2016-02-26-02-28-37/2016_presidential_election";
filepath<-dir(path)
filepath2<-filepath[grep("\\.csv$",filepath)]#regular pattern
csv.path <- sapply(filepath2,function(names) paste(path,names,sep='/')) #the second parameter is a function written in the bracket,take in things from the first parameter
#no engo bracket needed
csv<- sapply(csv.path, function(csv) read.csv(csv))
filepath2
## [1] "county_facts.csv" "county_facts_dictionary.csv"
## [3] "primary_results.csv"
result<-csv$primary_results.csv
fact<-csv$county_facts.csv
fact.dict<-csv$county_facts_dictionary.csv
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
GOP.candi<- sqldf("select distinct candidate from result where party='Republican'")
## Loading required package: tcltk
DEMO.candi<-sqldf("select distinct candidate from result where party='Democrat'")
candidate<-rbind(GOP.candi,DEMO.candi)
county<-sqldf("select distinct area_name from fact where state_abbreviation !=''")
countNA<-sqldf("select count(fraction_votes) from result where fraction_votes=''")#0. means all the fraction data are available
#RHI725214 Hispanic or Latino, percent, 2014
#EDU685213 Bachelor's degree or higher, percent of persons age
#PVY020213 Persons below poverty level, percent, 2009-2013
#VET605213 Veterans, 2009-2013
#AGE775214 Persons 65 years and over, percent, 2014
#SBO415207 Hispanic-owned firms, percent, 2007
result1<-sqldf("select fraction_votes,county from result where candidate='Donald Trump'")
colnames(result1)<-c("Trump","county")
result2<-sqldf("select fraction_votes,county from result where candidate='Ted Cruz'")
colnames(result2)<-c("Cruz","county")
result3<-sqldf("select fraction_votes,county from result where candidate='Marco Rubio'")
colnames(result3)<-c("Rubio","county")
fact1<-sqldf("select area_name,state_abbreviation,RHI725214,EDU685213,PVY020213,VET605213,AGE775214,SBO415207 from fact where state_abbreviation!=''")
fact1$county <- sub(pattern=" County", replacement="", fact1$area_name)
raw<-sqldf("select * from fact1 a inner join result1 b on a.county=b.county")
raw<-raw[c(-1,-11)]
head(raw)
## state_abbreviation RHI725214 EDU685213 PVY020213 VET605213 AGE775214
## 1 AL 1.2 14.0 28.4 1497 18.0
## 2 AL 3.5 16.1 21.9 11385 16.0
## 3 AL 3.5 16.1 21.9 11385 16.0
## 4 AL 1.5 12.8 21.2 2174 20.9
## 5 AL 1.5 12.8 21.2 2174 20.9
## 6 AL 1.3 11.2 29.3 1583 18.0
## SBO415207 county Trump
## 1 0.0 Butler 0.242
## 2 0.5 Calhoun 0.244
## 3 0.5 Calhoun 0.352
## 4 0.0 Cherokee 0.296
## 5 0.0 Cherokee 0.420
## 6 0.0 Clarke 0.363
raw2<-sqldf("select * from raw a inner join result2 b on a.county=b.county")
raw2<-raw2[-11]
raw3 <- sqldf("select * from raw2 a inner join result3 b on a.county=b.county")
raw3<-raw3[-12]
final<-raw3
par(cex=0.5)
plot(final$RHI725214/100,final$Trump,xlab = "hispanic ratio",ylab = "voting ratio",col="blue")
lines(final$RHI725214/100,final$Cruz,type = "p",col="red")
lines(final$RHI725214/100,final$Rubio,type = "p",col="green3")
legend("topright",legend = c("Trump","Cruz","Rubio"),fill = c("blue","red","green3"))

plot(final$PVY020213/100,final$Trump,xlab = "poverty ratio",ylab = "voting ratio",col="blue")
lines(final$PVY020213/100,final$Cruz,type = "p",col="red")
lines(final$PVY020213/100,final$Rubio,type = "p",col="green3")
legend("topright",legend = c("Trump","Cruz","Rubio"),fill = c("blue","red","green3"))
