path <- "C:/users/Steven/Desktop/2016_presidential_election_2016-02-26-02-28-37/2016_presidential_election";

filepath<-dir(path)

filepath2<-filepath[grep("\\.csv$",filepath)]#regular pattern

csv.path <- sapply(filepath2,function(names) paste(path,names,sep='/')) #the second parameter is a function written in the bracket,take in things from the first parameter
                                                                        #no engo bracket needed
csv<- sapply(csv.path, function(csv) read.csv(csv))

filepath2
## [1] "county_facts.csv"            "county_facts_dictionary.csv"
## [3] "primary_results.csv"
result<-csv$primary_results.csv
fact<-csv$county_facts.csv
fact.dict<-csv$county_facts_dictionary.csv

library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
GOP.candi<- sqldf("select distinct candidate from result where party='Republican'")
## Loading required package: tcltk
DEMO.candi<-sqldf("select distinct candidate from result where party='Democrat'")
candidate<-rbind(GOP.candi,DEMO.candi)

county<-sqldf("select distinct area_name from fact where state_abbreviation !=''")

countNA<-sqldf("select count(fraction_votes) from result where fraction_votes=''")#0. means all the fraction data are available

#RHI725214 Hispanic or Latino, percent, 2014
#EDU685213 Bachelor's degree or higher, percent of persons age
#PVY020213 Persons below poverty level, percent, 2009-2013
#VET605213 Veterans, 2009-2013
#AGE775214 Persons 65 years and over, percent, 2014
#SBO415207 Hispanic-owned firms, percent, 2007

result1<-sqldf("select fraction_votes,county from result where candidate='Donald Trump'")
colnames(result1)<-c("Trump","county")

result2<-sqldf("select fraction_votes,county from result where candidate='Ted Cruz'")
colnames(result2)<-c("Cruz","county")

result3<-sqldf("select fraction_votes,county from result where candidate='Marco Rubio'")
colnames(result3)<-c("Rubio","county")


fact1<-sqldf("select area_name,state_abbreviation,RHI725214,EDU685213,PVY020213,VET605213,AGE775214,SBO415207 from fact where state_abbreviation!=''")
fact1$county <- sub(pattern=" County", replacement="", fact1$area_name) 
raw<-sqldf("select * from fact1 a inner join result1 b on a.county=b.county")
raw<-raw[c(-1,-11)]
head(raw)
##   state_abbreviation RHI725214 EDU685213 PVY020213 VET605213 AGE775214
## 1                 AL       1.2      14.0      28.4      1497      18.0
## 2                 AL       3.5      16.1      21.9     11385      16.0
## 3                 AL       3.5      16.1      21.9     11385      16.0
## 4                 AL       1.5      12.8      21.2      2174      20.9
## 5                 AL       1.5      12.8      21.2      2174      20.9
## 6                 AL       1.3      11.2      29.3      1583      18.0
##   SBO415207   county Trump
## 1       0.0   Butler 0.242
## 2       0.5  Calhoun 0.244
## 3       0.5  Calhoun 0.352
## 4       0.0 Cherokee 0.296
## 5       0.0 Cherokee 0.420
## 6       0.0   Clarke 0.363
raw2<-sqldf("select * from raw a inner join result2 b on a.county=b.county")
raw2<-raw2[-11]
raw3 <- sqldf("select * from raw2 a inner join result3 b on a.county=b.county")
raw3<-raw3[-12]

final<-raw3
par(cex=0.5)
plot(final$RHI725214/100,final$Trump,xlab = "hispanic ratio",ylab = "voting ratio",col="blue")
lines(final$RHI725214/100,final$Cruz,type = "p",col="red")
lines(final$RHI725214/100,final$Rubio,type = "p",col="green3")
legend("topright",legend = c("Trump","Cruz","Rubio"),fill = c("blue","red","green3"))

plot(final$PVY020213/100,final$Trump,xlab = "poverty ratio",ylab = "voting ratio",col="blue")
lines(final$PVY020213/100,final$Cruz,type = "p",col="red")
lines(final$PVY020213/100,final$Rubio,type = "p",col="green3")
legend("topright",legend = c("Trump","Cruz","Rubio"),fill = c("blue","red","green3"))