# Show the directory against which the relative file names used below
# are resolved.
getwd()
## [1] "D:/rlang"
# List the entries of that directory.
dir()
## [1] "Assigment 2- script.R" "assignment 4.pdf"
## [3] "baltcam1" "best.R"
## [5] "chicagodata" "data"
## [7] "decision_tree.png" "hospital-data.csv"
## [9] "Hospital_Revised_Flatfiles.pdf" "midterm exam"
## [11] "midtermdata" "myfirstrprogram.R"
## [13] "outcome-of-care-measures.csv" "PollingData_Imputed.csv"
## [15] "pollingdatanalaysis.html" "pollingdatanalaysis.Rmd"
## [17] "projectwork.Rmd" "quality.csv"
## [19] "rankall.R" "rpackages"
## [21] "Rplot.pdf" "Rplot01.png"
## [23] "rsconnect" "rtest.R"
## [25] "run_analysis-dd.R" "run_analysis1.R"
## [27] "run_analysis2.R" "secTidySet.txt"
## [29] "specdata" "survey.csv"
## [31] "temp1.html" "temp1.Rmd"
## [33] "test.html" "test.Rmd"
## [35] "testindoc.html" "testindoc.Rmd"
## [37] "testing123.html" "testing123.Rmd"
## [39] "testing456.html" "testing456.Rmd"
## [41] "testing4567.html" "testing4567.Rmd"
# Read the comma-separated polling data; the first row of the file supplies
# the column names. Then preview the first six rows.
tryme <- read.table(
  file = "PollingData_Imputed.csv",
  header = TRUE,
  sep = ","
)
head(tryme, n = 6L)
## State Year Rasmussen SurveyUSA DiffCount PropR Republican
## 1 Alabama 2004 11 18 5 1 1
## 2 Alabama 2008 21 25 5 1 1
## 3 Alaska 2004 19 21 1 1 1
## 4 Alaska 2008 16 18 6 1 1
## 5 Arizona 2004 5 15 8 1 1
## 6 Arizona 2008 5 3 9 1 1
# Replace the column names with shorter ones, in column order:
# State -> St, Year -> Yr, Rasmussen -> Ras, SurveyUSA -> SurvUSA,
# DiffCount -> DC, PropR -> PR, Republican -> Rep.
colnames(tryme) <- c(
  "St", "Yr", "Ras", "SurvUSA", "DC", "PR", "Rep"
)
head(tryme, n = 6L)
## St Yr Ras SurvUSA DC PR Rep
## 1 Alabama 2004 11 18 5 1 1
## 2 Alabama 2008 21 25 5 1 1
## 3 Alaska 2004 19 21 1 1 1
## 4 Alaska 2008 16 18 6 1 1
## 5 Arizona 2004 5 15 8 1 1
## 6 Arizona 2008 5 3 9 1 1
No coefficient shows a clear, direct relationship with the outcome. Additionally, the values of St have to be converted to numeric codes.
# Inspect how the state column was read. This transcript printed "factor";
# since R 4.0.0, read.table() keeps strings as character by default.
class(tryme$St)

# Recode each state as a numeric code (1 = first state in sorted order).
# Going through factor() gives the same codes whether St arrived as a factor
# or as character. Calling as.numeric() directly on a character column would
# instead return NA for every state name, with a coercion warning.
tryme$St <- as.numeric(factor(tryme$St))
head(tryme)
## St Yr Ras SurvUSA DC PR Rep
## 1 1 2004 11 18 5 1 1
## 2 1 2008 21 25 5 1 1
## 3 2 2004 19 21 1 1 1
## 4 2 2008 16 18 6 1 1
## 5 3 2004 5 15 8 1 1
## 6 3 2008 5 3 9 1 1
# Logistic-regression experiment on a 75/25 train/test split.
# NOTE: every modelling step below is commented out, so none of it runs. In
# the visible code, caTools is only needed by the disabled sample.split() call.
#install.packages("caTools")
library(caTools)
# Fixed seed so that the random split would be reproducible.
#set.seed(88)
# Logical vector marking the rows assigned to the training set.
#split = sample.split(tryme$Rep, SplitRatio = 0.75)
#head(split)
#pollingtrain = subset(tryme, split == TRUE)
#pollingtest = subset(tryme, split == FALSE)
#head(pollingtrain)
#head(pollingtest)
# Binomial GLM of Rep on all remaining columns, fitted on the training rows.
#model = glm(Rep ~ St + Yr + Ras + SurvUSA +DC + PR, data = pollingtrain, family = binomial)
#model
No coefficient shows a clear, direct relationship with the outcome (Rep).
#predict(model, pollingtest, type = "response")
##            5            6            8           10           18           20           28           34           37
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 2.220446e-16 2.220446e-16 1.000000e+00 2.220446e-16 1.000000e+00
##           38           39           42           44           45           46           49           54           56
## 9.998519e-01 1.000000e+00 2.220446e-16 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 2.220446e-16 2.220446e-16
##           62           64           66           68           71           72           75           76           86
## 2.220446e-16 4.337903e-11 2.220446e-16 1.000000e+00 1.996228e-08 1.000000e+00 1.000000e+00 1.000000e+00 2.220446e-16
##           99          102          107          114          115          130          136          143          145
## 1.000000e+00 2.220446e-16 2.220446e-16 2.220446e-16 1.000000e+00 2.220446e-16 2.220446e-16 2.220446e-16 1.000000e+00
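Need to understand this output: almost every predicted probability is either 1 or 2.220446e-16 (machine epsilon, i.e. effectively 0), which suggests that the predictors separate the two classes almost perfectly in the training data.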
# install the new package "party"
#install.packages("party")
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# Fit a conditional inference tree for Rep on all other columns and save its
# plot as "decision_tree.png" in the working directory.

# First 100 rows of the polling data.
# NOTE(review): input.data is not used by ctree() below, which is fitted on
# the full `tryme` data frame -- confirm which data set was intended.
input.data <- tryme[1:100, ]

# Fit before opening the graphics device so that a failed fit does not leave
# a device open.
# NOTE(review): Rep is read as a number, in which case ctree() treats it as a
# regression response -- confirm whether as.factor(Rep) was intended.
output.tree <- ctree(Rep ~ St + Yr + Ras + SurvUSA + DC + PR, data = tryme)

# Open exactly one PNG device. The original opened two and closed only one,
# which is why dev.off() reported that "png 2" was still open. Draw the tree,
# then close the device so the file is written out.
png(filename = "decision_tree.png")
plot(output.tree)
dev.off()