# Data Management

# Read the poll records from a whitespace-delimited text file whose first
# line holds the column names.
sejong.poll <- read.table(file = "sejong_poll.txt", header = TRUE, sep = "")
# Check the column types before any recoding.
str(sejong.poll)
## 'data.frame':    44 obs. of  4 variables:
##  $ counts: int  21 194 259 393 443 117 1123 71 29 5 ...
##  $ vote  : chr  "yes" "no" "yes" "no" ...
##  $ class : chr  "high" "high" "third.current" "third.current" ...
##  $ region: chr  "Seoul" "Seoul" "Seoul" "Seoul" ...
# Display every row of the raw table.
sejong.poll
##    counts vote         class      region
## 1      21  yes          high       Seoul
## 2     194   no          high       Seoul
## 3     259  yes third.current       Seoul
## 4     393   no third.current       Seoul
## 5     443  yes      third.ex       Seoul
## 6     117   no      third.ex       Seoul
## 7    1123  yes      ordinary        yuhu
## 8      71   no      ordinary        yuhu
## 9      29  yes         chief     gyunggi
## 10      5   no         chief     gyunggi
## 11  17076  yes      ordinary     gyunggi
## 12    236   no      ordinary     gyunggi
## 13      1   no          high     pyungan
## 14      6  yes         chief     pyungan
## 15     35   no         chief     pyungan
## 16   1326  yes      ordinary     pyungan
## 17  28474   no      ordinary     pyungan
## 18     17  yes         chief    hwanghae
## 19     17   no         chief    hwanghae
## 20   4454  yes      ordinary    hwanghae
## 21  15601   no      ordinary    hwanghae
## 22      2   no          high chungcheong
## 23     35  yes         chief chungcheong
## 24     26   no         chief chungcheong
## 25   6982  yes      ordinary chungcheong
## 26  14013   no      ordinary chungcheong
## 27      5  yes         chief     kangwon
## 28     10   no         chief     kangwon
## 29    939  yes      ordinary     kangwon
## 30   6888   no      ordinary     kangwon
## 31      1   no          high      hamgil
## 32      3  yes         chief      hamgil
## 33     14   no         chief      hamgil
## 34     75  yes      ordinary      hamgil
## 35   7387   no      ordinary      hamgil
## 36     55  yes         chief   gyungsang
## 37     16   no         chief   gyungsang
## 38  36262  yes      ordinary   gyungsang
## 39    377   no      ordinary   gyungsang
## 40      2   no          high      jeolla
## 41     42  yes         chief      jeolla
## 42     12   no         chief      jeolla
## 43  29505  yes      ordinary      jeolla
## 44    257   no      ordinary      jeolla
# Work on a copy so the raw import stays untouched.
sejong.poll.2 <- sejong.poll
# vote becomes a factor with "yes" ordered before "no".
sejong.poll.2$vote <- factor(sejong.poll.2$vote, levels = c("yes", "no"))
str(sejong.poll.2)
## 'data.frame':    44 obs. of  4 variables:
##  $ counts: int  21 194 259 393 443 117 1123 71 29 5 ...
##  $ vote  : Factor w/ 2 levels "yes","no": 1 2 1 2 1 2 1 2 1 2 ...
##  $ class : chr  "high" "high" "third.current" "third.current" ...
##  $ region: chr  "Seoul" "Seoul" "Seoul" "Seoul" ...
# Recode class and region as factors with a fixed level order and
# display labels.
sejong.poll.2$class <- factor(
  sejong.poll.2$class,
  levels = c("high", "third.current", "third.ex", "chief", "ordinary"),
  labels = c("High", "3rd.current", "3rd.former", "Chief", "Commons")
)
sejong.poll.2$region <- factor(
  sejong.poll.2$region,
  levels = c("Seoul", "yuhu", "gyunggi", "pyungan", "hwanghae",
             "chungcheong", "kangwon", "hamgil", "gyungsang", "jeolla"),
  labels = c("Seoul", "Yuhu", "Gyunggi", "Pyungan", "Hwanghae",
             "Chungcheong", "Kangwon", "Hamgil", "Gyungsang", "Jeolla")
)
str(sejong.poll.2)
## 'data.frame':    44 obs. of  4 variables:
##  $ counts: int  21 194 259 393 443 117 1123 71 29 5 ...
##  $ vote  : Factor w/ 2 levels "yes","no": 1 2 1 2 1 2 1 2 1 2 ...
##  $ class : Factor w/ 5 levels "High","3rd.current",..: 1 1 2 2 3 3 5 5 4 4 ...
##  $ region: Factor w/ 10 levels "Seoul","Yuhu",..: 1 1 1 1 1 1 2 2 3 3 ...
# Per-row plotting colour: cyan for "yes" rows, red for "no" rows.
sejong.poll.2$color <- ifelse(sejong.poll.2$vote == "yes", "cyan", "red")
# Session-wide setting: later printed numbers use 3 significant digits.
options(digits = 3)
# Total counts per vote, computed once and reused by the proportion table,
# the pie chart and its labels.
vote.tbl <- xtabs(counts ~ vote, data = sejong.poll.2)
vote.tbl
## vote
##   yes    no 
## 98657 74149
prop.table(vote.tbl)
## vote
##   yes    no 
## 0.571 0.429
# One colour per slice, in vote-level order (yes, no). The per-row color
# column is not used here: it has one entry per data row, not per slice.
pie(vote.tbl, col = c("cyan", "red"))
title(main = "Overall Yes or No")
# Slice labels are taken from the table so they cannot drift from the data.
text(x = 0, y = c(0.4, -0.4), labels = as.character(vote.tbl))

# Vote totals for each of the five classes.
xtabs(counts ~ vote + class, data = sejong.poll.2)
##      class
## vote   High 3rd.current 3rd.former Chief Commons
##   yes    21         259        443   192   97742
##   no    200         393        117   135   73304
# Two-group version of class: every class other than "Commons" is
# labelled "Bureaus".
sejong.poll.2$class.2 <- ifelse(
  sejong.poll.2$class != "Commons",
  "Bureaus",
  "Commons"
)
str(sejong.poll.2)
## 'data.frame':    44 obs. of  6 variables:
##  $ counts : int  21 194 259 393 443 117 1123 71 29 5 ...
##  $ vote   : Factor w/ 2 levels "yes","no": 1 2 1 2 1 2 1 2 1 2 ...
##  $ class  : Factor w/ 5 levels "High","3rd.current",..: 1 1 2 2 3 3 5 5 4 4 ...
##  $ region : Factor w/ 10 levels "Seoul","Yuhu",..: 1 1 1 1 1 1 2 2 3 3 ...
##  $ color  : chr  "cyan" "red" "cyan" "red" ...
##  $ class.2: chr  "Bureaus" "Bureaus" "Bureaus" "Bureaus" ...
# Vote totals for the two groups.
xtabs(counts ~ vote + class.2, data = sejong.poll.2)
##      class.2
## vote  Bureaus Commons
##   yes     915   97742
##   no      845   73304
# Same table with row and column sums appended.
addmargins(
  xtabs(counts ~ vote + class.2, data = sejong.poll.2)
)
##      class.2
## vote  Bureaus Commons    Sum
##   yes     915   97742  98657
##   no      845   73304  74149
##   Sum    1760  171046 172806
# margin = 2 normalises within each column, i.e. yes/no shares per group.
prop.table(
  xtabs(counts ~ vote + class.2, data = sejong.poll.2),
  margin = 2
)
##      class.2
## vote  Bureaus Commons
##   yes   0.520   0.571
##   no    0.480   0.429
# NOTE(review): attach() is retained only so that code referring to columns
# by bare name keeps working. This section reads columns through
# xtabs(data =, subset =) and does not rely on it; prefer dropping attach()
# once no bare-name references remain.
attach(sejong.poll.2)
# Two pies side by side.
par(mfrow = c(1, 2))
# Vote totals within each class.2 group. `subset` is evaluated inside
# `data`, so the column is found without help from the search path.
bureau.votes <- xtabs(counts ~ vote, data = sejong.poll.2,
                      subset = class.2 == "Bureaus")
commons.votes <- xtabs(counts ~ vote, data = sejong.poll.2,
                       subset = class.2 == "Commons")
# Slice colours follow the vote-level order (yes, no).
pie(bureau.votes, labels = c("yes", "no"), col = c("cyan", "red"))
title(main = "Bureaucrats by vote")
# Counts printed on the slices come from the table itself.
text(x = 0, y = c(0.4, -0.4), labels = as.character(bureau.votes))
pie(commons.votes, labels = c("yes", "no"), col = c("cyan", "red"))
title(main = "Commons by vote")
text(x = 0, y = c(0.4, -0.4), labels = as.character(commons.votes))

# Back to a single plot per device page.
par(mfrow = c(1, 1))
# Vote by region for the "Bureaus" group. Rows are selected with `subset`
# (evaluated inside `data`), and regions left without rows are dropped
# through the fully spelled-out drop.unused.levels argument.
xtabs(counts ~ vote + region, data = sejong.poll.2,
      subset = class.2 == "Bureaus", drop.unused.levels = TRUE)
##      region
## vote  Seoul Gyunggi Pyungan Hwanghae Chungcheong Kangwon Hamgil Gyungsang
##   yes   723      29       6       17          35       5      3        55
##   no    704       5      36       17          28      10     15        16
##      region
## vote  Jeolla
##   yes     42
##   no      14
# Same table for the "Commons" group.
xtabs(counts ~ vote + region, data = sejong.poll.2,
      subset = class.2 == "Commons", drop.unused.levels = TRUE)
##      region
## vote   Yuhu Gyunggi Pyungan Hwanghae Chungcheong Kangwon Hamgil Gyungsang
##   yes  1123   17076    1326     4454        6982     939     75     36262
##   no     71     236   28474    15601       14013    6888   7387       377
##      region
## vote  Jeolla
##   yes  29505
##   no     257
# Seoul only: vote by class. Classes with no Seoul rows are dropped.
xtabs(counts ~ vote + class, data = sejong.poll.2,
      subset = region == "Seoul", drop.unused.levels = TRUE)
##      class
## vote  High 3rd.current 3rd.former
##   yes   21         259        443
##   no   194         393        117
# Stacked bars, one per class; segment colours follow the vote-level order
# (yes, no) and match the legend below.
barplot(
  xtabs(counts ~ vote + class, data = sejong.poll.2,
        subset = region == "Seoul", drop.unused.levels = TRUE),
  col = c("cyan", "red")
)
title(main = "Seoul by vote")
# Hand-placed count labels for the bar segments.
text(x = c(0.7, 1.9, 1.9, 3.1, 3.1), y = c(120, 450, 135, 500, 220),
     labels = c("194", "393", "259", "117", "443"))
legend("topleft", inset = 0.05, fill = c("cyan", "red"),
       legend = c("yes", "no"))

# Mosaic view of the same counts, with class first and vote second.
# mosaicplot's colour argument is named `color`.
mosaicplot(
  xtabs(counts ~ class + vote, data = sejong.poll.2,
        subset = region == "Seoul", drop.unused.levels = TRUE),
  color = c("cyan", "red"),
  main = "Seoul by vote"
)

# "Bureaus" group outside Seoul: vote by region. Rows are chosen with
# `subset`, which is evaluated inside `data`; unused regions are dropped.
xtabs(counts ~ vote + region, data = sejong.poll.2,
      subset = class.2 == "Bureaus" & region != "Seoul",
      drop.unused.levels = TRUE)
##      region
## vote  Gyunggi Pyungan Hwanghae Chungcheong Kangwon Hamgil Gyungsang Jeolla
##   yes      29       6       17          35       5      3        55     42
##   no        5      36       17          28      10     15        16     14
# Stacked bars per region; colours follow the vote-level order (yes, no)
# and match the legend.
barplot(
  xtabs(counts ~ vote + region, data = sejong.poll.2,
        subset = class.2 == "Bureaus" & region != "Seoul",
        drop.unused.levels = TRUE),
  col = c("cyan", "red")
)
title(main = "Bureaucrats' vote by region other than Seoul")
legend("topleft", inset = 0.05, fill = c("cyan", "red"),
       legend = c("yes", "no"))

# Mosaic view with region first and vote second; the title is added
# separately, so `main` is blanked here.
mosaicplot(
  xtabs(counts ~ region + vote, data = sejong.poll.2,
        subset = class.2 == "Bureaus" & region != "Seoul",
        drop.unused.levels = TRUE),
  color = c("cyan", "red"),
  main = ""
)
title(main = "Bureaucrats' vote by region other than Seoul")

# "Commons" group: vote by region. Rows are chosen with `subset`
# (evaluated inside `data`); regions without Commons rows are dropped.
xtabs(counts ~ vote + region, data = sejong.poll.2,
      subset = class.2 == "Commons", drop.unused.levels = TRUE)
##      region
## vote   Yuhu Gyunggi Pyungan Hwanghae Chungcheong Kangwon Hamgil Gyungsang
##   yes  1123   17076    1326     4454        6982     939     75     36262
##   no     71     236   28474    15601       14013    6888   7387       377
##      region
## vote  Jeolla
##   yes  29505
##   no     257
# Stacked bars per region; colours follow the vote-level order (yes, no)
# and match the legend.
barplot(
  xtabs(counts ~ vote + region, data = sejong.poll.2,
        subset = class.2 == "Commons", drop.unused.levels = TRUE),
  col = c("cyan", "red")
)
title(main = "Commons' vote by region")
legend("topleft", inset = 0.05, fill = c("cyan", "red"),
       legend = c("yes", "no"))

# Mosaic view with region first and vote second.
mosaicplot(
  xtabs(counts ~ region + vote, data = sejong.poll.2,
        subset = class.2 == "Commons", drop.unused.levels = TRUE),
  color = c("cyan", "red"),
  main = "Commons' votes by region"
)

# Chungcheong only: vote by class. Classes with no rows there are dropped.
xtabs(counts ~ vote + class, data = sejong.poll.2,
      subset = region == "Chungcheong", drop.unused.levels = TRUE)
##      class
## vote   High Chief Commons
##   yes     0    35    6982
##   no      2    26   14013
# margin = 2 gives yes/no shares within each class.
prop.table(
  xtabs(counts ~ vote + class, data = sejong.poll.2,
        subset = region == "Chungcheong", drop.unused.levels = TRUE),
  margin = 2
)
##      class
## vote   High Chief Commons
##   yes 0.000 0.574   0.333
##   no  1.000 0.426   0.667
# Stacked proportion bars. The y range extends to 1.5 while the custom axis
# stops at 100%, leaving headroom above the bars where the legend is drawn.
# Default axes are suppressed so the percentage axis can be drawn instead.
barplot(
  prop.table(
    xtabs(counts ~ vote + class, data = sejong.poll.2,
          subset = region == "Chungcheong", drop.unused.levels = TRUE),
    margin = 2
  ),
  col = c("cyan", "red"),
  ylim = c(0, 1.5),
  axes = FALSE
)
axis(side = 2, at = c(0, 0.5, 1.0), labels = c("0", "50%", "100%"))
title(main = "Chungcheong's vote proportion by class")
legend("topleft", inset = 0.05, fill = c("cyan", "red"),
       legend = c("yes", "no"))
# Hand-placed raw counts on the bar segments.
text(x = c(0.7, 1.9, 1.9, 3.1, 3.1), y = c(0.5, 0.3, 0.8, 0.15, 0.65),
     labels = c(2, 35, 26, 6982, 14013))

# Mosaic view with class first and vote second; the title is added
# separately, so `main` is blanked here.
mosaicplot(
  xtabs(counts ~ class + vote, data = sejong.poll.2,
        subset = region == "Chungcheong", drop.unused.levels = TRUE),
  color = c("cyan", "red"),
  main = ""
)
title(main = "Chungcheong's vote")

# Write every object in the workspace to an .rda file.
save.image(file = "sejong_poll0328.rda")
# Command history exists only in interactive sessions; savehistory() signals
# an error otherwise, which would stop a batch run at this line.
if (interactive()) {
  savehistory(file = "sejong_poll0328.Rhistory")
}
# End the session; "no" skips saving the workspace again on exit.
q("no")