Dataset Import and summary

# Show the working directory; the relative CSV path below is resolved against it.
getwd()
## [1] "\\\\vrisi01/users$/oadesanya/Documents"
# Import the raw voter file. stringsAsFactors = TRUE is spelled out because the
# summaries and plots further down treat the text columns as factors; since
# R 4.0.0 read.csv() no longer converts strings to factors by default.
LWData <- read.csv("LWV_Data.csv", stringsAsFactors = TRUE)
# read.csv() already returns a data.frame, so no extra data.frame() wrap is
# needed; the LWData1 name is kept because later code refers to it.
LWData1 <- LWData
# Keep only the rows whose `control` value is not missing
# (presumably the rows assigned to an experimental group -- confirm).
LWPop <- LWData1[!is.na(LWData1$control), ]
# Preview the first 20 rows of the filtered data.
head(LWPop, 20)
##     VOTED2014 Young.Hispanic.Status ID.Number Voter.Status Voted.11.2012
## 39          0               non_y_h      5461            A             1
## 51          0               non_y_h      6832            A             0
## 217         0           non_y_non_h     16298            A             0
## 313         1               non_y_h     20802            A             0
## 370         0           non_y_non_h     23641            A             0
## 374         0           non_y_non_h     23821            A             0
## 406         0           non_y_non_h     25164            A             0
## 432         0           non_y_non_h     27016            A             0
## 450         1           non_y_non_h     28260            A             0
## 455         0           non_y_non_h     28523            A             0
## 465         0           non_y_non_h     28929            A             0
## 487         1               non_y_h     31178            A             0
## 531         0           non_y_non_h     34400            A             0
## 539         0           non_y_non_h     35136            A             0
## 546         1           non_y_non_h     36004            A             0
## 605         0           non_y_non_h     40437            A             0
## 615         0           non_y_non_h     40970            A             0
## 779         0               non_y_h     55715            A             1
## 861         0           non_y_non_h     61902            A             0
## 871         0           non_y_non_h     63126            A             0
##     Voted.Gen..Elec..09.2010 Voted.Gen..Elec..07.2008
## 39                         0                        0
## 51                         0                        0
## 217                        0                        1
## 313                        0                        1
## 370                        1                        0
## 374                        0                        1
## 406                        0                        1
## 432                        1                        0
## 450                        0                        0
## 455                        0                        0
## 465                        0                        0
## 487                        0                        1
## 531                        0                        0
## 539                        0                        1
## 546                        0                        1
## 605                        0                        0
## 615                        0                        0
## 779                        0                        0
## 861                        0                        0
## 871                        0                        0
##     Number.General.Elections Hispanic.Surname Young.Voter Eligible.2012
## 39                         1                1           0             1
## 51                         0                1           0             1
## 217                        1                0           0             1
## 313                        1                1           0             1
## 370                        1                0           0             1
## 374                        1                0           0             1
## 406                        1                0           0             1
## 432                        1                0           0             1
## 450                        0                0           0             1
## 455                        0                0           0             1
## 465                        0                0           0             1
## 487                        1                1           0             1
## 531                        0                0           0             1
## 539                        1                0           0             1
## 546                        1                0           0             1
## 605                        0                0           0             1
## 615                        0                0           0             1
## 779                        1                1           0             1
## 861                        0                0           0             1
## 871                        0                0           0             1
##     Eligible.2010 Eligible.2008 Young.in.2012 Young.in.2010 Young.in.2008
## 39              1             1             0             0             0
## 51              1             1             0             0             0
## 217             1             1             0             0             0
## 313             1             1             0             0             0
## 370             1             1             0             0             0
## 374             1             1             0             0             0
## 406             1             1             0             0             0
## 432             1             1             0             0             0
## 450             1             1             0             0             0
## 455             1             1             0             0             0
## 465             1             1             0             0             0
## 487             1             1             0             0             0
## 531             1             1             0             0             0
## 539             1             1             0             0             0
## 546             1             1             0             0             0
## 605             1             1             0             0             0
## 615             1             1             0             0             0
## 779             1             1             0             0             0
## 861             1             1             0             0             0
## 871             1             1             0             0             0
##       Voter.Category             type    ID control post flyer LOWPROP
## 39      Old Hispanic     Non_y_h_POST  5461       0    1     0       1
## 51      Old Hispanic     Non_y_h_POST  6832       0    1     0       1
## 217 Old Not Hispanic Non_y_non_h_POST 16298       0    1     0       1
## 313     Old Hispanic     Non_y_h_POST 20802       0    1     0       1
## 370 Old Not Hispanic Non_y_non_h_POST 23641       0    1     0       1
## 374 Old Not Hispanic Non_y_non_h_POST 23821       0    1     0       1
## 406 Old Not Hispanic Non_y_non_h_POST 25164       0    1     0       1
## 432 Old Not Hispanic Non_y_non_h_POST 27016       0    1     0       1
## 450 Old Not Hispanic Non_y_non_h_POST 28260       0    1     0       1
## 455 Old Not Hispanic Non_y_non_h_POST 28523       0    1     0       1
## 465 Old Not Hispanic Non_y_non_h_POST 28929       0    1     0       1
## 487     Old Hispanic     Non_y_h_POST 31178       0    1     0       1
## 531 Old Not Hispanic Non_y_non_h_POST 34400       0    1     0       1
## 539 Old Not Hispanic Non_y_non_h_POST 35136       0    1     0       1
## 546 Old Not Hispanic Non_y_non_h_POST 36004       0    1     0       1
## 605 Old Not Hispanic Non_y_non_h_POST 40437       0    1     0       1
## 615 Old Not Hispanic Non_y_non_h_POST 40970       0    1     0       1
## 779     Old Hispanic     Non_y_h_POST 55715       0    1     0       1
## 861 Old Not Hispanic Non_y_non_h_POST 61902       0    1     0       1
## 871 Old Not Hispanic Non_y_non_h_POST 63126       0    1     0       1
##              city   zip U_S__CONGRESS byear
## 39     CARROLLTON 75007            24  1937
## 51     CARROLLTON 75006            24  1911
## 217       GARLAND 75042            32  1922
## 313       GARLAND 75040            32  1938
## 370       GARLAND 75041            32  1911
## 374       GARLAND 75041            32  1927
## 406       GARLAND 75043            32  1926
## 432       GARLAND 75040            32  1928
## 450       GARLAND 75040            32  1944
## 455       GARLAND 75040            32  1931
## 465       GARLAND 75044            32  1941
## 487       GARLAND 75041            32  1911
## 531       GARLAND 75041            32  1911
## 539    RICHARDSON 75081            32  1936
## 546      MESQUITE 75149             5  1939
## 605       GARLAND 75043            32  1938
## 615       GARLAND 75043            32  1911
## 779 GRAND PRAIRIE 75050            33  1944
## 861 GRAND PRAIRIE 75051            33  1954
## 871 GRAND PRAIRIE 75051            33  1909
# Column-by-column summary of the filtered data: quartiles and mean for numeric
# columns, level counts for factor columns.
summary(object = LWPop)
##    VOTED2014      Young.Hispanic.Status   ID.Number       Voter.Status
##  Min.   :0.0000   non_y_h    :6416      Min.   :   5461   A:24000     
##  1st Qu.:0.0000   non_y_non_h:6499      1st Qu.:3410570               
##  Median :0.0000   y_h        :5584      Median :3870405               
##  Mean   :0.1238   y_non_h    :5501      Mean   :3604325               
##  3rd Qu.:0.0000                         3rd Qu.:4067789               
##  Max.   :1.0000                         Max.   :4216505               
##                                                                       
##  Voted.11.2012    Voted.Gen..Elec..09.2010 Voted.Gen..Elec..07.2008
##  Min.   :0.0000   Min.   :0.000000         Min.   :0.0000          
##  1st Qu.:0.0000   1st Qu.:0.000000         1st Qu.:0.0000          
##  Median :0.0000   Median :0.000000         Median :0.0000          
##  Mean   :0.2317   Mean   :0.009583         Mean   :0.1155          
##  3rd Qu.:0.0000   3rd Qu.:0.000000         3rd Qu.:0.0000          
##  Max.   :1.0000   Max.   :1.000000         Max.   :1.0000          
##                                                                    
##  Number.General.Elections Hispanic.Surname  Young.Voter    
##  Min.   :0.0000           Min.   :0.0      Min.   :0.0000  
##  1st Qu.:0.0000           1st Qu.:0.0      1st Qu.:0.0000  
##  Median :0.0000           Median :0.5      Median :0.0000  
##  Mean   :0.3568           Mean   :0.5      Mean   :0.4619  
##  3rd Qu.:1.0000           3rd Qu.:1.0      3rd Qu.:1.0000  
##  Max.   :1.0000           Max.   :1.0      Max.   :1.0000  
##                                                            
##  Eligible.2012    Eligible.2010    Eligible.2008    Young.in.2012 
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00  
##  Median :1.0000   Median :1.0000   Median :0.0000   Median :1.00  
##  Mean   :0.7338   Mean   :0.5009   Mean   :0.4216   Mean   :0.54  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.00  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00  
##                                                                   
##  Young.in.2010    Young.in.2008               Voter.Category
##  Min.   :0.0000   Min.   :0.0000   Old Hispanic      :6416  
##  1st Qu.:0.0000   1st Qu.:0.0000   Old Not Hispanic  :6499  
##  Median :1.0000   Median :1.0000   Young Hispanic    :5584  
##  Mean   :0.5756   Mean   :0.6071   Young Not Hispanic:5501  
##  3rd Qu.:1.0000   3rd Qu.:1.0000                            
##  Max.   :1.0000   Max.   :1.0000                            
##                                                             
##                   type             ID             control      
##  Non_y_h_CONTROL    : 2000   Min.   :   5461   Min.   :0.0000  
##  Non_y_h_FLYER      : 2000   1st Qu.:3410570   1st Qu.:0.0000  
##  Non_y_h_POST       : 2000   Median :3870405   Median :0.0000  
##  Non_y_non_h_CONTROL: 2000   Mean   :3604325   Mean   :0.3333  
##  Non_y_non_h_FLYER  : 2000   3rd Qu.:4067789   3rd Qu.:1.0000  
##  Non_y_non_h_POST   : 2000   Max.   :4216505   Max.   :1.0000  
##  (Other)            :12000                                     
##       post            flyer           LOWPROP             city      
##  Min.   :0.0000   Min.   :0.0000   Min.   :1   DALLAS       :12271  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1   GARLAND      : 2190  
##  Median :0.0000   Median :0.0000   Median :1   IRVING       : 1907  
##  Mean   :0.3333   Mean   :0.3333   Mean   :1   MESQUITE     : 1381  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1   GRAND PRAIRIE: 1333  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1   RICHARDSON   :  657  
##                                                (Other)      : 4261  
##       zip        U_S__CONGRESS       byear     
##  Min.   :75001   Min.   : 5.00   Min.   :1908  
##  1st Qu.:75062   1st Qu.:30.00   1st Qu.:1967  
##  Median :75204   Median :30.00   Median :1984  
##  Mean   :75155   Mean   :27.21   Mean   :1977  
##  3rd Qu.:75223   3rd Qu.:32.00   3rd Qu.:1990  
##  Max.   :75254   Max.   :33.00   Max.   :1996  
## 

The first 20 rows of the population show that the majority of these voters are old and non-Hispanic. The population does not reflect a representative sample of young and Hispanic voters.

Descriptive statistics

# pastecs supplies stat.desc(), used below for descriptive statistics.
library(pastecs)
## Warning: package 'pastecs' was built under R version 3.3.1
## Loading required package: boot
# Display settings for the table below: a large scipen keeps numbers in fixed
# (non-scientific) notation and digits = 2 limits the significant digits shown.
# Both options remain in effect for the rest of the session.
options(scipen = 100, digits = 2)
# basic = FALSE leaves out the "basic" block, so only the descriptive rows
# (median, mean, SE, CI, variance, std. dev., coef. of variation) are printed.
# Non-numeric columns are reported as NA.
stat.desc(LWPop, basic = FALSE)
##              VOTED2014 Young.Hispanic.Status      ID.Number Voter.Status
## median          0.0000                    NA      3870405.0           NA
## mean            0.1238                    NA      3604325.4           NA
## SE.mean         0.0021                    NA         4603.4           NA
## CI.mean.0.95    0.0042                    NA         9023.0           NA
## var             0.1085                    NA 508600932920.5           NA
## std.dev         0.3294                    NA       713162.6           NA
## coef.var        2.6600                    NA            0.2           NA
##              Voted.11.2012 Voted.Gen..Elec..09.2010
## median              0.0000                  0.00000
## mean                0.2317                  0.00958
## SE.mean             0.0027                  0.00063
## CI.mean.0.95        0.0053                  0.00123
## var                 0.1780                  0.00949
## std.dev             0.4219                  0.09743
## coef.var            1.8210                 10.16623
##              Voted.Gen..Elec..07.2008 Number.General.Elections
## median                         0.0000                   0.0000
## mean                           0.1155                   0.3568
## SE.mean                        0.0021                   0.0031
## CI.mean.0.95                   0.0040                   0.0061
## var                            0.1022                   0.2295
## std.dev                        0.3196                   0.4791
## coef.var                       2.7674                   1.3427
##              Hispanic.Surname Young.Voter Eligible.2012 Eligible.2010
## median                 0.5000      0.0000        1.0000        1.0000
## mean                   0.5000      0.4619        0.7338        0.5009
## SE.mean                0.0032      0.0032        0.0029        0.0032
## CI.mean.0.95           0.0063      0.0063        0.0056        0.0063
## var                    0.2500      0.2486        0.1954        0.2500
## std.dev                0.5000      0.4986        0.4420        0.5000
## coef.var               1.0000      1.0794        0.6024        0.9983
##              Eligible.2008 Young.in.2012 Young.in.2010 Young.in.2008
## median              0.0000        1.0000        1.0000        1.0000
## mean                0.4216        0.5400        0.5756        0.6071
## SE.mean             0.0032        0.0032        0.0032        0.0032
## CI.mean.0.95        0.0062        0.0063        0.0063        0.0062
## var                 0.2439        0.2484        0.2443        0.2385
## std.dev             0.4938        0.4984        0.4943        0.4884
## coef.var            1.1713        0.9230        0.8586        0.8044
##              Voter.Category type             ID control  post flyer
## median                   NA   NA      3870405.0   0.000 0.000 0.000
## mean                     NA   NA      3604325.4   0.333 0.333 0.333
## SE.mean                  NA   NA         4603.4   0.003 0.003 0.003
## CI.mean.0.95             NA   NA         9023.0   0.006 0.006 0.006
## var                      NA   NA 508600932920.5   0.222 0.222 0.222
## std.dev                  NA   NA       713162.6   0.471 0.471 0.471
## coef.var                 NA   NA            0.2   1.414 1.414 1.414
##              LOWPROP city        zip U_S__CONGRESS     byear
## median             1   NA 75204.0000        30.000 1983.5000
## mean               1   NA 75154.9353        27.211 1977.1323
## SE.mean            0   NA     0.5120         0.057    0.1069
## CI.mean.0.95       0   NA     1.0035         0.111    0.2094
## var                0   NA  6290.8031        77.243  274.0387
## std.dev            0   NA    79.3146         8.789   16.5541
## coef.var           0   NA     0.0011         0.323    0.0084

This shows the number of old and young people included in the population. The result shows that old people make up the larger share of the population (12,915 old versus 11,085 young voters).

Counts of Population

# Number of voters in each combined age/ethnicity status code.
table(LWPop[["Young.Hispanic.Status"]])
## 
##     non_y_h non_y_non_h         y_h     y_non_h 
##        6416        6499        5584        5501
# Old (0) versus young (1) voters.
table(LWPop[["Young.Voter"]])
## 
##     0     1 
## 12915 11085
# Number of voters in each labelled voter category.
table(LWPop[["Voter.Category"]])
## 
##       Old Hispanic   Old Not Hispanic     Young Hispanic 
##               6416               6499               5584 
## Young Not Hispanic 
##               5501
# Size of the control group (1 = in the group).
table(LWPop[["control"]])
## 
##     0     1 
## 16000  8000
# Size of the post group (1 = in the group).
table(LWPop[["post"]])
## 
##     0     1 
## 16000  8000
# Size of the flyer group (1 = in the group).
table(LWPop[["flyer"]])
## 
##     0     1 
## 16000  8000
# How many of the 24000 people voted in 2014 (1 = voted).
table(LWPop[["VOTED2014"]])
## 
##     0     1 
## 21028  2972
## 
##     0     1 
## 21028  2972

Here, we plot the sample data and the whole data to compare the representation of young and Hispanic voters. The majority of voters in the whole data are non-young and non-Hispanic. This is not representative of the population the researcher would like to study.

Plot

# Distribution of age/ethnicity status in the filtered population (LWPop),
# used to check the sample for sampling error; drawn as a bar chart when the
# column is a factor.
plot(LWPop[["Young.Hispanic.Status"]])

# The same plot for the full imported data (LWData1), for comparison.
plot(LWData1[["Young.Hispanic.Status"]])