Vamos a ver algunos detalles de procesamiento de nuestra base de datos

# Point the session at the course folder and load the survey file.
# NOTE(review): this absolute path only exists on the author's machine;
# adjust it before running anywhere else.
setwd("/Users/mau/Dropbox/Cursos mau/Maestria CEI")
# stringsAsFactors = TRUE keeps text columns as factors. That was the default
# before R 4.0 and it is what the recorded str() output in this document
# shows; without it, current R versions load those columns as character.
data <- read.csv("BaseIP.csv", header = TRUE, stringsAsFactors = TRUE)

¿Qué hacemos si no estamos seguros de qué tipo de variable es nuestra variable?

# Show how R stored two of the columns; the recorded output reports both as
# factors.
str(data[["s2"]])
##  Factor w/ 81 levels "18","19","20",..: 4 7 6 4 7 7 10 1 11 10 ...
str(data[["p1"]])
##  Factor w/ 6 levels "Algo","Mucho",..: 1 5 1 1 2 6 1 1 1 5 ...

Primero vamos a tratar de convertir s2 a numérica sin perder la información original. Para eso vamos a crear una nueva variable “duplicando” s2.

# Work on a copy of s2 so the original column stays untouched.
data[["edad"]] <- data[["s2"]]
str(data[["edad"]])
##  Factor w/ 81 levels "18","19","20",..: 4 7 6 4 7 7 10 1 11 10 ...
# Frequency of every level of the copy.
table(data[["edad"]])
## 
##    18    19    20    21    22    23    24    25    26    27    28    29 
##   277   255   253   226   241   240   252   226   185   213   241   237 
##    30    31    32    33    34    35    36    37    38    39    40    41 
##   285   154   228   212   191   204   244   216   248   218   335   135 
##    42    43    44    45    46    47    48    49    50    51    52    53 
##   299   207   178   280   165   185   274   200   274   147   228   192 
##    54    55    56    57    58    59    60    61    62    63    64    65 
##   184   151   200   167   177   139   174    95   126   119    85   135 
##    66    67    68    69    70    71    72    73    74    75    76    77 
##    85    87   110    53   117    45    69    53    61    56    64    36 
##    78    79    80    81    82    83    84    85    86    87    88    89 
##    39    36    47    15    14    27    23    15     9    12     5     7 
##    90    91    92    93    94    96    97    98 NS/NC 
##     5     3     2     1     2     2     1     1     1

Vamos a recodificar la variable en grupos de edad

Tenemos que indicar a R que las etiquetas realmente representan valores numéricos; para eso tenemos que usar otra función.

# Convert through character so the numbers come from the labels themselves
# rather than from the factor's internal codes. Labels that are not numbers
# become NA, which is what triggers the recorded warning.
data[["edad"]] <- as.numeric(as.character(data[["s2"]]))
## Warning: NAs introducidos por coerción
str(data[["edad"]])
##  num [1:11000] 21 24 23 21 24 24 27 18 28 27 ...
table(data[["edad"]])
## 
##  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35 
## 277 255 253 226 241 240 252 226 185 213 241 237 285 154 228 212 191 204 
##  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53 
## 244 216 248 218 335 135 299 207 178 280 165 185 274 200 274 147 228 192 
##  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71 
## 184 151 200 167 177 139 174  95 126 119  85 135  85  87 110  53 117  45 
##  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89 
##  69  53  61  56  64  36  39  36  47  15  14  27  23  15   9  12   5   7 
##  90  91  92  93  94  96  97  98 
##   5   3   2   1   2   2   1   1

Asi pudimos mantener los valores originales

Ahora si vamos a recodificarla

Podemos recodificar un solo valor

# car supplies recode(), which the recoding steps in this document rely on.
library(car)
# Recode one value: age 18 becomes 19; every other value is left as it is.
data[["gedad"]] <- recode(data[["edad"]], "18=19")
table(data[["gedad"]])
## 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
## 532 253 226 241 240 252 226 185 213 241 237 285 154 228 212 191 204 244 
##  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
## 216 248 218 335 135 299 207 178 280 165 185 274 200 274 147 228 192 184 
##  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72 
## 151 200 167 177 139 174  95 126 119  85 135  85  87 110  53 117  45  69 
##  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
##  53  61  56  64  36  39  36  47  15  14  27  23  15   9  12   5   7   5 
##  91  92  93  94  96  97  98 
##   3   2   1   2   2   1   1
# Sanity check: the new count for 19 equals the original counts of 18 and 19
# added together.
sum(277, 255)
## [1] 532

Tambien podemos recodificar un conjunto de valores…

# Recode sets of values: 18-19 go to group 1 and 20-24 to group 2; values not
# mentioned keep their original number.
data[["gedad"]] <- car::recode(data[["edad"]], "c(18,19)=1;c(20,21,22,23,24)=2")
table(data[["gedad"]])
## 
##    1    2   25   26   27   28   29   30   31   32   33   34   35   36   37 
##  532 1212  226  185  213  241  237  285  154  228  212  191  204  244  216 
##   38   39   40   41   42   43   44   45   46   47   48   49   50   51   52 
##  248  218  335  135  299  207  178  280  165  185  274  200  274  147  228 
##   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67 
##  192  184  151  200  167  177  139  174   95  126  119   85  135   85   87 
##   68   69   70   71   72   73   74   75   76   77   78   79   80   81   82 
##  110   53  117   45   69   53   61   56   64   36   39   36   47   15   14 
##   83   84   85   86   87   88   89   90   91   92   93   94   96   97   98 
##   27   23   15    9   12    5    7    5    3    2    1    2    2    1    1

La ventaja de esta forma es que los valores no tienen que ser consecutivos.

# The listed values need not be contiguous: ages that are multiples of ten go
# to group 1 and everything else falls into group 2 through `else`.
# The recorded counts add up to all 11000 rows, so the row whose age is NA is
# also placed in group 2 by `else`.
data[["gedad"]] <- car::recode(data[["edad"]], "c(20,30,40,50,60,70,80,90)=1;else=2")
table(data[["gedad"]])
## 
##    1    2 
## 1490 9510
# Cross-tabulate original age against the new groups to verify the recode.
table(data[["edad"]], data[["gedad"]])
##     
##        1   2
##   18   0 277
##   19   0 255
##   20 253   0
##   21   0 226
##   22   0 241
##   23   0 240
##   24   0 252
##   25   0 226
##   26   0 185
##   27   0 213
##   28   0 241
##   29   0 237
##   30 285   0
##   31   0 154
##   32   0 228
##   33   0 212
##   34   0 191
##   35   0 204
##   36   0 244
##   37   0 216
##   38   0 248
##   39   0 218
##   40 335   0
##   41   0 135
##   42   0 299
##   43   0 207
##   44   0 178
##   45   0 280
##   46   0 165
##   47   0 185
##   48   0 274
##   49   0 200
##   50 274   0
##   51   0 147
##   52   0 228
##   53   0 192
##   54   0 184
##   55   0 151
##   56   0 200
##   57   0 167
##   58   0 177
##   59   0 139
##   60 174   0
##   61   0  95
##   62   0 126
##   63   0 119
##   64   0  85
##   65   0 135
##   66   0  85
##   67   0  87
##   68   0 110
##   69   0  53
##   70 117   0
##   71   0  45
##   72   0  69
##   73   0  53
##   74   0  61
##   75   0  56
##   76   0  64
##   77   0  36
##   78   0  39
##   79   0  36
##   80  47   0
##   81   0  15
##   82   0  14
##   83   0  27
##   84   0  23
##   85   0  15
##   86   0   9
##   87   0  12
##   88   0   5
##   89   0   7
##   90   5   0
##   91   0   3
##   92   0   2
##   93   0   1
##   94   0   2
##   96   0   2
##   97   0   1
##   98   0   1

Otra forma de recodificar es mediante el uso de rangos

# Recode with ranges; `hi` stands for the largest value present in the data.
data[["gedad"]] <- car::recode(data[["edad"]], "18:19=1;20:24=2;25:hi=3")
table(data[["gedad"]])
## 
##    1    2    3 
##  532 1212 9255
# Cross-tabulate original age against the new groups to verify the recode.
table(data[["edad"]], data[["gedad"]])
##     
##        1   2   3
##   18 277   0   0
##   19 255   0   0
##   20   0 253   0
##   21   0 226   0
##   22   0 241   0
##   23   0 240   0
##   24   0 252   0
##   25   0   0 226
##   26   0   0 185
##   27   0   0 213
##   28   0   0 241
##   29   0   0 237
##   30   0   0 285
##   31   0   0 154
##   32   0   0 228
##   33   0   0 212
##   34   0   0 191
##   35   0   0 204
##   36   0   0 244
##   37   0   0 216
##   38   0   0 248
##   39   0   0 218
##   40   0   0 335
##   41   0   0 135
##   42   0   0 299
##   43   0   0 207
##   44   0   0 178
##   45   0   0 280
##   46   0   0 165
##   47   0   0 185
##   48   0   0 274
##   49   0   0 200
##   50   0   0 274
##   51   0   0 147
##   52   0   0 228
##   53   0   0 192
##   54   0   0 184
##   55   0   0 151
##   56   0   0 200
##   57   0   0 167
##   58   0   0 177
##   59   0   0 139
##   60   0   0 174
##   61   0   0  95
##   62   0   0 126
##   63   0   0 119
##   64   0   0  85
##   65   0   0 135
##   66   0   0  85
##   67   0   0  87
##   68   0   0 110
##   69   0   0  53
##   70   0   0 117
##   71   0   0  45
##   72   0   0  69
##   73   0   0  53
##   74   0   0  61
##   75   0   0  56
##   76   0   0  64
##   77   0   0  36
##   78   0   0  39
##   79   0   0  36
##   80   0   0  47
##   81   0   0  15
##   82   0   0  14
##   83   0   0  27
##   84   0   0  23
##   85   0   0  15
##   86   0   0   9
##   87   0   0  12
##   88   0   0   5
##   89   0   0   7
##   90   0   0   5
##   91   0   0   3
##   92   0   0   2
##   93   0   0   1
##   94   0   0   2
##   96   0   0   2
##   97   0   0   1
##   98   0   0   1

Otra forma de hacer la recodificacion es usando “else”

# Three explicit ranges plus `else` as the catch-all fourth group.
age_groups <- recode(data[["edad"]], "18:19=1;20:24=2;25:29=3;
                   else=4")
data[["gedad"]] <- age_groups
table(data[["gedad"]])
## 
##    1    2    3    4 
##  532 1212 1102 8154

Finalmente, creamos grupos quinquenales de edad

# Five-year age groups: 18-19 is group 1, then 20-24, 25-29, ... up to 70-74,
# with 75 and above collected in group 13.
age_groups <- recode(data[["edad"]], "18:19=1;20:24=2;25:29=3;
                   30:34=4;35:39=5;40:44=6;45:49=7;
                   50:54=8;55:59=9;60:64=10;65:69=11;
                   70:74=12;75:hi=13")
data[["gedad"]] <- age_groups
table(data[["gedad"]])
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13 
##  532 1212 1102 1070 1130 1154 1104 1025  834  599  470  345  422

Vamos a asignar la variable s1 a una nueva variable.

# Copy s1 under a descriptive name and cross it with the age groups.
data[["sexo"]] <- data[["s1"]]
table(data[["sexo"]], data[["gedad"]])
##         
##            1   2   3   4   5   6   7   8   9  10  11  12  13
##   Hombre 230 544 458 436 460 514 491 444 391 288 219 166 217
##   Mujer  302 668 644 634 670 640 613 581 443 311 251 179 205

Primero vamos a crear una variable resumen de “libertades civiles”. Usaremos las variables:

- p21_1 -> Libertad para decir lo que uno piensa
- p21_2 -> Libertad para votar por el partido deseado
- p21_3 -> Libertad para practicar religión libremente

# Summary score of "civil liberties" built from items p21_1, p21_2 and p21_3:
#   1 = at least one item answered "Nunca"
#   2 = any other combination (the default)
#   3 = every item answered "Muchas veces" or "Siempre", but not all "Siempre"
#   4 = every item answered "Siempre"
# The earlier item-by-item version listed the level-3 combinations by hand.
# It repeated one of them and, in another, tested p21_3 against two different
# values at once (which can never be true), so two mixed combinations were
# never assigned level 3. Counting answers per row covers every combination.
# A row with a missing answer gets NA, because its row counts are NA.
p21 <- as.matrix(data[c("p21_1", "p21_2", "p21_3")])
any_never <- rowSums(p21 == "Nunca") > 0
all_often <- rowSums(p21 == "Muchas veces" | p21 == "Siempre") == 3
all_always <- rowSums(p21 == "Siempre") == 3

data$libertades <- 2
data$libertades <- ifelse(any_never, 1, data$libertades)
data$libertades <- ifelse(all_often, 3, data$libertades)
# Applied last so that "all Siempre" overrides the level 3 set just above.
data$libertades <- ifelse(all_always, 4, data$libertades)
# NOTE(review): the outputs recorded in this document that involve libertades
# were produced before this correction; re-run to refresh the figures for
# categories 2 and 3.
# Distribution of the score before non-responses are discarded.
table(data[["libertades"]])
## 
##    1    2    3    4 
##  555 4456 1693 4296
# A row with "NS/NC" in any of the three items loses its score (set to NA).
# NOTE(review): "NS/NC" is presumably the don't know / no answer code.
no_answer <- data$p21_1 == "NS/NC" | data$p21_2 == "NS/NC" |
  data$p21_3 == "NS/NC"
data$libertades <- ifelse(no_answer, NA, data$libertades)
table(data[["libertades"]])
## 
##    1    2    3    4 
##  548 4384 1693 4296
# Unweighted cross-tabulation of sex by liberties score.
table(data[["sexo"]], data[["libertades"]])
##         
##             1    2    3    4
##   Hombre  258 1890  744 1928
##   Mujer   290 2494  949 2368
# questionr supplies wtd.table() for weighted tabulations; the column ponde
# is passed as the weight.
library(questionr)
wtd.table(data[["libertades"]], weights = data[["ponde"]])
##        1        2        3        4 
##  5268046 36045526 16943751 27846695
wtd.table(data[["sexo"]], data[["libertades"]], weights = data[["ponde"]])
##               1        2        3        4
## Hombre  2727516 17465688  8115296 13465394
## Mujer   2540530 18579838  8828455 14381301

Podemos hacer algunos analisis preliminares…

# Column proportions (margin 2): share of each sex within every score level.
prop.table(table(data[["sexo"]], data[["libertades"]]), margin = 2)
##         
##                  1         2         3         4
##   Hombre 0.4708029 0.4311131 0.4394566 0.4487896
##   Mujer  0.5291971 0.5688869 0.5605434 0.5512104
# Welch two-sample t-test of the score by sex. The formula terms are kept as
# data$... so the "data:" label in the printed result stays the same.
t.test(data$libertades ~ data$sexo)
## 
##  Welch Two Sample t-test
## 
## data:  data$libertades by data$sexo
## t = 0.86456, df = 10288, p-value = 0.3873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02097154  0.05406876
## sample estimates:
## mean in group Hombre  mean in group Mujer 
##             2.900830             2.884281

Vamos a crear un indice

Para aprender mas de la creacion de indices recomiendo revisar:

Cecchini, S. (2005). Indicadores sociales en America Latina y El Caribe. CEPAL, Division de Estadistica y Proyecciones Economicas, Serie Estudios estadisticos y prospectivos No. 34 http://repositorio.cepal.org/bitstream/handle/11362/4735/1/S05707_es.pdf

Los indicadores deben ser:

- Precisos: miden el fenómeno que se desea medir
- Mensurables: replicables y con datos disponibles
- Relevantes: realmente sirven

Además, deben ser:

- Fáciles de interpretar
- Fiables
- Oportunos y puntuales

Ademas, vamos a aprovechar para hacer algunos agregados…

# Respondent-level conditions, computed once and reused for both the
# unweighted (0/1) and the weighted (ponde/0) version of each indicator.
often_p21_1 <- data$p21_1 == "Siempre" | data$p21_1 == "Muchas veces"
often_p21_2 <- data$p21_2 == "Siempre" | data$p21_2 == "Muchas veces"
often_p21_3 <- data$p21_3 == "Siempre" | data$p21_3 == "Muchas veces"
no_p45_7 <- data$p45_7 == "No"
no_p45_6 <- data$p45_6 == "No"

# Unweighted indicators: 1 when the condition holds, 0 otherwise; total
# counts every respondent once.
data$libIde <- ifelse(often_p21_1, 1, 0)
data$libPol <- ifelse(often_p21_2, 1, 0)
data$libRel <- ifelse(often_p21_3, 1, 0)
data$discRel <- ifelse(no_p45_7, 1, 0)
data$discPol <- ifelse(no_p45_6, 1, 0)
data$total <- 1

# Weighted versions: the respondent contributes ponde instead of 1.
data$libIde2 <- ifelse(often_p21_1, data$ponde, 0)
data$libPol2 <- ifelse(often_p21_2, data$ponde, 0)
data$libRel2 <- ifelse(often_p21_3, data$ponde, 0)
data$discRel2 <- ifelse(no_p45_7, data$ponde, 0)
data$discPol2 <- ifelse(no_p45_6, data$ponde, 0)
data$total2 <- data$ponde


# Because the indicators are 0/1, each mean is the unweighted share of
# respondents that meet the condition.
summary(data[["libIde"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.6194  1.0000  1.0000
summary(data[["libPol"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   1.000   0.816   1.000   1.000
summary(data[["libRel"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   1.000   0.852   1.000   1.000
# Columns kept in the respondent-level extract.
columns <- c(
  "edo", "muni",
  "libIde", "libPol", "libRel", "total",
  "libIde2", "libPol2", "libRel2", "total2",
  "discRel", "discPol", "discRel2", "discPol2"
)
data2a <- data[columns]

# Sum every indicator within each edo/muni combination.
data2 <- aggregate(
  cbind(libIde, libPol, libRel, total, libIde2, libPol2, libRel2,
        discRel, discPol, discRel2, discPol2, total2) ~ edo + muni,
  data = data, FUN = sum
)

# Weighted percentages: each weighted sum divided by the weighted total of
# the same edo/muni row, times 100.
pct_source <- c(plib1 = "libIde2", plib2 = "libPol2", plib3 = "libRel2",
                disc1 = "discRel2", disc2 = "discPol2")
for (pct_name in names(pct_source)) {
  data2[[pct_name]] <- data2[[pct_source[[pct_name]]]] / data2[["total2"]] * 100
}

# Distribution of each aggregated percentage across the edo/muni rows.
summary(data2[["plib1"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   47.59   62.21   62.46   79.29  100.00
summary(data2[["plib2"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   70.68   84.03   80.11   95.20  100.00
summary(data2[["plib3"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   74.09   89.22   82.80  100.00  100.00
summary(data2[["disc1"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   27.52   43.22   44.88   61.69  100.00
summary(data2[["disc2"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   27.93   44.94   46.84   65.04  100.00

Ahora usamos Analisis de Componentes Principales para crear alguna medida resumen

# Principal component analysis of the five aggregated percentages.
# The variables are standardised before the decomposition (scale. = TRUE) and
# the component scores are kept in pca$x (retx = TRUE).
# prcomp's argument is named `scale.`; it is written out in full instead of
# relying on partial matching of `scale`, and TRUE replaces the reassignable
# shorthand T.
pca <- prcomp(~ plib1 + plib2 + plib3 + disc1 + disc2, data = data2,
              scale. = TRUE, retx = TRUE)
print(pca)
## Standard deviations:
## [1] 1.5784498 1.3045284 0.6985024 0.4294492 0.3665645
## 
## Rotation:
##              PC1        PC2         PC3         PC4        PC5
## plib1 -0.4908836  0.1597613 -0.85330243  0.06548943  0.0331017
## plib2 -0.5291587  0.3064807  0.30197613 -0.72269453 -0.1121772
## plib3 -0.5045560  0.3423987  0.41025080  0.66740133  0.1202335
## disc1 -0.3468340 -0.6080516  0.06882068  0.13322289 -0.6982083
## disc2 -0.3227286 -0.6273614  0.08742644 -0.10124665  0.6959667
# Scree plot drawn as a line.
plot(pca, type = "lines")

# Share of variance explained by each component.
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5
## Standard deviation     1.5784 1.3045 0.69850 0.42945 0.36656
## Proportion of Variance 0.4983 0.3404 0.09758 0.03689 0.02687
## Cumulative Proportion  0.4983 0.8387 0.93624 0.97313 1.00000
# Loadings of every variable on every component.
pca[["rotation"]]
##              PC1        PC2         PC3         PC4        PC5
## plib1 -0.4908836  0.1597613 -0.85330243  0.06548943  0.0331017
## plib2 -0.5291587  0.3064807  0.30197613 -0.72269453 -0.1121772
## plib3 -0.5045560  0.3423987  0.41025080  0.66740133  0.1202335
## disc1 -0.3468340 -0.6080516  0.06882068  0.13322289 -0.6982083
## disc2 -0.3227286 -0.6273614  0.08742644 -0.10124665  0.6959667
# Store the scores of the first two components next to the aggregated data.
data2[["pc1"]] <- pca[["x"]][, "PC1"]
data2[["pc2"]] <- pca[["x"]][, "PC2"]

# Flag rows with edo equal to 9 (1 = yes, 0 = no).
# NOTE(review): presumably edo 9 is Mexico City, given the name cdmx; confirm.
data2[["cdmx"]] <- as.numeric(data2[["edo"]] == 9)

library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:questionr':
## 
##     describe
## The following object is masked from 'package:car':
## 
##     logit
# Descriptive statistics of plib1 for each value of the cdmx flag.
describeBy(data2[["plib1"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n mean    sd median trimmed   mad min max range  skew kurtosis
## X1    1 334 62.7 22.97  62.59   63.71 23.23   0 100   100 -0.36    -0.29
##      se
## X1 1.26
## -------------------------------------------------------- 
## group: 1
##    vars  n  mean   sd median trimmed   mad   min   max range skew kurtosis
## X1    1 14 56.89 9.65  57.63   56.66 13.52 44.65 71.83 27.18  0.2    -1.52
##      se
## X1 2.58
# Descriptive statistics of plib2 for each value of the cdmx flag.
describeBy(data2[["plib2"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 334 80.09 19.88  84.39   83.13 19.33   0 100   100 -1.3     1.66
##      se
## X1 1.09
## -------------------------------------------------------- 
## group: 1
##    vars  n  mean   sd median trimmed  mad   min   max range  skew kurtosis
## X1    1 14 80.49 8.59  82.17   80.41 8.41 67.89 93.99  26.1 -0.12    -1.46
##     se
## X1 2.3
# Descriptive statistics of plib3 for each value of the cdmx flag.
describeBy(data2[["plib3"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n  mean    sd median trimmed   mad min max range  skew kurtosis
## X1    1 334 82.52 19.71  89.03   85.87 16.27   0 100   100 -1.58     2.72
##      se
## X1 1.08
## -------------------------------------------------------- 
## group: 1
##    vars  n  mean   sd median trimmed mad   min   max range  skew kurtosis
## X1    1 14 89.61 5.37  91.55   89.97 3.6 78.96 95.97 17.01 -0.78    -0.73
##      se
## X1 1.44
# Descriptive statistics of disc1 for each value of the cdmx flag.
describeBy(data2[["disc1"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 334 44.85 25.33  43.22   44.07 26.08   0 100   100 0.27    -0.64
##      se
## X1 1.39
## -------------------------------------------------------- 
## group: 1
##    vars  n mean    sd median trimmed  mad   min   max range skew kurtosis
## X1    1 14 45.6 15.15  40.86   43.49 8.99 30.11 86.41  56.3 1.31     1.04
##      se
## X1 4.05
# Descriptive statistics of disc2 for each value of the cdmx flag.
describeBy(data2[["disc2"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 334 47.08 26.19  45.04   46.07 29.39   0 100   100  0.3    -0.76
##      se
## X1 1.43
## -------------------------------------------------------- 
## group: 1
##    vars  n  mean    sd median trimmed   mad   min max range skew kurtosis
## X1    1 14 41.14 11.16  41.18   40.56 11.98 26.28  63 36.72 0.28    -1.21
##      se
## X1 2.98
# Descriptive statistics of the first component score by cdmx flag.
describeBy(data2[["pc1"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n mean   sd median trimmed  mad   min  max range skew kurtosis
## X1    1 334    0 1.61  -0.18   -0.12 1.57 -3.23 6.52  9.75 0.79     0.92
##      se
## X1 0.09
## -------------------------------------------------------- 
## group: 1
##    vars  n  mean   sd median trimmed  mad   min  max range skew kurtosis
## X1    1 14 -0.01 0.54  -0.09   -0.04 0.39 -0.81 1.17  1.98 0.82    -0.22
##      se
## X1 0.14
# Descriptive statistics of the second component score by cdmx flag.
describeBy(data2[["pc2"]], group = data2[["cdmx"]])
## 
##  Descriptive statistics by group 
## group: 0
##    vars   n  mean   sd median trimmed  mad   min  max range  skew kurtosis
## X1    1 334 -0.01 1.32   0.15    0.06 1.36 -5.06 3.06  8.11 -0.52     0.43
##      se
## X1 0.07
## -------------------------------------------------------- 
## group: 1
##    vars  n mean   sd median trimmed  mad   min max range  skew kurtosis
## X1    1 14 0.21 0.67   0.39     0.3 0.52 -1.54 0.9  2.43 -1.22     0.77
##      se
## X1 0.18

Finalmente, ¿y si queremos ver si hay diferencias significativas?

# Welch two-sample t-test comparing the mean of plib1 between cdmx groups.
t.test(data2$plib1~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$plib1 by data2$cdmx
## t = 2.0233, df = 19.855, p-value = 0.05672
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1827518 11.7962394
## sample estimates:
## mean in group 0 mean in group 1 
##        62.69557        56.88883
# Welch two-sample t-test comparing the mean of plib2 between cdmx groups.
t.test(data2$plib2~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$plib2 by data2$cdmx
## t = -0.15613, df = 19.455, p-value = 0.8775
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.706591  4.913150
## sample estimates:
## mean in group 0 mean in group 1 
##        80.09040        80.48712
# Welch two-sample t-test comparing the mean of plib3 between cdmx groups.
# NOTE(review): this call used to repeat the plib2 test, leaving plib3
# untested. The output recorded just below still comes from that duplicate
# run and must be regenerated.
t.test(data2$plib3~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$plib2 by data2$cdmx
## t = -0.15613, df = 19.455, p-value = 0.8775
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.706591  4.913150
## sample estimates:
## mean in group 0 mean in group 1 
##        80.09040        80.48712
# Welch two-sample t-test comparing the mean of disc1 between cdmx groups.
t.test(data2$disc1~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$disc1 by data2$cdmx
## t = -0.17471, df = 16.217, p-value = 0.8635
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.810491  8.315057
## sample estimates:
## mean in group 0 mean in group 1 
##        44.85454        45.60226
# Welch two-sample t-test comparing the mean of disc2 between cdmx groups.
t.test(data2$disc2~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$disc2 by data2$cdmx
## t = 1.7943, df = 19.658, p-value = 0.08816
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9728268 12.8471294
## sample estimates:
## mean in group 0 mean in group 1 
##        47.08002        41.14286
# Welch two-sample t-test of the first component score between cdmx groups.
t.test(data2$pc1~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$pc1 by data2$cdmx
## t = 0.030787, df = 24.251, p-value = 0.9757
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3441538  0.3545825
## sample estimates:
## mean in group 0 mean in group 1 
##    0.0002097722   -0.0050045656
# Welch two-sample t-test of the second component score between cdmx groups.
t.test(data2$pc2~data2$cdmx)
## 
##  Welch Two Sample t-test
## 
## data:  data2$pc2 by data2$cdmx
## t = -1.1276, df = 17.649, p-value = 0.2746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6210288  0.1876157
## sample estimates:
## mean in group 0 mean in group 1 
##    -0.008718078     0.207988440