Vamos a ver algunos detalles de procesamiento de nuestra base de datos
# Point R at the course folder that holds the survey file.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
setwd("/Users/mau/Dropbox/Cursos mau/Maestria CEI")
# stringsAsFactors = TRUE keeps text columns as factors, which is what the
# str() output shown below reports. Since R 4.0.0 read.csv() would otherwise
# return character columns.
data <- read.csv("BaseIP.csv", header = TRUE, stringsAsFactors = TRUE)
¿Qué hacemos si no estamos seguros de qué tipo de variable es nuestra variable?
str(data$s2)
## Factor w/ 81 levels "18","19","20",..: 4 7 6 4 7 7 10 1 11 10 ...
str(data$p1)
## Factor w/ 6 levels "Algo","Mucho",..: 1 5 1 1 2 6 1 1 1 5 ...
Primero vamos a tratar de convertir s2 a numérica sin perder la información original. Para eso vamos a crear una nueva variable “duplicando” s2.
data$edad<-data$s2
str(data$edad)
## Factor w/ 81 levels "18","19","20",..: 4 7 6 4 7 7 10 1 11 10 ...
table(data$edad)
##
## 18 19 20 21 22 23 24 25 26 27 28 29
## 277 255 253 226 241 240 252 226 185 213 241 237
## 30 31 32 33 34 35 36 37 38 39 40 41
## 285 154 228 212 191 204 244 216 248 218 335 135
## 42 43 44 45 46 47 48 49 50 51 52 53
## 299 207 178 280 165 185 274 200 274 147 228 192
## 54 55 56 57 58 59 60 61 62 63 64 65
## 184 151 200 167 177 139 174 95 126 119 85 135
## 66 67 68 69 70 71 72 73 74 75 76 77
## 85 87 110 53 117 45 69 53 61 56 64 36
## 78 79 80 81 82 83 84 85 86 87 88 89
## 39 36 47 15 14 27 23 15 9 12 5 7
## 90 91 92 93 94 96 97 98 NS/NC
## 5 3 2 1 2 2 1 1 1
Vamos a recodificar la variable en grupos de edad
Tenemos que indicar a R que las etiquetas realmente representan valores numéricos; para eso tenemos que usar otra función.
data$edad<-as.numeric(as.character(data$s2))
## Warning: NAs introducidos por coerción
str(data$edad)
## num [1:11000] 21 24 23 21 24 24 27 18 28 27 ...
table(data$edad)
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
## 277 255 253 226 241 240 252 226 185 213 241 237 285 154 228 212 191 204
## 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## 244 216 248 218 335 135 299 207 178 280 165 185 274 200 274 147 228 192
## 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## 184 151 200 167 177 139 174 95 126 119 85 135 85 87 110 53 117 45
## 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
## 69 53 61 56 64 36 39 36 47 15 14 27 23 15 9 12 5 7
## 90 91 92 93 94 96 97 98
## 5 3 2 1 2 2 1 1
Asi pudimos mantener los valores originales
Ahora si vamos a recodificarla
Podemos recodificar un solo valor
library(car)
data$gedad<-recode(data$edad,"18=19")
table(data$gedad)
##
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 532 253 226 241 240 252 226 185 213 241 237 285 154 228 212 191 204 244
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 216 248 218 335 135 299 207 178 280 165 185 274 200 274 147 228 192 184
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 151 200 167 177 139 174 95 126 119 85 135 85 87 110 53 117 45 69
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 53 61 56 64 36 39 36 47 15 14 27 23 15 9 12 5 7 5
## 91 92 93 94 96 97 98
## 3 2 1 2 2 1 1
277+255
## [1] 532
Tambien podemos recodificar un conjunto de valores…
data$gedad<-recode(data$edad,"c(18,19)=1;c(20,21,22,23,24)=2")
table(data$gedad)
##
## 1 2 25 26 27 28 29 30 31 32 33 34 35 36 37
## 532 1212 226 185 213 241 237 285 154 228 212 191 204 244 216
## 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
## 248 218 335 135 299 207 178 280 165 185 274 200 274 147 228
## 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
## 192 184 151 200 167 177 139 174 95 126 119 85 135 85 87
## 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
## 110 53 117 45 69 53 61 56 64 36 39 36 47 15 14
## 83 84 85 86 87 88 89 90 91 92 93 94 96 97 98
## 27 23 15 9 12 5 7 5 3 2 1 2 2 1 1
La ventaja de esta forma es que los valores no deben ser consecutivos
# Flag the listed "round" ages (20, 30, ..., 90) as 1 and every other age as 2.
# In car::recode() "else" also matches NA, so without "NA=NA" the NS/NC answer
# (converted to NA above) would be counted in group 2.
# The counts printed below were produced before NA=NA was added.
data$gedad <- recode(data$edad, "c(20,30,40,50,60,70,80,90)=1;NA=NA;else=2")
table(data$gedad)
##
## 1 2
## 1490 9510
table(data$edad,data$gedad)
##
## 1 2
## 18 0 277
## 19 0 255
## 20 253 0
## 21 0 226
## 22 0 241
## 23 0 240
## 24 0 252
## 25 0 226
## 26 0 185
## 27 0 213
## 28 0 241
## 29 0 237
## 30 285 0
## 31 0 154
## 32 0 228
## 33 0 212
## 34 0 191
## 35 0 204
## 36 0 244
## 37 0 216
## 38 0 248
## 39 0 218
## 40 335 0
## 41 0 135
## 42 0 299
## 43 0 207
## 44 0 178
## 45 0 280
## 46 0 165
## 47 0 185
## 48 0 274
## 49 0 200
## 50 274 0
## 51 0 147
## 52 0 228
## 53 0 192
## 54 0 184
## 55 0 151
## 56 0 200
## 57 0 167
## 58 0 177
## 59 0 139
## 60 174 0
## 61 0 95
## 62 0 126
## 63 0 119
## 64 0 85
## 65 0 135
## 66 0 85
## 67 0 87
## 68 0 110
## 69 0 53
## 70 117 0
## 71 0 45
## 72 0 69
## 73 0 53
## 74 0 61
## 75 0 56
## 76 0 64
## 77 0 36
## 78 0 39
## 79 0 36
## 80 47 0
## 81 0 15
## 82 0 14
## 83 0 27
## 84 0 23
## 85 0 15
## 86 0 9
## 87 0 12
## 88 0 5
## 89 0 7
## 90 5 0
## 91 0 3
## 92 0 2
## 93 0 1
## 94 0 2
## 96 0 2
## 97 0 1
## 98 0 1
Otra forma de recodificar es mediante el uso de rangos
data$gedad<-recode(data$edad,"18:19=1;20:24=2;25:hi=3")
table(data$gedad)
##
## 1 2 3
## 532 1212 9255
table(data$edad,data$gedad)
##
## 1 2 3
## 18 277 0 0
## 19 255 0 0
## 20 0 253 0
## 21 0 226 0
## 22 0 241 0
## 23 0 240 0
## 24 0 252 0
## 25 0 0 226
## 26 0 0 185
## 27 0 0 213
## 28 0 0 241
## 29 0 0 237
## 30 0 0 285
## 31 0 0 154
## 32 0 0 228
## 33 0 0 212
## 34 0 0 191
## 35 0 0 204
## 36 0 0 244
## 37 0 0 216
## 38 0 0 248
## 39 0 0 218
## 40 0 0 335
## 41 0 0 135
## 42 0 0 299
## 43 0 0 207
## 44 0 0 178
## 45 0 0 280
## 46 0 0 165
## 47 0 0 185
## 48 0 0 274
## 49 0 0 200
## 50 0 0 274
## 51 0 0 147
## 52 0 0 228
## 53 0 0 192
## 54 0 0 184
## 55 0 0 151
## 56 0 0 200
## 57 0 0 167
## 58 0 0 177
## 59 0 0 139
## 60 0 0 174
## 61 0 0 95
## 62 0 0 126
## 63 0 0 119
## 64 0 0 85
## 65 0 0 135
## 66 0 0 85
## 67 0 0 87
## 68 0 0 110
## 69 0 0 53
## 70 0 0 117
## 71 0 0 45
## 72 0 0 69
## 73 0 0 53
## 74 0 0 61
## 75 0 0 56
## 76 0 0 64
## 77 0 0 36
## 78 0 0 39
## 79 0 0 36
## 80 0 0 47
## 81 0 0 15
## 82 0 0 14
## 83 0 0 27
## 84 0 0 23
## 85 0 0 15
## 86 0 0 9
## 87 0 0 12
## 88 0 0 5
## 89 0 0 7
## 90 0 0 5
## 91 0 0 3
## 92 0 0 2
## 93 0 0 1
## 94 0 0 2
## 96 0 0 2
## 97 0 0 1
## 98 0 0 1
Otra forma de hacer la recodificacion es usando “else”
# Age groups 18-19, 20-24, 25-29, and "everything else" as group 4.
# In car::recode() "else" also matches NA, so "NA=NA" is listed first to keep
# the NS/NC answer (converted to NA above) missing instead of putting it in
# group 4. The counts printed below were produced before NA=NA was added.
data$gedad <- recode(data$edad, "18:19=1;20:24=2;25:29=3;NA=NA;else=4")
table(data$gedad)
##
## 1 2 3 4
## 532 1212 1102 8154
Finalmente, creamos grupos quinquenales de edad
data$gedad<-recode(data$edad,"18:19=1;20:24=2;25:29=3;
30:34=4;35:39=5;40:44=6;45:49=7;
50:54=8;55:59=9;60:64=10;65:69=11;
70:74=12;75:hi=13")
table(data$gedad)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 532 1212 1102 1070 1130 1154 1104 1025 834 599 470 345 422
Vamos a asignar la variable s1 a una nueva variable.
data$sexo<-data$s1
table(data$sexo,data$gedad)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## Hombre 230 544 458 436 460 514 491 444 391 288 219 166 217
## Mujer 302 668 644 634 670 640 613 581 443 311 251 179 205
Primero vamos a crear una variable resumen de “libertades civiles”. Usaremos las variables: p21_1 -> Libertad para decir lo que uno piensa; p21_2 -> Libertad para votar por el partido deseado; p21_3 -> Libertad para practicar religión libremente.
# Summary score of civil liberties built from p21_1, p21_2 and p21_3:
#   1 = at least one item answered "Nunca"
#   3 = all three items answered "Muchas veces" or "Siempre" (not all "Siempre")
#   4 = all three items answered "Siempre"
#   2 = any other combination (the starting value)
# The earlier rule-by-rule version listed one combination twice and compared
# p21_3 with two different answers in the same rule, so the mixes
# (Muchas veces, Siempre, Muchas veces) and (Siempre, Muchas veces,
# Muchas veces) stayed at 2. The counts printed below come from that version.
data$libertades <- local({
  often_or_always <- function(x) x == "Muchas veces" | x == "Siempre"

  any_never <- data$p21_1 == "Nunca" |
    data$p21_2 == "Nunca" |
    data$p21_3 == "Nunca"
  all_high <- often_or_always(data$p21_1) &
    often_or_always(data$p21_2) &
    often_or_always(data$p21_3)
  all_always <- data$p21_1 == "Siempre" &
    data$p21_2 == "Siempre" &
    data$p21_3 == "Siempre"

  # Later rules overwrite earlier ones, so "all Siempre" ends up as 4, not 3.
  score <- rep(2, nrow(data))
  score <- ifelse(any_never, 1, score)
  score <- ifelse(all_high, 3, score)
  ifelse(all_always, 4, score)
})
table(data$libertades)
##
## 1 2 3 4
## 555 4456 1693 4296
# A respondent who answered "NS/NC" on any of the three items gets a missing
# score; everyone else keeps the value already stored in libertades.
data$libertades <- ifelse(
  data$p21_1 == "NS/NC" | data$p21_2 == "NS/NC" | data$p21_3 == "NS/NC",
  NA,
  data$libertades
)
table(data$libertades)
##
## 1 2 3 4
## 548 4384 1693 4296
table(data$sexo,data$libertades)
##
## 1 2 3 4
## Hombre 258 1890 744 1928
## Mujer 290 2494 949 2368
library(questionr)
wtd.table(data$libertades, weights=data$ponde)
## 1 2 3 4
## 5268046 36045526 16943751 27846695
wtd.table(data$sexo,data$libertades, weights=data$ponde)
## 1 2 3 4
## Hombre 2727516 17465688 8115296 13465394
## Mujer 2540530 18579838 8828455 14381301
Podemos hacer algunos analisis preliminares…
prop.table(table(data$sexo,data$libertades),2)
##
## 1 2 3 4
## Hombre 0.4708029 0.4311131 0.4394566 0.4487896
## Mujer 0.5291971 0.5688869 0.5605434 0.5512104
t.test(data$libertades~data$sexo)
##
## Welch Two Sample t-test
##
## data: data$libertades by data$sexo
## t = 0.86456, df = 10288, p-value = 0.3873
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02097154 0.05406876
## sample estimates:
## mean in group Hombre mean in group Mujer
## 2.900830 2.884281
Vamos a crear un indice
Para aprender mas de la creacion de indices recomiendo revisar:
Cecchini, S. (2005). Indicadores sociales en America Latina y El Caribe. CEPAL, Division de Estadistica y Proyecciones Economicas, Serie Estudios estadisticos y prospectivos No. 34 http://repositorio.cepal.org/bitstream/handle/11362/4735/1/S05707_es.pdf
Los indicadores deben ser: (1) Precisos: miden el fenómeno que se desea medir; (2) Mensurables: replicables y con datos disponibles; (3) Relevantes: realmente sirven.
Además, deben ser: fáciles de interpretar, fiables, oportunos y puntuales.
Ademas, vamos a aprovechar para hacer algunos agregados…
# Unweighted 0/1 flags per respondent: 1 when the p21 item was answered
# "Siempre" or "Muchas veces", 0 otherwise.
data$libIde <- ifelse(
  data$p21_1 == "Muchas veces" | data$p21_1 == "Siempre", 1, 0
)
data$libPol <- ifelse(
  data$p21_2 == "Muchas veces" | data$p21_2 == "Siempre", 1, 0
)
data$libRel <- ifelse(
  data$p21_3 == "Muchas veces" | data$p21_3 == "Siempre", 1, 0
)
# Flags equal to 1 when the p45 item was answered "No".
# NOTE(review): the wording of p45_6 / p45_7 is not shown here - confirm.
data$discRel <- ifelse(data$p45_7 == "No", 1, 0)
data$discPol <- ifelse(data$p45_6 == "No", 1, 0)
# Constant 1 so that summing it later gives the number of respondents.
data$total <- 1

# Weighted versions: the respondent's weight (ponde) replaces the 1.
data$libIde2 <- ifelse(
  data$p21_1 == "Muchas veces" | data$p21_1 == "Siempre", data$ponde, 0
)
data$libPol2 <- ifelse(
  data$p21_2 == "Muchas veces" | data$p21_2 == "Siempre", data$ponde, 0
)
data$libRel2 <- ifelse(
  data$p21_3 == "Muchas veces" | data$p21_3 == "Siempre", data$ponde, 0
)
data$discRel2 <- ifelse(data$p45_7 == "No", data$ponde, 0)
data$discPol2 <- ifelse(data$p45_6 == "No", data$ponde, 0)
# Summing this later gives the total weight of each group.
data$total2 <- data$ponde
summary(data$libIde)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 1.0000 0.6194 1.0000 1.0000
summary(data$libPol)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 1.000 0.816 1.000 1.000
summary(data$libRel)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 1.000 0.852 1.000 1.000
# Names of the grouping keys and of the flag columns created above.
columns <- c(
  "edo", "muni",
  "libIde", "libPol", "libRel", "total",
  "libIde2", "libPol2", "libRel2", "total2",
  "discRel", "discPol", "discRel2", "discPol2"
)
# Copy of the data restricted to those columns (not used by the call below).
data2a <- data[, columns]

# Sum every flag within each edo/muni combination.
data2 <- aggregate(
  cbind(
    libIde, libPol, libRel, total,
    libIde2, libPol2, libRel2,
    discRel, discPol, discRel2, discPol2,
    total2
  ) ~ edo + muni,
  data = data,
  FUN = sum
)

# Weighted percentage of each flag: weighted sum over total weight, times 100.
data2$plib1 <- data2$libIde2 / data2$total2 * 100
data2$plib2 <- data2$libPol2 / data2$total2 * 100
data2$plib3 <- data2$libRel2 / data2$total2 * 100
data2$disc1 <- data2$discRel2 / data2$total2 * 100
data2$disc2 <- data2$discPol2 / data2$total2 * 100
summary(data2$plib1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 47.59 62.21 62.46 79.29 100.00
summary(data2$plib2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 70.68 84.03 80.11 95.20 100.00
summary(data2$plib3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 74.09 89.22 82.80 100.00 100.00
summary(data2$disc1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 27.52 43.22 44.88 61.69 100.00
summary(data2$disc2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 27.93 44.94 46.84 65.04 100.00
Ahora usamos Analisis de Componentes Principales para crear alguna medida resumen
# Principal components of the five percentage indicators.
# scale. = TRUE standardises each variable first; retx = TRUE keeps the
# component scores in pca$x. The argument is spelled "scale." (with the dot)
# so it does not depend on partial matching, and TRUE is used instead of the
# reassignable T.
pca <- prcomp(
  ~ plib1 + plib2 + plib3 + disc1 + disc2,
  data = data2,
  scale. = TRUE,
  retx = TRUE
)
print(pca)
## Standard deviations:
## [1] 1.5784498 1.3045284 0.6985024 0.4294492 0.3665645
##
## Rotation:
## PC1 PC2 PC3 PC4 PC5
## plib1 -0.4908836 0.1597613 -0.85330243 0.06548943 0.0331017
## plib2 -0.5291587 0.3064807 0.30197613 -0.72269453 -0.1121772
## plib3 -0.5045560 0.3423987 0.41025080 0.66740133 0.1202335
## disc1 -0.3468340 -0.6080516 0.06882068 0.13322289 -0.6982083
## disc2 -0.3227286 -0.6273614 0.08742644 -0.10124665 0.6959667
plot(pca,type="l")
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.5784 1.3045 0.69850 0.42945 0.36656
## Proportion of Variance 0.4983 0.3404 0.09758 0.03689 0.02687
## Cumulative Proportion 0.4983 0.8387 0.93624 0.97313 1.00000
pca$rotation
## PC1 PC2 PC3 PC4 PC5
## plib1 -0.4908836 0.1597613 -0.85330243 0.06548943 0.0331017
## plib2 -0.5291587 0.3064807 0.30197613 -0.72269453 -0.1121772
## plib3 -0.5045560 0.3423987 0.41025080 0.66740133 0.1202335
## disc1 -0.3468340 -0.6080516 0.06882068 0.13322289 -0.6982083
## disc2 -0.3227286 -0.6273614 0.08742644 -0.10124665 0.6959667
# Store the scores of the first two principal components next to the
# indicators they were computed from.
data2$pc1 <- pca$x[, "PC1"]
data2$pc2 <- pca$x[, "PC2"]
# 1 for rows whose edo code is 9, 0 for all other rows.
data2$cdmx <- as.numeric(data2$edo == 9)
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:questionr':
##
## describe
## The following object is masked from 'package:car':
##
## logit
describeBy(data2$plib1,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 62.7 22.97 62.59 63.71 23.23 0 100 100 -0.36 -0.29
## se
## X1 1.26
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 56.89 9.65 57.63 56.66 13.52 44.65 71.83 27.18 0.2 -1.52
## se
## X1 2.58
describeBy(data2$plib2,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 80.09 19.88 84.39 83.13 19.33 0 100 100 -1.3 1.66
## se
## X1 1.09
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 80.49 8.59 82.17 80.41 8.41 67.89 93.99 26.1 -0.12 -1.46
## se
## X1 2.3
describeBy(data2$plib3,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 82.52 19.71 89.03 85.87 16.27 0 100 100 -1.58 2.72
## se
## X1 1.08
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 89.61 5.37 91.55 89.97 3.6 78.96 95.97 17.01 -0.78 -0.73
## se
## X1 1.44
describeBy(data2$disc1,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 44.85 25.33 43.22 44.07 26.08 0 100 100 0.27 -0.64
## se
## X1 1.39
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 45.6 15.15 40.86 43.49 8.99 30.11 86.41 56.3 1.31 1.04
## se
## X1 4.05
describeBy(data2$disc2,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 47.08 26.19 45.04 46.07 29.39 0 100 100 0.3 -0.76
## se
## X1 1.43
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 41.14 11.16 41.18 40.56 11.98 26.28 63 36.72 0.28 -1.21
## se
## X1 2.98
describeBy(data2$pc1,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 0 1.61 -0.18 -0.12 1.57 -3.23 6.52 9.75 0.79 0.92
## se
## X1 0.09
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 -0.01 0.54 -0.09 -0.04 0.39 -0.81 1.17 1.98 0.82 -0.22
## se
## X1 0.14
describeBy(data2$pc2,group=data2$cdmx)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 334 -0.01 1.32 0.15 0.06 1.36 -5.06 3.06 8.11 -0.52 0.43
## se
## X1 0.07
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 0.21 0.67 0.39 0.3 0.52 -1.54 0.9 2.43 -1.22 0.77
## se
## X1 0.18
Finalmente, ¿y si queremos ver si hay diferencias significativas?
t.test(data2$plib1~data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$plib1 by data2$cdmx
## t = 2.0233, df = 19.855, p-value = 0.05672
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1827518 11.7962394
## sample estimates:
## mean in group 0 mean in group 1
## 62.69557 56.88883
t.test(data2$plib2~data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$plib2 by data2$cdmx
## t = -0.15613, df = 19.455, p-value = 0.8775
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.706591 4.913150
## sample estimates:
## mean in group 0 mean in group 1
## 80.09040 80.48712
# Compare plib3 between the two cdmx groups. This call previously repeated the
# plib2 test, so plib3 was never tested; the output printed below still
# belongs to that repeated plib2 call and needs to be regenerated.
t.test(data2$plib3 ~ data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$plib2 by data2$cdmx
## t = -0.15613, df = 19.455, p-value = 0.8775
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.706591 4.913150
## sample estimates:
## mean in group 0 mean in group 1
## 80.09040 80.48712
t.test(data2$disc1~data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$disc1 by data2$cdmx
## t = -0.17471, df = 16.217, p-value = 0.8635
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.810491 8.315057
## sample estimates:
## mean in group 0 mean in group 1
## 44.85454 45.60226
t.test(data2$disc2~data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$disc2 by data2$cdmx
## t = 1.7943, df = 19.658, p-value = 0.08816
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.9728268 12.8471294
## sample estimates:
## mean in group 0 mean in group 1
## 47.08002 41.14286
t.test(data2$pc1~data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$pc1 by data2$cdmx
## t = 0.030787, df = 24.251, p-value = 0.9757
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3441538 0.3545825
## sample estimates:
## mean in group 0 mean in group 1
## 0.0002097722 -0.0050045656
t.test(data2$pc2~data2$cdmx)
##
## Welch Two Sample t-test
##
## data: data2$pc2 by data2$cdmx
## t = -1.1276, df = 17.649, p-value = 0.2746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6210288 0.1876157
## sample estimates:
## mean in group 0 mean in group 1
## -0.008718078 0.207988440