Prueba de hipótesis: variables cuantitativas

Author

AYGG

Bases y datos

library("MASS")
data("Pima.tr")
library("RColorBrewer")
library(gplots)


Attaching package: 'gplots'

The following object is masked from 'package:stats':

    lowess

Ejercicio 13.1

# Draw three side-by-side views of a grouped comparison: a box plot, a
# violin plot and a plot of group means.
#
# x: object passed unchanged to boxplot(), vioplot::vioplot() and
#    plotmeans(); callers in this file pass a formula (response ~ group).
# y: object printed before plotting; callers pass the same formula so the
#    comparison is echoed above the figure.
#
# Returns the value of the final plotmeans() call.
Grafico.student <- function(x, y) {
  print(y)
  # Use a 1 x 3 layout for this figure only; restore the caller's layout on
  # exit so later plots are not squeezed into a third of the device.
  old_par <- par(mfrow = c(1, 3))
  on.exit(par(old_par), add = TRUE)
  boxplot(x, main = "", ylab = "", xlab = "Grupos experimentales")
  vioplot::vioplot(x, main = "Grafico de violin", ylab = "", xlab = "")
  plotmeans(x, ylab = "", xlab = "Grupos experimentales")
}
Grafico.student(Pima.tr$glu~Pima.tr$type, Pima.tr$glu~Pima.tr$type)

Pima.tr$glu ~ Pima.tr$type

bartlett.test(Pima.tr$glu~Pima.tr$type)


    Bartlett test of homogeneity of variances

data:  Pima.tr$glu by Pima.tr$type
Bartlett's K-squared = 1.3638, df = 1, p-value = 0.2429

# Pooled-variance (Student) t-test of glu between the two type groups.
t.test(Pima.tr$glu ~ Pima.tr$type, var.equal = TRUE)


    Two Sample t-test

data:  Pima.tr$glu by Pima.tr$type
t = -7.682, df = 198, p-value = 7.075e-13
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -40.15520 -23.75033
sample estimates:
 mean in group No mean in group Yes 
         113.1061          145.0588

# Variables bp and type
bartlett.test(Pima.tr$bp~Pima.tr$type) # p = 0.68 > 0.05: equal variances are not rejected


    Bartlett test of homogeneity of variances

data:  Pima.tr$bp by Pima.tr$type
Bartlett's K-squared = 0.17341, df = 1, p-value = 0.6771

# Pooled-variance (Student) t-test of bp between the two type groups.
t.test(Pima.tr$bp ~ Pima.tr$type, var.equal = TRUE)


    Two Sample t-test

data:  Pima.tr$bp by Pima.tr$type
t = -3.0015, df = 198, p-value = 0.003032
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -8.355947 -1.729615
sample estimates:
 mean in group No mean in group Yes 
         69.54545          74.58824

Grafico.student(Pima.tr$bp~Pima.tr$type, Pima.tr$bp~Pima.tr$type)

Pima.tr$bp ~ Pima.tr$type

# Variables bmi and type
bartlett.test(Pima.tr$bmi~Pima.tr$type) # p = 0.01 < 0.05: equal variances are rejected (variances differ)


    Bartlett test of homogeneity of variances

data:  Pima.tr$bmi by Pima.tr$type
Bartlett's K-squared = 6.5537, df = 1, p-value = 0.01047

# Welch t-test (no equal-variance assumption) of bmi between type groups.
t.test(Pima.tr$bmi ~ Pima.tr$type, var.equal = FALSE)


    Welch Two Sample t-test

data:  Pima.tr$bmi by Pima.tr$type
t = -4.512, df = 171.46, p-value = 1.188e-05
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -5.224615 -2.044547
sample estimates:
 mean in group No mean in group Yes 
         31.07424          34.70882

Grafico.student(Pima.tr$bmi~Pima.tr$type, Pima.tr$bmi~Pima.tr$type)

Pima.tr$bmi ~ Pima.tr$type

# Variables skin and type
bartlett.test(Pima.tr$skin~Pima.tr$type) # p = 0.26 > 0.05: equal variances are not rejected


    Bartlett test of homogeneity of variances

data:  Pima.tr$skin by Pima.tr$type
Bartlett's K-squared = 1.2628, df = 1, p-value = 0.2611

# Pooled-variance (Student) t-test of skin between the two type groups.
t.test(Pima.tr$skin ~ Pima.tr$type, var.equal = TRUE)


    Two Sample t-test

data:  Pima.tr$skin by Pima.tr$type
t = -3.4712, df = 198, p-value = 0.0006361
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -9.272397 -2.553806
sample estimates:
 mean in group No mean in group Yes 
         27.20455          33.11765

Grafico.student(Pima.tr$skin~Pima.tr$type, Pima.tr$skin~Pima.tr$type)

Pima.tr$skin ~ Pima.tr$type

# Variables ped and type
bartlett.test(Pima.tr$ped~Pima.tr$type) # p = 0.004 < 0.05: equal variances are rejected (variances differ)


    Bartlett test of homogeneity of variances

data:  Pima.tr$ped by Pima.tr$type
Bartlett's K-squared = 8.0901, df = 1, p-value = 0.004451

# Bartlett's test rejected equal variances for ped (p = 0.004), so use the
# Welch t-test rather than the pooled-variance test.
t.test(Pima.tr$ped ~ Pima.tr$type, var.equal = FALSE)


    Two Sample t-test

data:  Pima.tr$ped by Pima.tr$type
t = -2.9601, df = 198, p-value = 0.003451
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -0.22189897 -0.04445487
sample estimates:
 mean in group No mean in group Yes 
        0.4154848         0.5486618

Grafico.student(Pima.tr$ped~Pima.tr$type, Pima.tr$ped~Pima.tr$type)

Pima.tr$ped ~ Pima.tr$type

# Variables age and type
bartlett.test(Pima.tr$age~Pima.tr$type) # p = 0.078 > 0.05: equal variances are not rejected at the 5% level


    Bartlett test of homogeneity of variances

data:  Pima.tr$age by Pima.tr$type
Bartlett's K-squared = 3.1155, df = 1, p-value = 0.07755

# Welch t-test (no equal-variance assumption) of age between type groups.
t.test(Pima.tr$age ~ Pima.tr$type, var.equal = FALSE)


    Welch Two Sample t-test

data:  Pima.tr$age by Pima.tr$type
t = -5.2162, df = 115.7, p-value = 8.106e-07
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -11.667372  -5.245284
sample estimates:
 mean in group No mean in group Yes 
         29.23485          37.69118

Grafico.student(Pima.tr$age~Pima.tr$type, Pima.tr$age~Pima.tr$type)

Pima.tr$age ~ Pima.tr$type

#Conclusiones: En la comparación entre la condición experimental y los niveles de glucosa se encontraron diferencias estadísticamente significativas; se observó que los niveles de glucosa son más altos en el grupo que presenta diabetes. Se encontró el mismo efecto en la comparación entre la condición experimental y la presión arterial, donde se encontró una relación entre la presión arterial y la diabetes. Efectos similares se encontraron entre el pedigree y la diabetes.

Resolución de ejercicios 13.2

# Split age into two groups: [0, 40) -> "0-40" and [40, 90) -> "40-90".
# cut() has no na.rm argument (it was silently ignored), so it is dropped.
# factor() is kept so that an empty age group would be removed from the levels.
Pima.tr$edad1.1 <- factor(cut(Pima.tr$age,
                              breaks = c(0, 40, 90),
                              labels = c("0-40", "40-90"),
                              right = FALSE))
# Glucose level against age group (under 40 vs. 40 and over)

# Bartlett p = 0.09 > 0.05: equal variances are not rejected at the 5% level.
bartlett.test(Pima.tr$glu ~ Pima.tr$edad1.1)


    Bartlett test of homogeneity of variances

data:  Pima.tr$glu by Pima.tr$edad1.1
Bartlett's K-squared = 2.8805, df = 1, p-value = 0.08966

# Welch t-test (no equal-variance assumption) of glu between age groups.
t.test(Pima.tr$glu ~ Pima.tr$edad1.1, var.equal = FALSE)


    Welch Two Sample t-test

data:  Pima.tr$glu by Pima.tr$edad1.1
t = -3.1504, df = 72.526, p-value = 0.00237
alternative hypothesis: true difference in means between group 0-40 and group 40-90 is not equal to 0
95 percent confidence interval:
 -28.495972  -6.410695
sample estimates:
 mean in group 0-40 mean in group 40-90 
           119.6067            137.0600

Grafico.student(Pima.tr$glu~Pima.tr$edad1.1, Pima.tr$glu~Pima.tr$edad1.1)

Pima.tr$glu ~ Pima.tr$edad1.1

# Age group (under 40 vs. 40 and over) against bmi.
# NOTE(review): the original comments described this section as a
# blood-pressure comparison, but the code analyses bmi. If blood pressure was
# intended, use Pima.tr$bp here and in the t-test and plots that follow.

# Split age into [0, 40) -> "0-40" and [40, 90) -> "40-90". cut() has no
# na.rm argument (it was silently ignored), so it is dropped; factor() is
# kept so that an empty age group would be removed from the levels.
Pima.tr$edad1.1 <- factor(cut(Pima.tr$age,
                              breaks = c(0, 40, 90),
                              labels = c("0-40", "40-90"),
                              right = FALSE))

# Bartlett p = 0.004 < 0.05: equal variances are rejected (variances differ).
bartlett.test(Pima.tr$bmi ~ Pima.tr$edad1.1)


    Bartlett test of homogeneity of variances

data:  Pima.tr$bmi by Pima.tr$edad1.1
Bartlett's K-squared = 8.1341, df = 1, p-value = 0.004344

# Welch t-test (no equal-variance assumption) of bmi between age groups.
t.test(Pima.tr$bmi ~ Pima.tr$edad1.1, var.equal = FALSE)


    Welch Two Sample t-test

data:  Pima.tr$bmi by Pima.tr$edad1.1
t = -1.7278, df = 119.93, p-value = 0.0866
alternative hypothesis: true difference in means between group 0-40 and group 40-90 is not equal to 0
95 percent confidence interval:
 -3.1130555  0.2117222
sample estimates:
 mean in group 0-40 mean in group 40-90 
           31.94733            33.39800

Grafico.student(Pima.tr$bmi~Pima.tr$edad1.1, Pima.tr$bmi~Pima.tr$edad1.1)

Pima.tr$bmi ~ Pima.tr$edad1.1

Resolución ejercicio 13.3

importar base de datos

library(readxl)
# Import the SLE dataset.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
SLE_dataset1 <- read_excel("C:/Users/David/Downloads/SLE dataset1.xlsx")
# View() opens a data viewer and is only meaningful in an interactive
# session, so skip it when the script is run non-interactively.
if (interactive()) {
  View(SLE_dataset1)
}

Recodificación de variables

# Convert the grouping variable and the categorical columns to factors in one
# pass; local() keeps the helper names out of the global environment.
SLE_dataset1 <- local({
  categorical_cols <- c(
    "Groups_NLSLEvsSLE",
    "Gender",
    "Alcohol_abuse",
    "Smoking",
    "Corticosteroids_users",
    "Azathioprine_users",
    "Cyclophosphamide_users",
    "Mycophenolate_Mofetil_user",
    "Act_SLEDAI",
    "SNP_A",
    "ALLELE_1",
    "ALLELE_2"
  )
  recoded <- SLE_dataset1
  recoded[categorical_cols] <- lapply(recoded[categorical_cols], factor)
  recoded
})

Resolución

# Age by study group
bartlett.test(SLE_dataset1$Age~SLE_dataset1$Groups_NLSLEvsSLE)


    Bartlett test of homogeneity of variances

data:  SLE_dataset1$Age by SLE_dataset1$Groups_NLSLEvsSLE
Bartlett's K-squared = 0.09995, df = 1, p-value = 0.7519

# Pooled-variance (Student) t-test of Age between the study groups.
t.test(SLE_dataset1$Age ~ SLE_dataset1$Groups_NLSLEvsSLE, var.equal = TRUE)


    Two Sample t-test

data:  SLE_dataset1$Age by SLE_dataset1$Groups_NLSLEvsSLE
t = -2.0686, df = 101, p-value = 0.04114
alternative hypothesis: true difference in means between group LN and group Non-LN is not equal to 0
95 percent confidence interval:
 -9.7448587 -0.2039998
sample estimates:
    mean in group LN mean in group Non-LN 
            39.06667             44.04110

bartlett.test(SLE_dataset1$Leptin~SLE_dataset1$Groups_NLSLEvsSLE)


    Bartlett test of homogeneity of variances

data:  SLE_dataset1$Leptin by SLE_dataset1$Groups_NLSLEvsSLE
Bartlett's K-squared = 1.8646, df = 1, p-value = 0.1721

# Pooled-variance (Student) t-test of Leptin between the study groups.
t.test(SLE_dataset1$Leptin ~ SLE_dataset1$Groups_NLSLEvsSLE, var.equal = TRUE)


    Two Sample t-test

data:  SLE_dataset1$Leptin by SLE_dataset1$Groups_NLSLEvsSLE
t = 1.8135, df = 101, p-value = 0.07273
alternative hypothesis: true difference in means between group LN and group Non-LN is not equal to 0
95 percent confidence interval:
 -1.00849 22.49153
sample estimates:
    mean in group LN mean in group Non-LN 
            33.28518             22.54365

bartlett.test(SLE_dataset1$Leptin_BMI~SLE_dataset1$Groups_NLSLEvsSLE)


    Bartlett test of homogeneity of variances

data:  SLE_dataset1$Leptin_BMI by SLE_dataset1$Groups_NLSLEvsSLE
Bartlett's K-squared = 0.24117, df = 1, p-value = 0.6234

# Pooled-variance (Student) t-test of Leptin_BMI between the study groups.
t.test(SLE_dataset1$Leptin_BMI ~ SLE_dataset1$Groups_NLSLEvsSLE,
       var.equal = TRUE)


    Two Sample t-test

data:  SLE_dataset1$Leptin_BMI by SLE_dataset1$Groups_NLSLEvsSLE
t = 1.8684, df = 101, p-value = 0.0646
alternative hypothesis: true difference in means between group LN and group Non-LN is not equal to 0
95 percent confidence interval:
 -0.0234272  0.7825830
sample estimates:
    mean in group LN mean in group Non-LN 
           1.1753673            0.7957895

bartlett.test(SLE_dataset1$Adiponectin~SLE_dataset1$Groups_NLSLEvsSLE)


    Bartlett test of homogeneity of variances

data:  SLE_dataset1$Adiponectin by SLE_dataset1$Groups_NLSLEvsSLE
Bartlett's K-squared = 3.2306, df = 1, p-value = 0.07228

# Pooled-variance (Student) t-test of Adiponectin between the study groups.
t.test(SLE_dataset1$Adiponectin ~ SLE_dataset1$Groups_NLSLEvsSLE,
       var.equal = TRUE)


    Two Sample t-test

data:  SLE_dataset1$Adiponectin by SLE_dataset1$Groups_NLSLEvsSLE
t = 2.6096, df = 101, p-value = 0.01044
alternative hypothesis: true difference in means between group LN and group Non-LN is not equal to 0
95 percent confidence interval:
 1.169488 8.582551
sample estimates:
    mean in group LN mean in group Non-LN 
            20.44003             15.56401

bartlett.test(SLE_dataset1$Adiponectin_BMI~SLE_dataset1$Groups_NLSLEvsSLE)


    Bartlett test of homogeneity of variances

data:  SLE_dataset1$Adiponectin_BMI by SLE_dataset1$Groups_NLSLEvsSLE
Bartlett's K-squared = 2.4829, df = 1, p-value = 0.1151

# Pooled-variance (Student) t-test of Adiponectin_BMI between the study groups.
t.test(SLE_dataset1$Adiponectin_BMI ~ SLE_dataset1$Groups_NLSLEvsSLE,
       var.equal = TRUE)


    Two Sample t-test

data:  SLE_dataset1$Adiponectin_BMI by SLE_dataset1$Groups_NLSLEvsSLE
t = 2.3709, df = 101, p-value = 0.01964
alternative hypothesis: true difference in means between group LN and group Non-LN is not equal to 0
95 percent confidence interval:
 0.03280225 0.36891067
sample estimates:
    mean in group LN mean in group Non-LN 
           0.7989785            0.5981220

# Contingency table: study group (rows) by Gender (columns)
tabx20 <- table(SLE_dataset1$Groups_NLSLEvsSLE, SLE_dataset1$Gender)
# Bar chart of within-column proportions. Argument names are spelled out in
# full (legend.text, names.arg) instead of relying on partial matching.
# NOTE(review): the bar labels are hard-coded; confirm they match the number
# and order of the Gender levels in colnames(tabx20).
barplot(prop.table(tabx20, 2),
        legend.text = rownames(tabx20),
        beside = TRUE,
        ylab = "Proporción",
        names.arg = c("Hombre", "Mujer"),
        col = brewer.pal(n = 3, name = "Accent"))

# Chi-squared test
# NOTE(review): the recorded result is a "Chi-squared test for given
# probabilities" on two counts (30 and 73), which suggests Gender has a single
# level in this data; if so, this compares group sizes rather than testing
# an association between group and gender.
chisq.test(tabx20)


    Chi-squared test for given probabilities

data:  tabx20
X-squared = 17.951, df = 1, p-value = 2.266e-05

tabx20.1 <-chisq.test(tabx20)
tabx20.1$observed

[1] 30 73

tabx20.1$expected

[1] 51.5 51.5

# Study group vs. smoking
tabx21 <- table(SLE_dataset1$Groups_NLSLEvsSLE, SLE_dataset1$Smoking)

# Chi-squared test of independence
chisq.test(tabx21)
# chisq.test() warns that the approximation may be incorrect (two expected
# counts are below 5), so Fisher's exact test is reported as well.
fisher.test(tabx21)

Warning in chisq.test(tabx21): Chi-squared approximation may be incorrect


    Pearson's Chi-squared test with Yates' continuity correction

data:  tabx21
X-squared = 1.1092, df = 1, p-value = 0.2923

tabx21.1 <-chisq.test(tabx21)

Warning in chisq.test(tabx21): Chi-squared approximation may be incorrect

tabx21.1$observed

        
         No Yes
  LN     27   3
  Non-LN 71   2

tabx21.1$expected

        
               No      Yes
  LN     28.54369 1.456311
  Non-LN 69.45631 3.543689

# Study group vs. alcohol abuse
tabx22 <- table(SLE_dataset1$Groups_NLSLEvsSLE, SLE_dataset1$Alcohol_abuse)

# Chi-squared test of independence
chisq.test(tabx22)
# chisq.test() warns that the approximation may be incorrect for this table,
# so Fisher's exact test is reported as well.
fisher.test(tabx22)

Warning in chisq.test(tabx22): Chi-squared approximation may be incorrect


    Pearson's Chi-squared test with Yates' continuity correction

data:  tabx22
X-squared = 0.21558, df = 1, p-value = 0.6424

# Store the test for the alcohol-abuse table (tabx22); the previous code
# passed tabx21 (smoking), so observed/expected showed the wrong table.
tabx22.1 <- chisq.test(tabx22)

Warning in chisq.test(tabx21): Chi-squared approximation may be incorrect

tabx22.1$observed

        
         No Yes
  LN     27   3
  Non-LN 71   2

tabx22.1$expected

        
               No      Yes
  LN     28.54369 1.456311
  Non-LN 69.45631 3.543689

# Study group vs. corticosteroid use
tabx23 <- table(SLE_dataset1$Groups_NLSLEvsSLE, SLE_dataset1$Corticosteroids_users)

# Chi-squared test
# NOTE(review): the recorded result is a "Chi-squared test for given
# probabilities", which suggests Corticosteroids_users has a single level in
# this data; if so, this compares group sizes rather than testing association.
chisq.test(tabx23)


    Chi-squared test for given probabilities

data:  tabx23
X-squared = 17.951, df = 1, p-value = 2.266e-05

# Store the test for the corticosteroid table (tabx23); the previous code
# passed tabx21 (smoking), so observed/expected showed the wrong table.
tabx23.1 <- chisq.test(tabx23)

Warning in chisq.test(tabx21): Chi-squared approximation may be incorrect

tabx23.1$observed

        
         No Yes
  LN     27   3
  Non-LN 71   2

tabx23.1$expected

        
               No      Yes
  LN     28.54369 1.456311
  Non-LN 69.45631 3.543689

# Study group vs. SNP_A
tabx24 <- table(SLE_dataset1$Groups_NLSLEvsSLE, SLE_dataset1$SNP_A)

# Chi-squared test
chisq.test(tabx24)


    Pearson's Chi-squared test with Yates' continuity correction

data:  tabx24
X-squared = 0.12459, df = 1, p-value = 0.7241

tabx24.1 <-chisq.test(tabx24)
tabx24.1$observed

        
         GC GG
  LN     15 15
  Non-LN 41 32

tabx24.1$expected

        
               GC       GG
  LN     16.31068 13.68932
  Non-LN 39.68932 33.31068

Resolución ejercicio 13.4 y bases

library(readxl)
# Import the paired-measurements workbook.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists.
Base_Prueba_t_pareada_2 <- read_excel("C:/Users/David/Downloads/Base_Prueba_t_pareada 2.xlsx")

# Comparison of leptin levels: start vs. 6 months

antes1 <- Base_Prueba_t_pareada_2$Leptin
antes2 <- Base_Prueba_t_pareada_2$Leptin6M
despues1 <- Base_Prueba_t_pareada_2$Leptin12M

boxplot(antes1, antes2, horizontal = FALSE,
        lwd = 2, col = brewer.pal(n = 3, name = "Accent"),
        xlab = "Grupos",  # X-axis label
        ylab = "Niveles de leptina",  # Y-axis label
        main = "Comparación de niveles de leptina inicio y leptina 6 meses", # Title
        border = "black",  # Boxplot border color
        outpch = 25,       # Outliers symbol
        whisklty = 2,      # Whisker line type
        names = c("leptina antes", "leptina despues"))

# Hypothesis test: paired t-test on the within-row differences.
# var.equal is omitted because it has no effect when paired = TRUE.
t.test(x = antes1, y = antes2, alternative = "two.sided",
       paired = TRUE)


    Paired t-test

data:  antes1 and antes2
t = 11.746, df = 164, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 3.447977 4.841477
sample estimates:
mean difference 
       4.144727

# Leptin: start vs. 12 months later


# Title and y-axis label corrected: this plot compares the starting leptin
# values with Leptin12M (it was labelled "6 meses" and "biomarcador").
boxplot(antes1, despues1, horizontal = FALSE,
        lwd = 2, col = brewer.pal(n = 3, name = "Accent"),
        xlab = "Grupos",  # X-axis label
        ylab = "Niveles de leptina",  # Y-axis label
        main = "Comparación de niveles de leptina inicio y leptina 12 meses despues", # Title
        border = "black",  # Boxplot border color
        outpch = 25,       # Outliers symbol
        whisklty = 2,      # Whisker line type
        names = c("leptina antes", "leptina despues"))

# Hypothesis test: paired t-test on the within-row differences.
# var.equal is omitted because it has no effect when paired = TRUE.
t.test(x = antes1, y = despues1, alternative = "two.sided",
       paired = TRUE)


    Paired t-test

data:  antes1 and despues1
t = 11.744, df = 164, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 7.044784 9.892549
sample estimates:
mean difference 
       8.468667

Resolución ejercicio 13.5

# Biomarker: start vs. 6 months later
antes1.1 <- Base_Prueba_t_pareada_2$Biomarcador
antes2.1 <- Base_Prueba_t_pareada_2$Biomarcador6M
despues3.1 <- Base_Prueba_t_pareada_2$Biomarcador12M

boxplot(antes1.1, antes2.1, horizontal = FALSE,
        lwd = 2, col = brewer.pal(n = 3, name = "Accent"),
        xlab = "Grupos",  # X-axis label
        ylab = "Niveles de biomarcador",  # Y-axis label
        main = "Comparación de niveles de biomarcador inicio y biomarcador 6 meses despues", # Title
        border = "black",  # Boxplot border color
        outpch = 25,       # Outliers symbol
        whisklty = 2,      # Whisker line type
        names = c("biomarcador antes", "biomarcador despues"))

# Hypothesis test: paired t-test on the within-row differences.
# var.equal is omitted because it has no effect when paired = TRUE.
t.test(x = antes1.1, y = antes2.1, alternative = "two.sided",
       paired = TRUE)


    Paired t-test

data:  antes1.1 and antes2.1
t = -10.147, df = 164, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -4.254057 -2.868125
sample estimates:
mean difference 
      -3.561091

# Biomarker: start vs. 12 months later

boxplot(antes1.1, despues3.1, horizontal = FALSE,
        lwd = 2, col = brewer.pal(n = 3, name = "Accent"),
        xlab = "Grupos",  # X-axis label
        ylab = "Niveles de biomarcador",  # Y-axis label
        main = "Comparación de niveles de biomarcador inicio y biomarcador 12 meses despues", # Title
        border = "black",  # Boxplot border color
        outpch = 25,       # Outliers symbol
        whisklty = 2,      # Whisker line type
        names = c("biomarcador antes", "biomarcador despues"))

# Hypothesis test: paired t-test on the within-row differences.
# var.equal is omitted because it has no effect when paired = TRUE.
t.test(x = antes1.1, y = despues3.1, alternative = "two.sided",
       paired = TRUE)


    Paired t-test

data:  antes1.1 and despues3.1
t = -10.152, df = 164, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -9.291692 -6.265884
sample estimates:
mean difference 
      -7.778788