Correlación y regresión

Author

Gonzalez Garcia AY

Bases de datos y otros datos

# Load the body-fat data set from a local text file.
# sep = "" makes any run of white space act as the field separator.
bodyfat <- read.csv(
  file = "C:/Users/David/Downloads/bodyfat.txt",
  sep = ""
)
library(psych)

Warning: package 'psych' was built under R version 4.2.3

library (corrr)

Warning: package 'corrr' was built under R version 4.2.3

library(ggstatsplot)

Warning: package 'ggstatsplot' was built under R version 4.2.3

You can cite this package as:
     Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
     Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167

library("RColorBrewer")
library(correlation)

Warning: package 'correlation' was built under R version 4.2.3

library(see)

Warning: package 'see' was built under R version 4.2.3

# Put the columns of bodyfat on the search path so the calls below can refer
# to them by bare name (Fat, Weight, ...).
# NOTE(review): attach() is easy to get wrong (masking, stale copies); consider
# with(bodyfat, ...) or bodyfat$col in new code.
attach(bodyfat)

Resolución de ejercicio 14.1

# Correlation tests (Pearson) between body-fat percentage and other measures

# Body-fat percentage vs. weight
cor.test(x = Fat, y = Weight)


    Pearson's product-moment correlation

data:  Fat and Weight
t = 12.249, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5288644 0.6842076
sample estimates:
     cor 
0.612414

# Body-fat percentage vs. height
cor.test(x = Fat, y = Height)


    Pearson's product-moment correlation

data:  Fat and Height
t = -1.4207, df = 250, p-value = 0.1566
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.21073764  0.03445855
sample estimates:
        cor 
-0.08949538

# Body-fat percentage vs. age
cor.test(x = Fat, y = Age)


    Pearson's product-moment correlation

data:  Fat and Age
t = 4.8175, df = 250, p-value = 2.522e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.1741581 0.4006030
sample estimates:
      cor 
0.2914584

# Body-fat percentage vs. abdomen measurement
cor.test(x = Fat, y = Abdomen)


    Pearson's product-moment correlation

data:  Fat and Abdomen
t = 22.112, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7669520 0.8514218
sample estimates:
      cor 
0.8134323

# Body-fat percentage vs. wrist measurement
cor.test(x = Fat, y = Wrist)


    Pearson's product-moment correlation

data:  Fat and Wrist
t = 5.8419, df = 250, p-value = 1.6e-08
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.2329799 0.4508395
sample estimates:
      cor 
0.3465749

# Body-fat percentage vs. thigh measurement
cor.test(x = Fat, y = Thigh)


    Pearson's product-moment correlation

data:  Fat and Thigh
t = 10.676, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.4684275 0.6389926
sample estimates:
      cor 
0.5596075

Resolución de ejercicio 14.2

# Load the data set with readr; column types are guessed by read_csv().
library(readr)
RegionalInterestByConditionOverTime <- read_csv(
  file = "C:/Users/David/Downloads/RegionalInterestByConditionOverTime.csv"
)

Rows: 210 Columns: 74
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (1): dma
dbl (73): geoCode, 2010+cancer, 2010+cardiovascular, 2010+stroke, 2010+depre...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Correlation matrix plot for columns 3 to 9, using every row of the data.
# The row index is deliberately left empty: selecting a single row
# (`[1, 3:9]`) leaves one observation per variable, so no correlation can be
# computed and ggcorrmat() warns and returns NA for every pair.
ggcorrmat(
  data = RegionalInterestByConditionOverTime[, 3:9], # columns to correlate
  type = "np", # which correlation coefficient is computed ("np" = non-parametric)
  matrix.type = "lower", # layout of the plot: lower triangle only
  title = "Grafica de correlación", # custom title
  subtitle = "Biostadística DCB" # custom subtitle
)

Warning: 2010+cancer and 2010+cardiovascular have less than 3 complete
observations. Returning NA.

# Load the psych package
library(psych)

# Pairwise correlations for the whole data frame in a single call
# (author's note: the most convenient function for large data sets).
correlate(x = RegionalInterestByConditionOverTime)

Non-numeric variables removed from input: `dma`
Correlation computed with
• Method: 'pearson'
• Missing treated using: 'pairwise.complete.obs'

# A tibble: 73 × 74
   term  geoCode 2010+…¹ 2010+…² 2010+…³ 2010+…⁴ 2010+…⁵ 2010+…⁶ 2010+…⁷ 2010+…⁸
   <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
 1 geoC… NA       -0.441 -0.255   -0.180 -0.137  -0.249  -0.0500 -0.203   -0.126
 2 2010… -0.441   NA      0.331    0.336  0.230   0.344   0.250   0.207    0.197
 3 2010… -0.255    0.331 NA        0.145  0.0670  0.207   0.234   0.143    0.467
 4 2010… -0.180    0.336  0.145   NA      0.517   0.385   0.166   0.568    0.238
 5 2010… -0.137    0.230  0.0670   0.517 NA       0.373  -0.0325  0.579    0.284
 6 2010… -0.249    0.344  0.207    0.385  0.373  NA       0.0960  0.400    0.105
 7 2010… -0.0500   0.250  0.234    0.166 -0.0325  0.0960 NA      -0.0486   0.245
 8 2010… -0.203    0.207  0.143    0.568  0.579   0.400  -0.0486 NA        0.164
 9 2010… -0.126    0.197  0.467    0.238  0.284   0.105   0.245   0.164   NA    
10 2010… -0.152    0.309  0.269    0.565  0.616   0.435   0.125   0.640    0.330
# … with 63 more rows, 64 more variables: `2010+diabetes` <dbl>,
#   `2011+cancer` <dbl>, `2011+cardiovascular` <dbl>, `2011+stroke` <dbl>,
#   `2011+depression` <dbl>, `2011+rehab` <dbl>, `2011+vaccine` <dbl>,
#   `2011+diarrhea` <dbl>, `2011+obesity` <dbl>, `2011+diabetes` <dbl>,
#   `2012+cancer` <dbl>, `2012+cardiovascular` <dbl>, `2012+stroke` <dbl>,
#   `2012+depression` <dbl>, `2012+rehab` <dbl>, `2012+vaccine` <dbl>,
#   `2012+diarrhea` <dbl>, `2012+obesity` <dbl>, `2012+diabetes` <dbl>, …

# Correlation scatterplot matrices, drawn in batches of columns so that each
# figure stays readable. Only the first figure carries a title.
pairs.panels(
  RegionalInterestByConditionOverTime[, 3:9],
  main = "Matriz de correlación de las primeras 7 variables"
)

# Remaining batches, one figure per column range.
# NOTE(review): columns 64-74 are not plotted by any batch; confirm that this
# is intended.
invisible(lapply(
  list(10:19, 20:30, 31:41, 42:52, 53:63),
  function(cols) pairs.panels(RegionalInterestByConditionOverTime[, cols])
))

Resolución de ejercicio 14.3

# Attach bodyfat only when it is not already on the search path. Attaching it
# a second time adds a duplicate entry whose columns mask the earlier copy.
if (!"bodyfat" %in% search()) {
  attach(bodyfat)
}

The following objects are masked from bodyfat (pos = 4):

    Abdomen, Age, Ankle, Biceps, Chest, Density, Fat, Forearm, Height,
    Hip, Knee, Neck, Thigh, Weight, Wrist

# Density vs. body-fat percentage
cor.test(x = Density, y = Fat)


    Pearson's product-moment correlation

data:  Density and Fat
t = -100.22, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9904570 -0.9843641
sample estimates:
       cor 
-0.9877824

# Scatterplot of body-fat percentage (y) against density (x).
plot(Density, Fat, main = "Densidad mineral vs Porcentaje de grasa ", xlab = "Densidad mineral", ylab = "Porcentaje de grasa", col = "cyan4", pch = 20)
# Second layer: annotate with the coefficient and p-value reported by
# cor.test(Density, Fat). The original label had a doubled minus sign ("r=--").
text(x = 1.06, y = 40, labels = "r=-0.9877824; p<0.001")
# Third layer: least-squares line of Fat on Density, matching the plot axes.
abline(lm(Fat ~ Density))

# Body-fat percentage vs. height
cor.test(x = Fat, y = Height)


    Pearson's product-moment correlation

data:  Fat and Height
t = -1.4207, df = 250, p-value = 0.1566
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.21073764  0.03445855
sample estimates:
        cor 
-0.08949538

# Scatterplot of height (y) against body-fat percentage (x).
plot(Fat, Height, main = "Fat vs altura ", xlab = "Porcentaje de grasa", ylab = "Altura", col = "cyan4", pch = 20)
# Second layer: annotate with the values reported by cor.test(Fat, Height).
# The original used text(x = 1.06, y = 40), coordinates copied from the
# Density plot; a corner-anchored legend does not depend on the axis ranges.
legend("topleft", legend = "r=-0.08949538 ; p=0.1566", bty = "n")
# Third layer: the fitted line must use the same axes as the plot
# (y = Height, x = Fat); lm(Fat ~ Height) would draw the inverse regression.
abline(lm(Height ~ Fat))

# Body-fat percentage vs. abdomen measurement
cor.test(x = Fat, y = Abdomen)


    Pearson's product-moment correlation

data:  Fat and Abdomen
t = 22.112, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7669520 0.8514218
sample estimates:
      cor 
0.8134323

# Scatterplot of abdomen measurement (y) against body-fat percentage (x).
plot(Fat, Abdomen, main = "Fat vs abdomen ", xlab = "Porcentaje de grasa", ylab = "Medida de abdomen", col = "cyan4", pch = 20)
# Second layer: annotate with the values reported by cor.test(Fat, Abdomen).
# The test reports p < 2.2e-16, so the label states p<0.001 rather than
# "p=0.0001", and the stray line break inside the label is removed.
# The original text(x = 1.06, y = 40) coordinates were copied from the Density
# plot; a corner-anchored legend does not depend on the axis ranges.
legend("topleft", legend = "r=0.8134323; p<0.001", bty = "n")
# Third layer: the fitted line must use the same axes as the plot
# (y = Abdomen, x = Fat); lm(Fat ~ Abdomen) would draw the inverse regression.
abline(lm(Abdomen ~ Fat))

# Body-fat percentage vs. chest measurement
cor.test(x = Fat, y = Chest)


    Pearson's product-moment correlation

data:  Fat and Chest
t = 15.613, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6341034 0.7601899
sample estimates:
      cor 
0.7026203

# Scatterplot of chest measurement (y) against body-fat percentage (x).
# The original plotted Abdomen here although the title, y-label, correlation
# test and fitted model all refer to Chest.
plot(Fat, Chest, main = "Fat vs Chest ", xlab = "Porcentaje de grasa", ylab = "Pecho", col = "cyan4", pch = 20)
# Second layer: annotate with the values reported by cor.test(Fat, Chest).
# The test reports p < 2.2e-16, so the label states p<0.001 rather than
# "p=0.0001", and the stray line break inside the label is removed.
# The original text(x = 1.06, y = 40) coordinates were copied from the Density
# plot; a corner-anchored legend does not depend on the axis ranges.
legend("topleft", legend = "r=0.7026203; p<0.001", bty = "n")
# Third layer: the fitted line must use the same axes as the plot
# (y = Chest, x = Fat); lm(Fat ~ Chest) would draw the inverse regression.
abline(lm(Chest ~ Fat))

# Body-fat percentage vs. biceps measurement
cor.test(x = Fat, y = Biceps)


    Pearson's product-moment correlation

data:  Fat and Biceps
t = 8.966, df = 250, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.3936960 0.5814045
sample estimates:
      cor 
0.4932711

# Scatterplot of biceps measurement (y) against body-fat percentage (x).
# The title and y-label were left over from the Chest plot ("Fat vs Chest",
# "Pecho"); they now name the variable that is actually plotted.
plot(Fat, Biceps, main = "Fat vs Biceps ", xlab = "Porcentaje de grasa", ylab = "Bíceps", col = "cyan4", pch = 20)
# Second layer: annotate with the values reported by cor.test(Fat, Biceps).
# The test reports p < 2.2e-16, so the label states p<0.001 rather than
# "p=0.0001", and the stray line break inside the label is removed.
# The original text(x = 1.06, y = 40) coordinates were copied from the Density
# plot; a corner-anchored legend does not depend on the axis ranges.
legend("topleft", legend = "r=0.4932711; p<0.001", bty = "n")
# Third layer: the fitted line must use the same axes as the plot
# (y = Biceps, x = Fat); lm(Fat ~ Biceps) would draw the inverse regression.
abline(lm(Biceps ~ Fat))