library(readr)
# Read the cancer data set from a CSV file located relative to the
# current working directory.
The_Cancer_data_1500_V2 <- read_csv(file = "The_Cancer_data_1500_V2.csv")
# Show the first rows to confirm the columns were parsed as expected.
head(x = The_Cancer_data_1500_V2)
## # A tibble: 6 × 9
## Age Gender BMI Smoking GeneticRisk PhysicalActivity AlcoholIntake
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 58 1 16.1 0 1 8.15 4.15
## 2 71 0 30.8 0 1 9.36 3.52
## 3 48 1 38.8 0 2 5.14 4.73
## 4 34 0 30.0 0 0 9.50 2.04
## 5 62 1 35.5 0 0 5.36 3.31
## 6 27 0 37.1 0 1 3.94 2.32
## # ℹ 2 more variables: CancerHistory <dbl>, Diagnosis <dbl>
# Independencia ----
library(dplyr)
library(gtsummary)
# Cross-tabulate the Smoking and Diagnosis columns into a two-way table
# (rows: Smoking, columns: Diagnosis).
contingency_table <- with(
  The_Cancer_data_1500_V2,
  table(Smoking, Diagnosis)
)
print(contingency_table)
## Diagnosis
## Smoking 0 1
## 0 762 334
## 1 181 223
# Pearson chi-squared test of independence on the two-way table.
# `correct = TRUE` is the default and applies Yates' continuity
# correction to 2 x 2 tables; it is spelled out here for clarity.
chisq.test(x = contingency_table, correct = TRUE)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 76.237, df = 1, p-value < 2.2e-16
# Correlación ----
# Scatter plot of Age against PhysicalActivity, followed by a Spearman
# rank correlation test between the same two columns.
# The title and y-axis label previously contained a mis-decoded "í"
# (shown as "FÃsica"); they are restored to "Física".
plot(
  The_Cancer_data_1500_V2$Age,
  The_Cancer_data_1500_V2$PhysicalActivity,
  main = "Relación entre Edad y Actividad Física",
  xlab = "Edad",
  ylab = "Horas de Actividad Física por Semana",
  pch = 16,                 # solid filled circles
  col = rgb(0, 0, 1, 0.6)   # blue with 60% opacity so overlaps stay visible
)
# Overlay reference grid lines on the current plot.
grid()
# The `$` expressions are passed directly so the "data:" line of the
# printed test result keeps the full column names.
cor.test(
  The_Cancer_data_1500_V2$Age,
  The_Cancer_data_1500_V2$PhysicalActivity,
  method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: The_Cancer_data_1500_V2$Age and The_Cancer_data_1500_V2$PhysicalActivity
## S = 553698029, p-value = 0.5448
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.01564751
# Normalidad ----
# Visual check of the Age distribution: a histogram followed by a
# kernel density estimate, both drawn in blue.
with(
  The_Cancer_data_1500_V2,
  hist(
    Age,
    breaks = 10,  # suggested number of bins; hist() may adjust it
    col = "blue",
    border = "black",
    main = "Histograma de Edad",
    xlab = "Edad",
    ylab = "Frecuencia"
  )
)
with(
  The_Cancer_data_1500_V2,
  plot(
    density(Age),
    col = "blue",
    main = "Gráfica de Densidad de Edad",
    xlab = "Edad",
    ylab = "Densidad"
  )
)
# One-sample Kolmogorov-Smirnov test of Age against a normal
# distribution whose mean and standard deviation are taken from the
# sample itself. `sd` is now passed by name rather than positionally so
# it cannot be mismatched when forwarded to pnorm().
# NOTE(review): the ks.test documentation states that the distribution
# parameters must be pre-specified and not estimated from the data, so
# this p-value is only approximate; a Lilliefors-type correction or
# shapiro.test() may be more appropriate -- confirm with the analyst.
ks.test(
  The_Cancer_data_1500_V2$Age,
  "pnorm",
  mean = mean(The_Cancer_data_1500_V2$Age),
  sd = sd(The_Cancer_data_1500_V2$Age)
)
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: The_Cancer_data_1500_V2$Age
## D = 0.071622, p-value = 4.146e-07
## alternative hypothesis: two-sided