Hallar el intervalo de confianza del 95 % para la media de la variable Accuracy
# Load the dataset from the Excel workbook; the first row holds the column names.
# Interactive alternative: archivo <- file.choose()
archivo <- "/Users/pctm/Documents/EDA/Dataset_IA_corte_II.xlsx"
data <- read_excel(path = archivo, col_names = TRUE)

# Build a table from the first 20 rows, apply the styling, and wrap it in a
# fixed-size scroll box. The calls are nested in the same order the pipe
# applied them: kable -> kable_styling -> scroll_box.
scroll_box(
  kable_styling(
    kable(
      data[1:20, ],
      caption = "Algoritmos de inteligencia artificial utilizando frameworks de python"
    ),
    full_width = TRUE
  ),
  width = "900px",
  height = "500px"
)
| Algorithm | Framework | Problem_Type | Dataset_Type | Accuracy | Precision | Recall | F1_Score | Training_Time | Date |
|---|---|---|---|---|---|---|---|---|---|
| SVM | Scikit-learn | Regression | Time Series | 0.6618051 | 0.6929447 | NA | 0.4426950 | 4.9785924 | 2023-03-08 11:26:21 |
| K-Means | Keras | Clustering | Time Series | 0.7443216 | 0.4900292 | 0.8766533 | 0.4414046 | NA | 2023-03-09 11:26:21 |
| Neural Network | Keras | Clustering | Image | 0.8852037 | 0.5948056 | 0.9685424 | 0.9644707 | 3.2825938 | 2023-03-10 11:26:21 |
| SVM | Keras | Clustering | Text | 0.8416477 | 0.8424142 | 0.8748388 | 0.7041523 | 4.0416289 | 2023-03-11 11:26:21 |
| SVM | Scikit-learn | Regression | Tabular | 0.7229514 | 0.6856109 | 0.3010956 | 0.6456472 | 3.6039908 | 2023-03-12 11:26:21 |
| K-Means | PyTorch | Regression | Image | 0.6368133 | 0.6255330 | 7.4548096 | 0.8865271 | 3.0064753 | 2023-03-13 11:26:21 |
| Neural Network | PyTorch | Regression | Text | 0.9985623 | 0.6366858 | 0.3357948 | 0.9014956 | NA | 2023-03-14 11:26:21 |
| Neural Network | Scikit-learn | Regression | Image | 0.7130907 | 0.6756681 | 0.4803251 | 0.5993146 | 2.3283453 | 2023-03-15 11:26:21 |
| SVM | Keras | Regression | Time Series | NA | 0.8710099 | 0.3416673 | 0.8161708 | 3.4064529 | 2023-03-16 11:26:21 |
| Random Forest | Keras | Regression | Text | 0.5818119 | 0.9352508 | NA | 0.8626737 | 3.4199049 | 2023-03-17 11:26:21 |
| SVM | PyTorch | Regression | Image | 0.8974048 | 9.7320081 | 0.7806129 | 0.7927904 | 1.9283008 | 2023-03-18 11:26:21 |
| SVM | Keras | Clustering | Image | 0.8468411 | 0.8721420 | 0.3801413 | 0.4909570 | 4.7142907 | 2023-03-19 11:26:21 |
| SVM | TensorFlow | Clustering | Tabular | 0.6103848 | 0.5892441 | 0.5686872 | 0.9255299 | 0.9200495 | 2023-03-20 11:26:21 |
| SVM | PyTorch | Clustering | Image | 0.5411905 | 0.8128808 | 0.6193656 | 0.7234567 | 2.5517613 | 2023-03-21 11:26:21 |
| K-Means | Keras | Clustering | Text | 0.8402497 | 0.6625619 | 0.5583371 | 0.5694835 | 3.4853315 | 2023-03-22 11:26:21 |
| Neural Network | PyTorch | Regression | Text | NA | 0.5528024 | 0.3847175 | 0.6551369 | 3.5159654 | 2023-03-23 11:26:21 |
| K-Means | TensorFlow | Classification | Tabular | 0.6366298 | 0.9045229 | 0.5932635 | 0.4225427 | 3.2783309 | 2023-03-24 11:26:21 |
| K-Means | PyTorch | Regression | Text | 0.9754318 | 0.4230558 | 0.8258246 | 0.4767201 | 1.4489122 | 2023-03-25 11:26:21 |
| K-Means | PyTorch | Classification | Time Series | 0.5755289 | 0.9410572 | 0.3497054 | 0.8593281 | 0.8654122 | 2023-03-26 11:26:21 |
| SVM | PyTorch | Clustering | Text | 0.7161674 | 0.6768865 | 0.3561260 | 0.4000070 | 3.2161076 | 2023-03-27 11:26:21 |
# One-sample t-test on the Accuracy column, requested only for its
# 95 percent confidence interval for the mean (the default null value of 0
# is not of interest here). The argument is kept as `data$Accuracy` because
# that expression is echoed verbatim in the "data:" line of the printed result.
t.test(data$Accuracy, conf.level = 0.95)
##
## One Sample t-test
##
## data: data$Accuracy
## t = 21.246, df = 520, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.7966857 0.9590306
## sample estimates:
## mean of x
## 0.8778581
# Without the built-in function: rebuild the t-based confidence interval for
# the mean of Accuracy step by step, so it can be compared with t.test().
# Missing values are excluded from every statistic (na.rm / !is.na).
# NOTE(review): a standard deviation of ~0.94 is large for a metric that is
# normally bounded by 1; the column may contain out-of-range entries -- confirm
# before interpreting the interval.

# Sample mean of the non-missing observations.
media_muestra <- mean(data$Accuracy, na.rm = TRUE)
media_muestra
## [1] 0.8778581

# Sample standard deviation (n - 1 denominator) of the non-missing observations.
desv_est <- sd(data$Accuracy, na.rm = TRUE)
desv_est
## [1] 0.9431208

# Effective sample size: number of non-missing observations.
n <- sum(!is.na(data$Accuracy))
n
## [1] 521

# Confidence level. It must match the conf.level = 0.95 passed to t.test();
# the earlier value 0.955 was a typo that widened the interval.
nivel_confianza <- 0.95

# Degrees of freedom of the t distribution.
gl <- n - 1

# Two-sided critical value: the upper (1 - alpha / 2) quantile, where
# alpha = 1 - nivel_confianza.
t_critico <- qt(1 - (1 - nivel_confianza) / 2, df = gl)
t_critico
## [1] 1.964537

# Standard error of the mean.
error_estandar <- desv_est / sqrt(n)
error_estandar
## [1] 0.04131887

# Interval limits: mean -/+ critical value * standard error.
limite_inferior <- media_muestra - t_critico * error_estandar
limite_superior <- media_muestra + t_critico * error_estandar
limite_inferior
## [1] 0.7966857
limite_superior
## [1] 0.9590306