## install.packages("corrr")
## install.packages("FactoMineR")
## install.packages("factoextra")
## install.packages("ggplot2")
## install.packages("rworldmap")
## install.packages("sp")
## install.packages("rnaturalearth")
## install.packages("rnaturalearthdata")
#Matriz de correlación
library(corrr)
library(corrplot)
## corrplot 0.92 loaded
#PCA
library(FactoMineR)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#Gráficos
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#Manejo datos
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Mapas
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(rnaturalearth)
library(rnaturalearthdata)
##
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
##
## countries110
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
protein_data <- read.csv("protein.csv")
protein_data
## Country Red_Meat White_Meat Eggs Milk Fish Cereals Starchy_Foods
## 1 Albania 10 1 1 9 0 42 1
## 2 Austria 9 14 4 20 2 28 4
## 3 Belgium 14 9 4 18 5 27 6
## 4 Bulgaria 8 6 2 8 1 57 1
## 5 Czechoslovakia 10 11 3 13 2 34 5
## 6 Denmark 11 11 4 25 10 22 5
## 7 East_Germany 8 12 4 11 5 25 7
## 8 Finland 10 5 3 34 6 26 5
## 9 France 18 10 3 20 6 28 5
## 10 Greece 10 3 3 18 6 42 2
## 11 Hungary 5 12 3 10 0 40 4
## 12 Ireland 14 10 5 26 2 24 6
## 13 Italy 9 5 3 14 3 37 2
## 14 The_Netherlands 10 14 4 23 3 22 4
## 15 Norway 9 5 3 23 10 23 5
## 16 Poland 7 10 3 19 3 36 6
## 17 Portugal 6 4 1 5 14 27 6
## 18 Romania 6 6 2 11 1 50 3
## 19 Spain 7 3 3 9 7 29 6
## 20 Sweden 10 8 4 25 8 20 4
## 21 Switzerland 13 10 3 24 2 26 3
## 22 United_Kingdom 17 6 5 21 4 24 5
## 23 USSR 9 5 2 17 3 44 6
## 24 West_Germany 11 13 4 19 3 19 5
## 25 Yugoslavia 4 5 1 10 1 56 3
## Pulses_nuts_oilseeds Fruits_Vegetables Total
## 1 6 2 72
## 2 1 4 86
## 3 2 4 89
## 4 4 4 91
## 5 1 4 83
## 6 1 2 91
## 7 1 4 77
## 8 1 1 91
## 9 2 7 99
## 10 8 7 99
## 11 5 4 83
## 12 2 3 92
## 13 4 7 84
## 14 2 4 86
## 15 2 3 83
## 16 2 7 93
## 17 5 8 76
## 18 5 3 87
## 19 6 7 77
## 20 1 2 82
## 21 2 5 88
## 22 3 3 88
## 23 3 3 92
## 24 2 4 80
## 25 6 3 89
colSums(is.na(protein_data))
## Country Red_Meat White_Meat
## 0 0 0
## Eggs Milk Fish
## 0 0 0
## Cereals Starchy_Foods Pulses_nuts_oilseeds
## 0 0 0
## Fruits_Vegetables Total
## 0 0
numerical_data <- protein_data[,2:10]
numerical_data
## Red_Meat White_Meat Eggs Milk Fish Cereals Starchy_Foods
## 1 10 1 1 9 0 42 1
## 2 9 14 4 20 2 28 4
## 3 14 9 4 18 5 27 6
## 4 8 6 2 8 1 57 1
## 5 10 11 3 13 2 34 5
## 6 11 11 4 25 10 22 5
## 7 8 12 4 11 5 25 7
## 8 10 5 3 34 6 26 5
## 9 18 10 3 20 6 28 5
## 10 10 3 3 18 6 42 2
## 11 5 12 3 10 0 40 4
## 12 14 10 5 26 2 24 6
## 13 9 5 3 14 3 37 2
## 14 10 14 4 23 3 22 4
## 15 9 5 3 23 10 23 5
## 16 7 10 3 19 3 36 6
## 17 6 4 1 5 14 27 6
## 18 6 6 2 11 1 50 3
## 19 7 3 3 9 7 29 6
## 20 10 8 4 25 8 20 4
## 21 13 10 3 24 2 26 3
## 22 17 6 5 21 4 24 5
## 23 9 5 2 17 3 44 6
## 24 11 13 4 19 3 19 5
## 25 4 5 1 10 1 56 3
## Pulses_nuts_oilseeds Fruits_Vegetables
## 1 6 2
## 2 1 4
## 3 2 4
## 4 4 4
## 5 1 4
## 6 1 2
## 7 1 4
## 8 1 1
## 9 2 7
## 10 8 7
## 11 5 4
## 12 2 3
## 13 4 7
## 14 2 4
## 15 2 3
## 16 2 7
## 17 5 8
## 18 5 3
## 19 6 7
## 20 1 2
## 21 2 5
## 22 3 3
## 23 3 3
## 24 2 4
## 25 6 3
## MARGIN para trabajar por filas se escribe 1, por columnas 2
apply(X=numerical_data, MARGIN=2, FUN=mean)
## Red_Meat White_Meat Eggs
## 9.80 7.92 3.08
## Milk Fish Cereals
## 17.28 4.28 32.32
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## 4.36 3.08 4.20
## Los valores de cereales son muy grandes, es necesario normalizar datos
apply(X=numerical_data, MARGIN=2, FUN=var)
## Red_Meat White_Meat Eggs
## 11.583333 13.993333 1.243333
## Milk Fish Cereals
## 50.376667 12.043333 121.226667
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## 2.740000 4.076667 3.666667
apply(X=numerical_data, MARGIN=2, FUN=sd)
## Red_Meat White_Meat Eggs
## 3.403430 3.740766 1.115049
## Milk Fish Cereals
## 7.097652 3.470351 11.010298
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## 1.655295 2.019076 1.914854
## Se resta la media y se divide en la desviación estándar
data_normalized <- scale(numerical_data)
data_normalized
## Red_Meat White_Meat Eggs Milk Fish Cereals
## [1,] 0.05876425 -1.84988830 -1.86538958 -1.16658295 -1.23330478 0.8791769
## [2,] -0.23505701 1.62533538 0.82507616 0.38322532 -0.65699414 -0.3923599
## [3,] 1.23404931 0.28871089 0.82507616 0.10144200 0.20747183 -0.4831840
## [4,] -0.52887828 -0.51326380 -0.96856767 -1.30747461 -0.94514946 2.2415378
## [5,] 0.05876425 0.82336069 -0.07174575 -0.60301630 -0.65699414 0.1525844
## [6,] 0.35258552 0.82336069 0.82507616 1.08768362 1.64824845 -0.9373043
## [7,] -0.52887828 1.09068558 0.82507616 -0.88479963 0.20747183 -0.6648321
## [8,] 0.05876425 -0.78058870 -0.07174575 2.35570856 0.49562716 -0.5740081
## [9,] 2.40933437 0.55603579 -0.07174575 0.38322532 0.49562716 -0.3923599
## [10,] 0.05876425 -1.31523850 -0.07174575 0.10144200 0.49562716 0.8791769
## [11,] -1.41034207 1.09068558 -0.07174575 -1.02569129 -1.23330478 0.6975288
## [12,] 1.23404931 0.55603579 1.72189807 1.22857528 -0.65699414 -0.7556562
## [13,] -0.23505701 -0.78058870 -0.07174575 -0.46212464 -0.36883881 0.4250566
## [14,] 0.05876425 1.62533538 0.82507616 0.80590030 -0.36883881 -0.9373043
## [15,] -0.23505701 -0.78058870 -0.07174575 0.80590030 1.64824845 -0.8464803
## [16,] -0.82269954 0.55603579 -0.07174575 0.24233366 -0.36883881 0.3342325
## [17,] -1.11652080 -1.04791360 -1.86538958 -1.73014959 2.80086974 -0.4831840
## [18,] -1.11652080 -0.51326380 -0.96856767 -0.88479963 -0.94514946 1.6057694
## [19,] -0.82269954 -1.31523850 -0.07174575 -1.16658295 0.78378248 -0.3015359
## [20,] 0.05876425 0.02138599 0.82507616 1.08768362 1.07193780 -1.1189524
## [21,] 0.94022805 0.55603579 -0.07174575 0.94679196 -0.65699414 -0.5740081
## [22,] 2.11551310 -0.51326380 1.72189807 0.52411698 -0.08068349 -0.7556562
## [23,] -0.23505701 -0.78058870 -0.96856767 -0.03944966 -0.36883881 1.0608250
## [24,] 0.35258552 1.35801048 0.82507616 0.24233366 -0.36883881 -1.2097765
## [25,] -1.70416333 -0.78058870 -1.86538958 -1.02569129 -0.94514946 2.1507138
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## [1,] -2.0298502 1.44620630 -1.1489125
## [2,] -0.2174840 -1.03017435 -0.1044466
## [3,] 0.9907602 -0.53489822 -0.1044466
## [4,] -2.0298502 0.45565404 -0.1044466
## [5,] 0.3866381 -1.03017435 -0.1044466
## [6,] 0.3866381 -1.03017435 -1.1489125
## [7,] 1.5948823 -1.03017435 -0.1044466
## [8,] 0.3866381 -1.03017435 -1.6711455
## [9,] 0.3866381 -0.53489822 1.4622523
## [10,] -1.4257281 2.43675857 1.4622523
## [11,] -0.2174840 0.95093017 -0.1044466
## [12,] 0.9907602 -0.53489822 -0.6266796
## [13,] -1.4257281 0.45565404 1.4622523
## [14,] -0.2174840 -0.53489822 -0.1044466
## [15,] 0.3866381 -0.53489822 -0.6266796
## [16,] 0.9907602 -0.53489822 1.4622523
## [17,] 0.9907602 0.95093017 1.9844853
## [18,] -0.8216060 0.95093017 -0.6266796
## [19,] 0.9907602 1.44620630 1.4622523
## [20,] -0.2174840 -1.03017435 -1.1489125
## [21,] -0.8216060 -0.53489822 0.4177864
## [22,] 0.3866381 -0.03962209 -0.6266796
## [23,] 0.9907602 -0.03962209 -0.6266796
## [24,] 0.3866381 -0.53489822 -0.1044466
## [25,] -0.8216060 1.44620630 -0.6266796
## attr(,"scaled:center")
## Red_Meat White_Meat Eggs
## 9.80 7.92 3.08
## Milk Fish Cereals
## 17.28 4.28 32.32
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## 4.36 3.08 4.20
## attr(,"scaled:scale")
## Red_Meat White_Meat Eggs
## 3.403430 3.740766 1.115049
## Milk Fish Cereals
## 7.097652 3.470351 11.010298
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## 1.655295 2.019076 1.914854
### Después de aplicar la transformación a los datos se tendrán
### una media de aproximadamente cero y una varianza de uno
apply(X=data_normalized, MARGIN=2, FUN=mean)
## Red_Meat White_Meat Eggs
## -2.192951e-16 1.097646e-17 -6.438426e-17
## Milk Fish Cereals
## -1.701417e-16 -6.438426e-17 -1.774622e-17
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## -1.976479e-16 -1.833169e-17 -1.021492e-16
# Esta función center debe dar como resultado igual a la media de los normalizados
## data_pca$center
apply(X=data_normalized, MARGIN=2, FUN=var)
## Red_Meat White_Meat Eggs
## 1 1 1
## Milk Fish Cereals
## 1 1 1
## Starchy_Foods Pulses_nuts_oilseeds Fruits_Vegetables
## 1 1 1
data_pca <- princomp(data_normalized)
data_pca
## Call:
## princomp(x = data_normalized)
##
## Standard deviations:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## 1.9828553 1.2489623 1.0207403 0.9321032 0.6400533 0.5771158 0.5086679 0.3593629
## Comp.9
## 0.3271628
##
## 9 variables and 25 observations.
Un valor y un vector propio para cada dato a partir del cual se calculan los componentes principales Los autovalores o vectores propios son los eigenvectores, función loadings Los loadings son las combinaciones lineales de sus variables originales El número máximo de components que se calculan Se calcula el mínimo min(n-1,p), en este caso min(24,9), por lo que este PCA reportará 9 componentes
data_pca$loadings
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## Red_Meat 0.311 0.355 0.597 0.397 0.377 0.228
## White_Meat 0.316 0.215 -0.628 -0.311 0.146
## Eggs 0.421 0.255 -0.665 0.467
## Milk 0.379 0.169 0.404 -0.318 -0.718 -0.102
## Fish 0.134 -0.652 0.300 -0.235 -0.304 0.237 0.441
## Cereals -0.430 0.254 0.185 0.194 -0.343 0.721
## Starchy_Foods 0.296 -0.389 -0.281 -0.305 0.673 -0.326
## Pulses_nuts_oilseeds -0.422 -0.129 0.140 0.251 -0.587 -0.218
## Fruits_Vegetables -0.122 -0.504 -0.340 0.604 -0.228 0.158 -0.359
## Comp.9
## Red_Meat 0.251
## White_Meat 0.577
## Eggs -0.275
## Milk 0.190
## Fish 0.260
## Cereals 0.192
## Starchy_Foods 0.150
## Pulses_nuts_oilseeds 0.567
## Fruits_Vegetables -0.211
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111
## Cumulative Var 0.111 0.222 0.333 0.444 0.556 0.667 0.778 0.889 1.000
Si hago este análisis, quiero reducir la dimensionalidad Los cuatro primeros components principals pueden considerarse los más significativos, ya que contienen casi el 86% de los datos (0.8567534)
summary(data_pca)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.9828553 1.2489623 1.0207403 0.9321032 0.6400533
## Proportion of Variance 0.4550596 0.1805448 0.1205915 0.1005574 0.0474153
## Cumulative Proportion 0.4550596 0.6356044 0.7561959 0.8567534 0.9041687
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.57711577 0.50866787 0.35936288 0.32716279
## Proportion of Variance 0.03854891 0.02994711 0.01494695 0.01238837
## Cumulative Proportion 0.94271757 0.97266468 0.98761163 1.00000000
corr_matrix <- cor(data_normalized)
corr_matrix
## Red_Meat White_Meat Eggs Milk Fish
## Red_Meat 1.00000000 0.18850977 0.57532001 0.5440251 0.06491072
## White_Meat 0.18850977 1.00000000 0.60095535 0.2974816 -0.19719960
## Eggs 0.57532001 0.60095535 1.00000000 0.6130310 0.04780844
## Milk 0.54402512 0.29748163 0.61303102 1.0000000 0.16246239
## Fish 0.06491072 -0.19719960 0.04780844 0.1624624 1.00000000
## Cereals -0.50970337 -0.43941908 -0.70131040 -0.5924925 -0.51714759
## Starchy_Foods 0.15383673 0.33456770 0.41266333 0.2144917 0.43868411
## Pulses_nuts_oilseeds -0.40988882 -0.67214885 -0.59519381 -0.6238357 -0.12226043
## Fruits_Vegetables -0.06393465 -0.07329308 -0.16392249 -0.3997753 0.22948842
## Cereals Starchy_Foods Pulses_nuts_oilseeds
## Red_Meat -0.50970337 0.1538367 -0.4098888
## White_Meat -0.43941908 0.3345677 -0.6721488
## Eggs -0.70131040 0.4126633 -0.5951938
## Milk -0.59249246 0.2144917 -0.6238357
## Fish -0.51714759 0.4386841 -0.1222604
## Cereals 1.00000000 -0.5781345 0.6360595
## Starchy_Foods -0.57813449 1.0000000 -0.4951880
## Pulses_nuts_oilseeds 0.63605948 -0.4951880 1.0000000
## Fruits_Vegetables 0.04229293 0.0683567 0.3513323
## Fruits_Vegetables
## Red_Meat -0.06393465
## White_Meat -0.07329308
## Eggs -0.16392249
## Milk -0.39977527
## Fish 0.22948842
## Cereals 0.04229293
## Starchy_Foods 0.06835670
## Pulses_nuts_oilseeds 0.35133227
## Fruits_Vegetables 1.00000000
ggcorrplot::ggcorrplot(corr_matrix)
## La relación es mayor entre variables de color rojo o rosa
## Muestra cuáles son los componentes principales más significativos
### Es el gráfico de lo que presenta el summary
fviz_eig(data_pca, addlabels=TRUE)
fviz_pca_var(data_pca)
## choice=var es para escoger por variables
## axes=1:2 son los components que quiero analizar, en este caso 1 y 2
fviz_cos2(data_pca, choice="var", axes=1:2)
## choice=var es para escoger por variables
## axes=1:2 son los components que quiero analizar, en este caso 1 a 4
fviz_cos2(data_pca, choice="var", axes=1:4)
## choice=var es para escoger por variables
## axes=1:2 son los components que quiero analizar, en este caso 1 a 4
fviz_cos2(data_pca, choice="var", axes=1:9)
Suponiendo que ya se tiene el objeto data.pca con los componentes principales
# Extraer los scores de los primeros cuatro componentes
pca_scores <- data_pca$scores[, 1:4]
# Realizar K-means clustering (25 observaciones correspondientes a los países)
set.seed(123) # Para reproducibilidad
kmeans_result <- kmeans(pca_scores, centers = 3, nstart = 25)
# Añadir el cluster asignado a cada país al dataframe original
protein_data$Cluster <- as.factor(kmeans_result$cluster)
# Visualización del clustering en el espacio de los dos primeros componentes principales
plot1<-ggplot(protein_data, aes(x = pca_scores[,1], y = pca_scores[,2], color = Cluster, label = rownames(protein_data))) +
geom_point(size = 5) + # Dibuja los puntos de dispersión con un tamaño de 5
geom_text(vjust = 1.5) + # Añade los nombres de los países, con un desplazamiento vertical para que no se superpongan con los puntos
labs(title = "Clustering de Países basado en el Consumo de Proteínas",
x = "Componente Principal 1", # Etiqueta del eje X
y = "Componente Principal 2") + # Etiqueta del eje Y
theme_minimal() # Utiliza un tema minimalista para la gráfica
ggplotly(plot1)
# Seleccionar las columnas Country y Cluster
country_clusters <- protein_data%>% select(Country,Cluster)
# Obtener los datos del mapa
world <- ne_countries(scale = "medium", returnclass = "sf")
# Revisar los nombres de los países
# Nombres de países en el mapa del mundo
world_countries <- unique(world$name)
# Nombres de países en tu dataset
protein_countries <- unique(country_clusters$Country)
# Países que están en el dataset y no aparecen en el mapa del mundo
missing_in_world <- setdiff(protein_countries, world_countries)
# Imprimir resultados
print("Países en el dataset pero no en el mapa del mundo:")
## [1] "Países en el dataset pero no en el mapa del mundo:"
print(missing_in_world)
## [1] "Czechoslovakia" "East_Germany" "The_Netherlands" "United_Kingdom"
## [5] "USSR" "West_Germany" "Yugoslavia"
country_clusters <- protein_data%>% select(Country,Cluster)%>%
mutate(Country = recode(Country,
"United_Kingdom" = "United Kingdom",
"Yugoslavia" = "Serbia",
"East_Germany"= "Germany",
"West_Germany"= "Germany",
"The_Netherlands"= "Netherlands",
"Czechoslovakia"= "Czechia",
"USSR"= "Russia"))
country_clusters
## Country Cluster
## 1 Albania 3
## 2 Austria 1
## 3 Belgium 1
## 4 Bulgaria 3
## 5 Czechia 1
## 6 Denmark 1
## 7 Germany 1
## 8 Finland 1
## 9 France 1
## 10 Greece 3
## 11 Hungary 3
## 12 Ireland 1
## 13 Italy 3
## 14 Netherlands 1
## 15 Norway 1
## 16 Poland 1
## 17 Portugal 2
## 18 Romania 3
## 19 Spain 2
## 20 Sweden 1
## 21 Switzerland 1
## 22 United Kingdom 1
## 23 Russia 3
## 24 Germany 1
## 25 Serbia 3
# se puede ajustar según el caso
# Unir los clústeres con los datos del mapa
map_data <- left_join(world, country_clusters, by = c("name" = "Country"))
ggplot(data = map_data) +
geom_sf(aes(fill = Cluster)) +
scale_fill_manual(values = c("pink", "green", "skyblue"), na.value = "lightgrey") +
theme_minimal() +
labs(title = "Clustering de Países Europeos basados en el Consumo de Proteínas",
fill = "Cluster") +
theme(legend.position = "bottom") +
coord_sf(xlim = c(-25, 45), ylim = c(35, 70), expand = FALSE)
# Ajustar la vista a Europa
Supongamos que quieres predecir el consumo de Red_Meat (Carne Roja) usando las componentes principales obtenidas del PCA.
# Crear un DataFrame con las componentes principales y la variable respuesta
regression_data <- data.frame(PC1 = pca_scores[, 1],
PC2 = pca_scores[, 2],
PC3 = pca_scores[, 3],
Red_Meat = protein_data$Red_Meat)
regression_data
## PC1 PC2 PC3 Red_Meat
## 1 -3.4062175 1.43187183 1.596648133 10
## 2 1.3961709 1.07844406 -1.234558817 9
## 3 1.6271911 -0.27394175 0.009163712 14
## 4 -3.0996115 1.50333675 -0.082356700 8
## 5 0.4277883 0.57418064 -1.159335459 10
## 6 2.4422594 -0.28305004 0.676942687 11
## 7 1.4249913 -0.60782538 -1.746831101 8
## 8 1.7006498 0.58298031 1.972677332 10
## 9 1.4354297 -0.89590251 0.161539920 18
## 10 -2.3291742 -0.86546599 1.227337046 10
## 11 -1.4302687 0.95052166 -1.782611863 5
## 12 2.5809791 0.82037615 0.161750192 14
## 13 -1.5501576 -0.16192833 0.053056104 9
## 14 1.7115591 0.78012960 -0.766301047 10
## 15 0.9571511 -1.10929163 1.319851198 9
## 16 0.1285106 -0.63184836 -1.522555810 7
## 17 -1.8854364 -4.23632323 -0.235407502 6
## 18 -2.6361730 1.10164486 -0.169166371 6
## 19 -1.4042842 -2.43957843 -0.249276728 7
## 20 1.9196053 0.08881654 1.085799797 10
## 21 0.8862644 0.79798276 0.228906351 13
## 22 1.9396765 0.32877834 1.274231236 17
## 23 -0.8607657 0.15774231 0.215679913 9
## 24 1.8007758 0.34409820 -0.872728311 11
## 25 -3.7769132 0.96425165 -0.162453908 4
# Ajustar un modelo de regresión lineal usando las primeras 3 componentes principales
regression_model <- lm(Red_Meat ~ PC1 + PC2 + PC3, data = regression_data)
regression_model
##
## Call:
## lm(formula = Red_Meat ~ PC1 + PC2 + PC3, data = regression_data)
##
## Coefficients:
## (Intercept) PC1 PC2 PC3
## 9.8000 1.0573 0.2368 1.2098
# Resumen del modelo
summary(regression_model)
##
## Call:
## lm(formula = Red_Meat ~ PC1 + PC2 + PC3, data = regression_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.123 -1.069 -0.436 1.221 6.699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.8000 0.4934 19.863 4.3e-15 ***
## PC1 1.0573 0.2488 4.249 0.000358 ***
## PC2 0.2368 0.3950 0.599 0.555329
## PC3 1.2098 0.4834 2.503 0.020650 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.467 on 21 degrees of freedom
## Multiple R-squared: 0.5403, Adjusted R-squared: 0.4746
## F-statistic: 8.227 on 3 and 21 DF, p-value: 0.0008238
plot_model <- ggplot(regression_data, aes(x = PC1, y = Red_Meat)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", formula = y ~ x, color = "red") +
labs(title = "Modelo de Regresión: Red_Meat vs. PC1",
x = "Componente Principal 1 (PC1)",
y = "Consumo de Carne Roja (Red_Meat)") +
theme_minimal()
# Mostrar el gráfico
plot_model
plot_model2 <- ggplot(regression_data, aes(x = PC2, y = Red_Meat)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", formula = y ~ x, color = "red") +
labs(title = "Modelo de Regresión: Red_Meat vs. PC2",
x = "Componente Principal 2 (PC2)",
y = "Consumo de Carne Roja (Red_Meat)") +
theme_minimal()
# Mostrar el gráfico
plot_model2
# Nuevo punto para predecir (valores de PC1, PC2, PC3)
nuevo_punto <- data.frame(PC1 = 0.5, PC2 = -0.3, PC3 = 0.2)
# Predicción del modelo
prediccion <- predict(regression_model, newdata = nuevo_punto)
# Mostrar la predicción
print(paste("Predicción del consumo de Red_Meat para los valores dados:", round(prediccion, 2)))
## [1] "Predicción del consumo de Red_Meat para los valores dados: 10.5"