Diferentes maneras de hacer gráficos
Scatter Plots
library(ggplot2)
# Load the auto-mpg data set, keeping strings as plain character vectors.
auto <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/auto-mpg.csv",
  stringsAsFactors = FALSE
)
# Recode the cylinder count as a labelled factor. The levels are spelled out
# so that each label is tied to its cylinder count instead of to whatever
# distinct values happen to be present in the file.
auto$cylinders <- factor(
  auto$cylinders,
  levels = c(3, 4, 5, 6, 8),
  labels = c("3C", "4C", "5C", "6C", "8C")
)
head(auto)
## No mpg cylinders displacement horsepower weight acceleration model_year
## 1 1 28 4C 140 90 2264 15.5 71
## 2 2 19 3C 70 97 2330 13.5 72
## 3 3 36 4C 107 75 2205 14.5 82
## 4 4 28 4C 97 92 2288 17.0 72
## 5 5 21 6C 199 90 2648 15.0 70
## 6 6 23 4C 115 95 2694 15.0 75
## car_name
## 1 chevrolet vega 2300
## 2 mazda rx2 coupe
## 3 honda accord
## 4 datsun 510 (sw)
## 5 amc gremlin
## 6 audi 100ls
# Base scatter plot: fuel consumption (mpg) against vehicle weight.
plot <- ggplot(data = auto, mapping = aes(x = weight, y = mpg))
plot + geom_point()

# Same scatter, coloured by cylinder group, with a linear fit (and its
# confidence band) and one facet row per cylinder group.
plot +
  geom_point(
    mapping = aes(color = factor(cylinders)),
    size = 5,
    alpha = 1 / 2
  ) +
  geom_smooth(method = "lm", se = TRUE, colour = "green") +
  facet_grid(cylinders ~ .) +
  theme_bw(base_size = 10, base_family = "Arial") +
  labs(
    x = "Peso",
    y = "Millas por Galón",
    title = "Consumo vs Peso"
  )
## `geom_smooth()` using formula 'y ~ x'

# Regression of mpg on weight with one colour (and one linear fit) per
# cylinder group. Written with ggplot() because qplot() is deprecated.
ggplot(auto, aes(x = weight, y = mpg, colour = cylinders)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  ggtitle("Regresión de MPG sobre el Peso")

Line Charts
library(ggplot2)
# Read the mtcars CSV export (strings stay as character) and preview it.
mtcars <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv",
  stringsAsFactors = FALSE
)
head(mtcars)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Line chart of mpg against weight.
plot <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
plot + geom_line()

# Same line, drawn dashed and in red.
plot + geom_line(color = "red", linetype = "dashed")

# One coloured line per carburettor count.
plot + geom_line(mapping = aes(color = as.factor(carb)))

Bar Graphs
# Daily bike-rental data. The integer-coded columns below are recoded as
# labelled factors; the labels are user-facing (they end up in plots and
# printed tables), so they must be correctly encoded Spanish text.
bike <- read.csv("C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/daily-bike-rentals.csv")
bike$season <- factor(
  bike$season,
  levels = c(1, 2, 3, 4),
  labels = c("Invierno", "Primavera", "Verano", "Otoño")
)
bike$workingday <- factor(
  bike$workingday,
  levels = c(0, 1),
  labels = c("Día libre", "Día de trabajo")
)
bike$weathersit <- factor(
  bike$weathersit,
  levels = c(1, 2, 3),
  labels = c("Buen tiempo", "Día nublado", "Mal tiempo")
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total rentals per season and working-day category. The column name
# `reantal` (sic) is kept because the plotting code below refers to it.
bike.sum <- bike %>%
  group_by(season, workingday) %>%
  summarize(reantal = sum(cnt))
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
bike.sum
## # A tibble: 8 x 3
## # Groups: season [4]
## season workingday reantal
## <fct> <fct> <int>
## 1 Invierno Día libre 137683
## 2 Invierno Día de trabajo 333665
## 3 Primavera Día libre 287976
## 4 Primavera Día de trabajo 630613
## 5 Verano Día libre 312056
## 6 Verano Día de trabajo 749073
## 7 Otoño Día libre 262554
## 8 Otoño Día de trabajo 579059
# Stacked bar chart of total rentals per season, with the formatted total
# written in the middle of each stacked segment.
# NOTE(review): the constant fill = "lightblue" passed to geom_bar() takes
# precedence over the mapped fill = workingday, so the requested legend has
# nothing to distinguish -- confirm whether the constant fill is intended.
ggplot(bike.sum, aes(x = season, y = reantal,
                     fill = workingday, label = scales::comma(reantal))) +
  geom_bar(show.legend = TRUE, stat = "identity",
           fill = "lightblue", colour = "black") +
  labs(title = "Alquileres de bicicletas por estación y día") +
  scale_y_continuous(labels = scales::comma) +
  geom_text(size = 3, position = position_stack(vjust = 0.5))

Distribution plots
library(ggplot2)
# Read the geyser eruption data and preview the first rows.
geiser <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/geiser.csv"
)
head(geiser)
## X eruptions waiting
## 1 1 3.600 79
## 2 2 1.800 54
## 3 3 3.333 74
## 4 4 2.283 62
## 5 5 4.533 85
## 6 6 2.883 55
# Histogram of waiting times with the default binning.
plot <- ggplot(data = geiser, mapping = aes(x = waiting))
plot + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with 5-unit bins, white bars and black outlines.
plot + geom_histogram(
  binwidth = 5,
  colour = "black",
  fill = "white"
)

# Density-scaled histogram of waiting times with a kernel density curve on
# top. after_stat(density) replaces the deprecated `..density..` notation.
ggplot(geiser, aes(x = waiting, y = after_stat(density))) +
  geom_histogram(fill = "cornsilk", color = "grey60", size = .2) +
  geom_density() + xlim(35, 105)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same view for the eruption durations.
ggplot(geiser, aes(x = eruptions, y = after_stat(density))) +
  geom_histogram(fill = "cornsilk", color = "grey60", size = .2) +
  geom_density() + xlim(0, 7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Mosaic Plot
library(stats)
# Re-read the mtcars CSV export and preview it.
mtcars <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv"
)
head(mtcars)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Mosaic plot of gear count against carburettor count, with horizontal
# axis labels (las = 1).
mosaicplot(
  ~ gear + carb,
  data = mtcars,
  las = 1,
  color = 2:7
)

Tree Maps
library(treemap)
# Small synthetic two-level hierarchy: 4 + 2 + 3 sub-branches.
branch <- rep(c("branch-1", "branch-2", "branch-3"), times = c(4, 2, 3))
subbranch <- paste("subbranch", c(1, 2, 3, 4, 1, 2, 1, 2, 3), sep = "-")
values <- c(15, 4, 22, 13, 11, 8, 6, 1, 25)
data <- data.frame(branch, subbranch, values)
# Tile area is driven by `values`; tiles are nested branch > subbranch.
treemap(
  data,
  index = c("branch", "subbranch"),
  vSize = "values",
  type = "index"
)

# Read the blog-post statistics and preview them.
posts <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/post-data.csv"
)
head(posts)
## id views comments category
## 1 5019 148896 28 Artistic Visualization
## 2 1416 81374 26 Visualization
## 3 1416 81374 26 Featured
## 4 3485 80819 37 Featured
## 5 3485 80819 37 Mapping
## 6 3485 80819 37 Data Sources
# Treemap of posts nested by category and then comment count, with tile
# area given by the number of views.
treemap(
  posts,
  type = "index",
  vSize = "views",
  index = c("category", "comments")
)

Correlation Matrix
library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
# Re-read the mtcars CSV export for the correlation analysis.
mtcars <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv"
)
head(mtcars)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Drop the X column (the car names in the preview above) so that only
# numeric columns are passed to cor().
mtcars$X <- NULL
# Pairwise Pearson correlations between all remaining columns.
mtcars.cor <- cor(mtcars, method = "pearson")
round(mtcars.cor, digits = 2)
## mpg cyl disp hp drat wt qsec vs am gear carb
## mpg 1.00 -0.85 -0.85 -0.78 0.68 -0.87 0.42 0.66 0.60 0.48 -0.55
## cyl -0.85 1.00 0.90 0.83 -0.70 0.78 -0.59 -0.81 -0.52 -0.49 0.53
## disp -0.85 0.90 1.00 0.79 -0.71 0.89 -0.43 -0.71 -0.59 -0.56 0.39
## hp -0.78 0.83 0.79 1.00 -0.45 0.66 -0.71 -0.72 -0.24 -0.13 0.75
## drat 0.68 -0.70 -0.71 -0.45 1.00 -0.71 0.09 0.44 0.71 0.70 -0.09
## wt -0.87 0.78 0.89 0.66 -0.71 1.00 -0.17 -0.55 -0.69 -0.58 0.43
## qsec 0.42 -0.59 -0.43 -0.71 0.09 -0.17 1.00 0.74 -0.23 -0.21 -0.66
## vs 0.66 -0.81 -0.71 -0.72 0.44 -0.55 0.74 1.00 0.17 0.21 -0.57
## am 0.60 -0.52 -0.59 -0.24 0.71 -0.69 -0.23 0.17 1.00 0.79 0.06
## gear 0.48 -0.49 -0.56 -0.13 0.70 -0.58 -0.21 0.21 0.79 1.00 0.27
## carb -0.55 0.53 0.39 0.75 -0.09 0.43 -0.66 -0.57 0.06 0.27 1.00
# Default correlation plot.
corrplot(mtcars.cor)

# Shaded cells, black labels rotated 45 degrees.
corrplot(
  mtcars.cor,
  method = "shade",
  shade.col = NA,
  tl.srt = 45,
  tl.col = "black"
)

# Custom red-white-blue palette generator, sampled at 200 colours below.
col <- colorRampPalette(
  c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
)
# Upper triangle only, no diagonal, coefficients printed in black,
# variables ordered with the "AOE" method.
corrplot(
  mtcars.cor,
  method = "square",
  type = "upper",
  order = "AOE",
  diag = FALSE,
  col = col(200),
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  addshade = "all"
)

library(reshape2)
# Reshape the correlation matrix into long form (one row per variable pair).
mtcars.melted <- melt(data = mtcars.cor)
head(mtcars.cor)
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.6811719 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.6999381 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.7102139 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.4487591 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.0000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.7124406 1.0000000
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.5998324 0.4802848 -0.5509251
## cyl -0.59124207 -0.8108118 -0.5226070 -0.4926866 0.5269883
## disp -0.43369788 -0.7104159 -0.5912270 -0.5555692 0.3949769
## hp -0.70822339 -0.7230967 -0.2432043 -0.1257043 0.7498125
## drat 0.09120476 0.4402785 0.7127111 0.6996101 -0.0907898
## wt -0.17471588 -0.5549157 -0.6924953 -0.5832870 0.4276059
head(mtcars.melted)
## Var1 Var2 value
## 1 mpg mpg 1.0000000
## 2 cyl mpg -0.8521620
## 3 disp mpg -0.8475514
## 4 hp mpg -0.7761684
## 5 drat mpg 0.6811719
## 6 wt mpg -0.8676594
# Basic heatmap: one tile per variable pair, filled by the correlation value.
ggplot(
  data = mtcars.melted,
  mapping = aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile()

# Keep the lower triangle (diagonal included) of a matrix.
# cormat: a matrix. Returns the same matrix with every entry strictly above
# the diagonal replaced by NA.
get_lower_triangle <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Keep the upper triangle (diagonal included) of a matrix.
# cormat: a matrix. Returns the same matrix with every entry strictly below
# the diagonal replaced by NA.
get_upper_triangle <- function(cormat) {
  replace(cormat, lower.tri(cormat), NA)
}
# Reorder a correlation matrix so that similar variables sit together.
# cormat: a square correlation matrix.
# Each correlation r is turned into the distance (1 - r) / 2, the variables
# are clustered with hclust(), and rows and columns are permuted into the
# resulting dendrogram order.
# Returns the permuted matrix visibly (ending on an assignment would return
# it invisibly, so a bare call at the console printed nothing).
reorder_cormat <- function(cormat) {
  dd <- as.dist((1 - cormat) / 2)
  hc <- hclust(dd)
  cormat[hc$order, hc$order]
}
# Cluster-order the correlation matrix, keep only its upper triangle and
# reshape it to long form, dropping the NA cells of the blanked triangle.
cormat <- reorder_cormat(mtcars.cor)
cormat.ut <- get_upper_triangle(cormat)
cormat.ut.melted <- melt(cormat.ut, na.rm = TRUE)
# Heatmap on a diverging blue-white-red scale fixed to [-1, 1]. The scale
# argument is spelled out as `limits` instead of relying on partial
# matching of `limit`.
ggplot(cormat.ut.melted, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red",
                       mid = "white", midpoint = 0,
                       limits = c(-1, 1), space = "Lab",
                       name = "Pearson\nCorrelation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                   size = 12, hjust = 1)) +
  coord_fixed()  # square tiles

Redes
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Six-vertex graphs built from the same edge list (consecutive pairs are
# edges; 5,5 is a self-loop): one directed, one undirected.
# make_graph() is used because graph() is deprecated in current igraph.
g.dir <- make_graph(edges = c(1,2, 2,3, 2,4, 1,4, 5,5, 3,6, 5,6), n = 6)
g.n_dir <- make_graph(edges = c(1,2, 2,3, 2,4, 1,4,
                                5,5, 3,6, 5,6), n = 6, directed = FALSE)
plot(g.dir)

plot(g.n_dir)

# Named graph with four extra vertices that have no edges.
# Vertex names are matched as exact strings, so every name must be spelled
# (and encoded) identically each time it appears: the two garbled spellings
# of "José" in the previous version produced two separate vertices.
# make_graph() is used because graph() is deprecated in current igraph.
g_isolated <- make_graph(edges = c("Juan", "María",
                                   "María", "Ana",
                                   "Ana", "Juan",
                                   "José", "María",
                                   "Pedro", "José",
                                   "Joel", "Pedro"),
                         isolates = c("Carmen", "Antonio",
                                      "Mario", "Vicente"))
par(mfrow = c(1, 1))
plot(g_isolated, edge.arrow.size = 1,
     vertex.color = "gold", vertex.size = 15,
     vertex.frame.color = "gray",
     vertex.label.color = "black",
     vertex.label.cex = 0.8,
     vertex.label.dist = 2,
     edge.curved = 0.2)

labels and legends
#ggtitle(titulo)
#xlab(eje x)
#ylab(eje y)
#labs(....)
library(ggplot2)
# Read the tooth-growth data and preview it.
tooth <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/ToothGrowth.csv"
)
head(tooth)
## X len supp dose
## 1 1 4.2 VC 0.5
## 2 2 11.5 VC 0.5
## 3 3 7.3 VC 0.5
## 4 4 5.8 VC 0.5
## 5 5 6.4 VC 0.5
## 6 6 10.0 VC 0.5
# box plot
# Box plot of tooth length per vitamin C dose, with title and axis labels.
# NOTE(review): legend.position and labs(fill = ...) configure a legend that
# guides(fill = "none") then removes -- confirm which of the two is wanted.
ggplot(tooth, aes(x = dose, y = len, fill = as.factor(dose))) +
  geom_boxplot() +
  ggtitle("Crecimiento dental en función de una dosis (mg/día) de vitamina C") +
  xlab("Dosis de vitamina C (mg/día)") +
  ylab("Crecimiento dental (en mm)") +
  labs(fill = "Dosis en mg/día") +
  theme(legend.position = "bottom") +
  # "none" replaces the deprecated guides(fill = FALSE) form.
  guides(fill = "none")

# Box plot of tooth length against dose, used to demonstrate theme tweaks:
# custom fonts/colours/angles for the axis tick labels, no panel border and
# no grid lines.
# NOTE(review): dose is mapped as-is with no grouping; if it is read as a
# numeric column this may draw a single box -- confirm that is intended.
ggplot(data = tooth, mapping = aes(x = dose, y = len)) +
  geom_boxplot() +
  theme_bw() +
  # Alternative base themes tried while exploring:
  # theme_dark() +
  # theme_classic() +
  # theme_grey() +
  # theme(plot.background = element_rect(fill = "darkblue"))
  theme(
    axis.text.x = element_text(
      family = "Times",
      face = "bold",
      size = 14,
      angle = 45,
      color = "#995566"
    ),
    axis.text.y = element_text(
      family = "Courier",
      face = "italic",
      size = 16,
      angle = 30,
      color = "#449955"
    ),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

Plot Multivariante
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Re-read the raw bike-rental data (integer-coded columns) and preview it.
bike <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/daily-bike-rentals.csv"
)
head(bike)
## instant dteday season yr mnth holiday weekday workingday weathersit
## 1 1 2011-01-01 1 0 1 0 6 0 2
## 2 2 2011-01-02 1 0 1 0 0 0 2
## 3 3 2011-01-03 1 0 1 0 1 1 1
## 4 4 2011-01-04 1 0 1 0 2 1 1
## 5 5 2011-01-05 1 0 1 0 3 1 1
## 6 6 2011-01-06 1 0 1 0 4 1 1
## temp atemp hum windspeed casual registered cnt
## 1 0.344167 0.363625 0.805833 0.1604460 331 654 985
## 2 0.363478 0.353739 0.696087 0.2485390 131 670 801
## 3 0.196364 0.189405 0.437273 0.2483090 120 1229 1349
## 4 0.200000 0.212122 0.590435 0.1602960 108 1454 1562
## 5 0.226957 0.229270 0.436957 0.1869000 82 1518 1600
## 6 0.204348 0.233209 0.518261 0.0895652 88 1518 1606
# Recode the integer-coded columns as labelled factors. The labels are
# user-facing (facet strips, legends), so they must be correctly encoded.
bike$season <- factor(
  bike$season,
  levels = c(1, 2, 3, 4),
  labels = c("Invierno", "Primavera", "Verano", "Otoño")
)
bike$weathersit <- factor(
  bike$weathersit,
  levels = c(1, 2, 3),
  labels = c("Despejado", "Nublado", "Lluvia")
)
# Weekday codes 0..6 get one-letter labels; code 0 is labelled "D".
bike$weekday <- factor(
  bike$weekday,
  levels = 0:6,
  labels = c("D", "L", "M", "X", "J", "V", "S")
)
# Inspect the wind-speed distribution before binning it.
hist(bike$windspeed)

# Bin wind speed into three intervals labelled low / medium / high.
bike$windspeed.fac <- cut(
  bike$windspeed,
  breaks = 3,
  labels = c("Poco", "Medio", "Elevado")
)
head(bike)
## instant dteday season yr mnth holiday weekday workingday weathersit
## 1 1 2011-01-01 Invierno 0 1 0 S 0 Nublado
## 2 2 2011-01-02 Invierno 0 1 0 D 0 Nublado
## 3 3 2011-01-03 Invierno 0 1 0 L 1 Despejado
## 4 4 2011-01-04 Invierno 0 1 0 M 1 Despejado
## 5 5 2011-01-05 Invierno 0 1 0 X 1 Despejado
## 6 6 2011-01-06 Invierno 0 1 0 J 1 Despejado
## temp atemp hum windspeed casual registered cnt windspeed.fac
## 1 0.344167 0.363625 0.805833 0.1604460 331 654 985 Poco
## 2 0.363478 0.353739 0.696087 0.2485390 131 670 801 Medio
## 3 0.196364 0.189405 0.437273 0.2483090 120 1229 1349 Medio
## 4 0.200000 0.212122 0.590435 0.1602960 108 1454 1562 Poco
## 5 0.226957 0.229270 0.436957 0.1869000 82 1518 1600 Medio
## 6 0.204348 0.233209 0.518261 0.0895652 88 1518 1606 Poco
# Rentals against temperature, points coloured by wind-speed bin, with a
# red linear fit (no confidence band) in every weekday x season panel.
ggplot(data = bike, mapping = aes(x = temp, y = cnt)) +
  geom_point(mapping = aes(color = windspeed.fac), size = 3) +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  facet_grid(weekday ~ season) +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula 'y ~ x'

# Reload the auto-mpg data and recode cylinders as a labelled factor. The
# levels are spelled out so each label is tied to its cylinder count rather
# than to whatever distinct values happen to be present in the file.
auto <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/auto-mpg.csv",
  stringsAsFactors = FALSE
)
auto$cylinders <- factor(
  auto$cylinders,
  levels = c(3, 4, 5, 6, 8),
  labels = c("3C", "4C", "5C", "6C", "8C")
)
# Alternative output devices:
#postscript(file="multivariant.ps")
#pdf(file="multivariant.pdf")
png(file="C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/multivariant.png", width = 3000, height = 3000, res = 72)
# Pairs plot of columns 2 to 7, coloured by cylinder group. The explicit
# print() makes sure the plot is drawn on the open device even when the
# script is run with source(), where top-level values are not auto-printed.
print(
  ggpairs(auto[, 2:7],
          aes(colour = cylinders,
              alpha = 0.4),
          title = "Análisis multivariante de coches",
          upper = list(continuous = "density"),
          lower = list(combo = "denstrip")) +
    theme(plot.title = element_text(hjust = 0.5))  # centre the title
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
dev.off()
## png
## 2
Gráficos en 3D
library(plot3D)
## Warning: package 'plot3D' was built under R version 4.1.3
# Re-read mtcars, move the X column into the row names and drop it, so the
# data frame holds only the numeric columns.
mtcars <- read.csv(
  file = "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv"
)
rownames(mtcars) <- mtcars$X
mtcars$X <- NULL
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# 3D scatter of displacement, weight and mpg.
scatter3D(
  x = mtcars$disp,
  y = mtcars$wt,
  z = mtcars$mpg,
  main = "Coches de los 70'",
  xlab = "Desplazamiento (cu.in.)",
  ylab = "Peso (x1000lb)",
  zlab = "Millas por galón",
  clab = c("Millas/Galón"),
  theta = 18,  # azimuthal viewing angle
  phi = 20,    # colatitude viewing angle
  pch = 19,
  cex = 0.5,
  bty = "g"
)
# Add each car's name (the row names) at its point on the existing plot,
# without drawing a second colour key.
text3D(
  x = mtcars$disp,
  y = mtcars$wt,
  z = mtcars$mpg,
  labels = rownames(mtcars),
  cex = 0.5,
  colkey = FALSE,
  add = TRUE
)

data(VADeaths)
head(VADeaths)
## Rural Male Rural Female Urban Male Urban Female
## 50-54 11.7 8.7 15.4 8.4
## 55-59 18.1 11.7 24.3 13.6
## 60-64 26.9 20.3 37.0 19.3
## 65-69 41.0 30.9 54.6 35.1
## 70-74 66.0 54.3 71.1 50.0
# 3D histogram of the VADeaths matrix: blue bars with black borders and
# detailed axis ticks.
hist3D(
  z = VADeaths,
  scale = FALSE,
  expand = 0.01,
  phi = 30,
  bty = "g",
  col = "#1188CC",
  border = "black",
  shade = 0.2,
  ltheta = 80,
  space = 0.3,
  ticktype = "detailed"
)

# 3D scatter of displacement, weight and mpg using plot type "h".
scatter3D(
  x = mtcars$disp,
  y = mtcars$wt,
  z = mtcars$mpg,
  type = "h"
)
