Diferentes maneras de hacer gráficos

Scatter Plots

library(ggplot2)
# Load the auto-mpg data set; text columns stay as character vectors.
auto <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/auto-mpg.csv",
  stringsAsFactors = FALSE
)
# Recode the cylinder count as a labelled factor. The levels are spelled out
# so every label is tied to its numeric value instead of depending on the
# sorted order of whatever values happen to be present in the file.
auto$cylinders <- factor(auto$cylinders,
                         levels = c(3, 4, 5, 6, 8),
                         labels = c("3C", "4C", "5C", "6C", "8C"))
head(auto)
##   No mpg cylinders displacement horsepower weight acceleration model_year
## 1  1  28        4C          140         90   2264         15.5         71
## 2  2  19        3C           70         97   2330         13.5         72
## 3  3  36        4C          107         75   2205         14.5         82
## 4  4  28        4C           97         92   2288         17.0         72
## 5  5  21        6C          199         90   2648         15.0         70
## 6  6  23        4C          115         95   2694         15.0         75
##              car_name
## 1 chevrolet vega 2300
## 2     mazda rx2 coupe
## 3        honda accord
## 4     datsun 510 (sw)
## 5         amc gremlin
## 6          audi 100ls
# Base scatter-plot object: weight on x, mpg on y.
plot <- ggplot(data = auto, mapping = aes(x = weight, y = mpg))

# Plain scatter plot.
plot + geom_point()

# Semi-transparent points coloured by cylinder count, a green linear fit with
# its confidence band, and one facet row per cylinder level.
plot +
  geom_point(mapping = aes(color = factor(cylinders)), alpha = 1 / 2, size = 5) +
  geom_smooth(method = "lm", se = TRUE, col = "green") +
  facet_grid(cylinders ~ .) +
  theme_bw(base_size = 10, base_family = "Arial") +
  labs(x = "Peso", y = "Millas por Galón", title = "Consumo vs Peso")
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of mpg against weight with one linear fit per cylinder group.
# Written with ggplot() because qplot() is deprecated in current ggplot2.
ggplot(auto, aes(x = weight, y = mpg, color = cylinders)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  ggtitle("Regresión de MPG sobre el Peso")

Line Charts

library(ggplot2)
# Load the mtcars CSV; the car names arrive in the first column, `X`.
mtcars <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv",
  stringsAsFactors = FALSE
)
head(mtcars)
##                   X  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Line-chart base object: wt on x, mpg on y.
plot <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))

# Default solid line.
plot + geom_line()

# Red dashed line.
plot + geom_line(color = "red", linetype = "dashed")

# One coloured line per value of `carb`.
plot + geom_line(mapping = aes(color = as.factor(carb)))

Bar Graphs

# Daily bike-rental data; the coded columns are turned into labelled factors.
bike <- read.csv("C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/daily-bike-rentals.csv")
bike$season <- factor(bike$season,
                      levels = c(1, 2, 3, 4),
                      labels = c("Invierno", "Primavera", "Verano", "Otoño"))
bike$workingday <- factor(bike$workingday,
                          levels = c(0, 1),
                          labels = c("Día libre", "Día de trabajo"))
bike$weathersit <- factor(bike$weathersit,
                          levels = c(1, 2, 3),
                          labels = c("Buen tiempo", "Día nublado", "Mal tiempo"))

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total rentals for every season / day-type combination.
bike.sum <- bike %>%
  group_by(season, workingday) %>%
  summarize(rental = sum(cnt))
## `summarise()` has grouped output by 'season'. You can override using the
## `.groups` argument.
bike.sum
## # A tibble: 8 x 3
## # Groups:   season [4]
##   season    workingday     rental
##   <fct>     <fct>           <int>
## 1 Invierno  Día libre      137683
## 2 Invierno  Día de trabajo 333665
## 3 Primavera Día libre      287976
## 4 Primavera Día de trabajo 630613
## 5 Verano    Día libre      312056
## 6 Verano    Día de trabajo 749073
## 7 Otoño     Día libre      262554
## 8 Otoño     Día de trabajo 579059
# Stacked bars per season, split by day type, each segment labelled with its
# total. No constant `fill` is passed to the bar layer: a constant would
# override the `fill = workingday` mapping and make the segments (and the
# legend) indistinguishable.
ggplot(bike.sum, aes(x = season, y = rental,
                     fill = workingday, label = scales::comma(rental))) +
  geom_col(show.legend = TRUE, colour = "black") +
  labs(title = "Alquileres de bicicletas por estación y día") +
  scale_y_continuous(labels = scales::comma) +
  geom_text(size = 3, position = position_stack(vjust = 0.5))

Distribution plots

library(ggplot2)

# Geyser data set with `eruptions` and `waiting` columns.
geiser <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/geiser.csv"
)
head(geiser)
##   X eruptions waiting
## 1 1     3.600      79
## 2 2     1.800      54
## 3 3     3.333      74
## 4 4     2.283      62
## 5 5     4.533      85
## 6 6     2.883      55
# Histogram base object for the `waiting` column.
plot <- ggplot(data = geiser, mapping = aes(x = waiting))
# Default binning (30 bins).
plot + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Bins of width 5, white bars outlined in black.
plot + geom_histogram(colour = "black", fill = "white", binwidth = 5)

# Density-scaled histogram of `waiting` with a kernel density curve on top.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(geiser, aes(x = waiting, y = after_stat(density))) +
  geom_histogram(fill = "cornsilk", color = "grey60", size = .2) +
  geom_density() +
  xlim(35, 105)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same view for the `eruptions` column.
ggplot(geiser, aes(x = eruptions, y = after_stat(density))) +
  geom_histogram(fill = "cornsilk", color = "grey60", size = .2) +
  geom_density() +
  xlim(0, 7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Mosaic Plot

library(stats)
# Reload mtcars from disk for the mosaic plot.
mtcars <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv"
)
head(mtcars)
##                   X  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Mosaic of gear count against carburettor count, horizontal axis labels.
mosaicplot(
  ~ gear + carb,
  data = mtcars,
  color = 2:7,
  las = 1
)

Tree Maps

library(treemap)

# Toy two-level hierarchy: three branches with 4, 2 and 3 sub-branches.
branch <- rep(c("branch-1", "branch-2", "branch-3"), times = c(4, 2, 3))
subbranch <- paste0("subbranch-", c(1, 2, 3, 4, 1, 2, 1, 2, 3))
values <- c(15, 4, 22, 13, 11, 8, 6, 1, 25)
data <- data.frame(branch, subbranch, values)

# Tiles are sized by `values` and nested by branch, then sub-branch.
treemap(data,
        vSize = "values",
        index = c("branch", "subbranch"),
        type = "index")

# Post data with `id`, `views`, `comments` and `category` columns.
posts <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/post-data.csv"
)
head(posts)
##     id  views comments               category
## 1 5019 148896       28 Artistic Visualization
## 2 1416  81374       26          Visualization
## 3 1416  81374       26               Featured
## 4 3485  80819       37               Featured
## 5 3485  80819       37                Mapping
## 6 3485  80819       37           Data Sources
# Tiles are sized by views and nested by category, then comment count.
treemap(posts,
        vSize = "views",
        index = c("category", "comments"),
        type = "index")

Correlation Matrix

library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
# Reload mtcars from disk for the correlation analysis.
mtcars <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv"
)
head(mtcars)
##                   X  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Drop the car-name column so only numeric columns reach cor().
mtcars[["X"]] <- NULL
mtcars.cor <- cor(x = mtcars, method = "pearson")

# Correlations rounded to two decimals for display.
round(mtcars.cor, 2)
##        mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
## mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42  0.66  0.60  0.48 -0.55
## cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59 -0.81 -0.52 -0.49  0.53
## disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43 -0.71 -0.59 -0.56  0.39
## hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71 -0.72 -0.24 -0.13  0.75
## drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09  0.44  0.71  0.70 -0.09
## wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17 -0.55 -0.69 -0.58  0.43
## qsec  0.42 -0.59 -0.43 -0.71  0.09 -0.17  1.00  0.74 -0.23 -0.21 -0.66
## vs    0.66 -0.81 -0.71 -0.72  0.44 -0.55  0.74  1.00  0.17  0.21 -0.57
## am    0.60 -0.52 -0.59 -0.24  0.71 -0.69 -0.23  0.17  1.00  0.79  0.06
## gear  0.48 -0.49 -0.56 -0.13  0.70 -0.58 -0.21  0.21  0.79  1.00  0.27
## carb -0.55  0.53  0.39  0.75 -0.09  0.43 -0.66 -0.57  0.06  0.27  1.00
# Default correlation plot.
corrplot(mtcars.cor)

# Shaded cells with black variable labels rotated 45 degrees.
corrplot(mtcars.cor, method = "shade",
         shade.col = NA, tl.col = "black",
         tl.srt = 45)

# Five-colour palette generator running from red tones through white to blue.
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF",
                          "#77AADD", "#4477AA"))
# Upper triangle only, without the diagonal, with the coefficients printed in
# black and the variables ordered by "AOE".
corrplot(mtcars.cor, method = "square",
         tl.col = "black",
         tl.srt = 45, col = col(200),
         addCoef.col = "black",
         order = "AOE",
         type = "upper",
         diag = FALSE,
         addshade = "all")

library(reshape2)
# Long-format copy of the correlation matrix: one row per pair of variables.
mtcars.melted <- reshape2::melt(mtcars.cor)
head(x = mtcars.cor)
##             mpg        cyl       disp         hp       drat         wt
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.6811719 -0.8676594
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.6999381  0.7824958
## disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.7102139  0.8879799
## hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.4487591  0.6587479
## drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.0000000 -0.7124406
## wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.7124406  1.0000000
##             qsec         vs         am       gear       carb
## mpg   0.41868403  0.6640389  0.5998324  0.4802848 -0.5509251
## cyl  -0.59124207 -0.8108118 -0.5226070 -0.4926866  0.5269883
## disp -0.43369788 -0.7104159 -0.5912270 -0.5555692  0.3949769
## hp   -0.70822339 -0.7230967 -0.2432043 -0.1257043  0.7498125
## drat  0.09120476  0.4402785  0.7127111  0.6996101 -0.0907898
## wt   -0.17471588 -0.5549157 -0.6924953 -0.5832870  0.4276059
head(x = mtcars.melted)
##   Var1 Var2      value
## 1  mpg  mpg  1.0000000
## 2  cyl  mpg -0.8521620
## 3 disp  mpg -0.8475514
## 4   hp  mpg -0.7761684
## 5 drat  mpg  0.6811719
## 6   wt  mpg -0.8676594
# Basic heatmap: one tile per variable pair, filled by the correlation value.
ggplot(mtcars.melted, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile()

# Return a copy of `cormat` whose entries strictly above the diagonal are NA,
# leaving the lower triangle and the diagonal untouched.
get_lower_triangle <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}

# Return a copy of `cormat` whose entries strictly below the diagonal are NA,
# leaving the upper triangle and the diagonal untouched.
get_upper_triangle <- function(cormat) {
  below_diagonal <- lower.tri(cormat)
  cormat[below_diagonal] <- NA
  cormat
}

# Reorder the rows and columns of a correlation matrix by hierarchical
# clustering, so that strongly correlated variables end up next to each other.
#
# cormat: square correlation matrix.
# Returns the same matrix with rows and columns permuted into the order
# produced by hclust(). The result is returned visibly (the function no
# longer ends on an assignment), so calling it at the console prints it.
reorder_cormat <- function(cormat) {
  # hclust() needs at least two objects; with fewer there is nothing to sort.
  if (NROW(cormat) < 2) {
    return(cormat)
  }
  # Turn correlations into distances: r = 1 maps to 0 and r = -1 maps to 1.
  dd <- as.dist((1 - cormat) / 2)
  hc <- hclust(dd)
  cormat[hc$order, hc$order]
}



# Cluster-order the correlation matrix, keep only its upper triangle and
# reshape it to long format; the NA cells of the lower triangle are dropped.
cormat <- reorder_cormat(mtcars.cor)
cormat.ut <- get_upper_triangle(cormat)
cormat.ut.melted <- melt(cormat.ut, na.rm = TRUE)

# Triangular heatmap on a blue-white-red scale fixed to [-1, 1].
# `limits` is spelled out in full rather than relying on partial matching.
ggplot(cormat.ut.melted, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red",
                       mid = "white", midpoint = 0,
                       limits = c(-1, 1), space = "Lab",
                       name = "Pearson\nCorrelation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                   size = 12, hjust = 1)) +
  coord_fixed()

Redes

library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Directed and undirected versions of the same six-vertex graph (vertex 5 has
# a self-loop). make_graph() is used because graph() is deprecated in igraph.
g.dir <- make_graph(edges = c(1,2, 2,3, 2,4, 1,4, 5,5, 3,6, 5,6), n = 6)
g.n_dir <- make_graph(edges = c(1,2, 2,3, 2,4, 1,4,
                                5,5, 3,6, 5,6), n = 6, directed = FALSE)

plot(g.dir)

plot(g.n_dir)

# Named graph: consecutive pairs of names are edges, and `isolates` adds
# vertices that have no edges at all.
g_isolated <- make_graph(edges = c("Juan", "María",
                                   "María", "Ana",
                                   "Ana", "Juan",
                                   "José", "María",
                                   "Pedro", "José",
                                   "Joel", "Pedro"),
                         isolates = c("Carmen", "Antonio",
                                      "Mario", "Vicente"))

# Single-panel layout, then draw with gold vertices and curved edges.
par(mfrow = c(1, 1))
plot(g_isolated, edge.arrow.size = 1,
     vertex.color = "gold", vertex.size = 15,
     vertex.frame.color = "gray",
     vertex.label.color = "black",
     vertex.label.cex = 0.8,
     vertex.label.dist = 2,
     edge.curved = 0.2)

Labels and Legends

#ggtitle(titulo)
#xlab(eje x)
#ylab(eje y)
#labs(....)

library(ggplot2)
# Tooth-growth data with `len`, `supp` and `dose` columns.
tooth <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/ToothGrowth.csv"
)
head(tooth)
##   X  len supp dose
## 1 1  4.2   VC  0.5
## 2 2 11.5   VC  0.5
## 3 3  7.3   VC  0.5
## 4 4  5.8   VC  0.5
## 5 5  6.4   VC  0.5
## 6 6 10.0   VC  0.5
# Box plot of tooth length per vitamin C dose, one fill colour per dose.
ggplot(tooth, aes(x = dose, y = len, fill = as.factor(dose))) +
  geom_boxplot() +
  ggtitle("Crecimiento dental en función de una dosis (mg/día) de vitamina C") +
  xlab("Dosis de vitamina C (mg/día)") +
  ylab("Crecimiento dental (en mm)") +
  labs(fill = "Dosis en mg/día") +
  theme(legend.position = "bottom") +
  # "none" is the supported way to hide a legend; passing a logical is
  # deprecated in ggplot2.
  guides(fill = "none")

# Box plot of tooth length per dose with customised axis text and a stripped
# panel. `dose` is numeric, so it is also mapped to `group`: without a
# grouping variable a continuous x collapses all observations into one box.
ggplot(tooth, aes(x = dose, y = len, group = dose)) +
  geom_boxplot() +
  theme_bw() +
  # Alternative themes left here for experimentation:
  #theme_dark()+
  #theme_classic()+
  #theme_grey()+
  #theme(plot.background = element_rect(fill="darkblue"))
  theme(axis.text.x = element_text(face = "bold",
                                   family = "Times",
                                   size = 14,
                                   angle = 45,
                                   color = "#995566"),
        axis.text.y = element_text(face = "italic",
                                   family = "Courier",
                                   size = 16,
                                   angle = 30,
                                   color = "#449955")
        ) +
  # Remove the panel border and every grid line.
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

Plot Multivariante

library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Reload the raw bike-rental data (coded columns are numeric again).
bike <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/daily-bike-rentals.csv"
)
head(bike)
##   instant     dteday season yr mnth holiday weekday workingday weathersit
## 1       1 2011-01-01      1  0    1       0       6          0          2
## 2       2 2011-01-02      1  0    1       0       0          0          2
## 3       3 2011-01-03      1  0    1       0       1          1          1
## 4       4 2011-01-04      1  0    1       0       2          1          1
## 5       5 2011-01-05      1  0    1       0       3          1          1
## 6       6 2011-01-06      1  0    1       0       4          1          1
##       temp    atemp      hum windspeed casual registered  cnt
## 1 0.344167 0.363625 0.805833 0.1604460    331        654  985
## 2 0.363478 0.353739 0.696087 0.2485390    131        670  801
## 3 0.196364 0.189405 0.437273 0.2483090    120       1229 1349
## 4 0.200000 0.212122 0.590435 0.1602960    108       1454 1562
## 5 0.226957 0.229270 0.436957 0.1869000     82       1518 1600
## 6 0.204348 0.233209 0.518261 0.0895652     88       1518 1606
# Turn the coded columns into labelled factors.
bike$season <- factor(bike$season,
                      levels = c(1, 2, 3, 4),
                      labels = c("Invierno", "Primavera", "Verano", "Otoño"))

bike$weathersit <- factor(bike$weathersit,
                          levels = c(1, 2, 3),
                          labels = c("Despejado", "Nublado", "Lluvia"))

# Weekday codes 0..6 become one-letter day labels.
bike$weekday <- factor(bike$weekday,
                       levels = 0:6,
                       labels = c("D", "L", "M", "X", "J", "V", "S"))

# Look at the wind-speed distribution before binning it.
hist(bike$windspeed)

# Bin wind speed into three equal-width intervals.
bike$windspeed.fac <- cut(bike$windspeed, breaks = 3,
                          labels = c("Poco", "Medio", "Elevado"))

head(bike)
##   instant     dteday   season yr mnth holiday weekday workingday weathersit
## 1       1 2011-01-01 Invierno  0    1       0       S          0    Nublado
## 2       2 2011-01-02 Invierno  0    1       0       D          0    Nublado
## 3       3 2011-01-03 Invierno  0    1       0       L          1  Despejado
## 4       4 2011-01-04 Invierno  0    1       0       M          1  Despejado
## 5       5 2011-01-05 Invierno  0    1       0       X          1  Despejado
## 6       6 2011-01-06 Invierno  0    1       0       J          1  Despejado
##       temp    atemp      hum windspeed casual registered  cnt windspeed.fac
## 1 0.344167 0.363625 0.805833 0.1604460    331        654  985          Poco
## 2 0.363478 0.353739 0.696087 0.2485390    131        670  801         Medio
## 3 0.196364 0.189405 0.437273 0.2483090    120       1229 1349         Medio
## 4 0.200000 0.212122 0.590435 0.1602960    108       1454 1562          Poco
## 5 0.226957 0.229270 0.436957 0.1869000     82       1518 1600         Medio
## 6 0.204348 0.233209 0.518261 0.0895652     88       1518 1606          Poco
# Rentals against temperature, coloured by wind-speed bin, with a red linear
# fit (no confidence band) in every weekday x season panel.
ggplot(bike, aes(x = temp, y = cnt)) +
  geom_point(size = 3, aes(color = windspeed.fac)) +
  theme(legend.position = "bottom") +
  geom_smooth(method = "lm", se = FALSE, col = "red") +
  facet_grid(weekday ~ season)
## `geom_smooth()` using formula 'y ~ x'

# Reload the auto-mpg data and recode the cylinder count. The levels are
# explicit so each label is tied to its numeric value.
auto <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/auto-mpg.csv",
  stringsAsFactors = FALSE
)
auto$cylinders <- factor(auto$cylinders,
                         levels = c(3, 4, 5, 6, 8),
                         labels = c("3C", "4C", "5C", "6C", "8C"))

# Write the pairs plot of columns 2 to 7 to a PNG file.
#postscript(file="multivariant.ps")
#pdf(file="multivariant.pdf")
png(file="C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/multivariant.png", width = 3000, height = 3000, res = 72)
# The plot is printed explicitly: auto-printing only happens at top level, so
# without print() the PNG stays empty when this code is run via source() or
# from inside a function.
print(
  ggpairs(auto[, 2:7],
          aes(colour = cylinders,
              alpha = 0.4),
          title = "Análisis multivariante de coches",
          upper = list(continuous = "density"),
          lower = list(combo = "denstrip")) +
    theme(plot.title = element_text(hjust = 0.5))
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Close the device so the file is flushed to disk.
dev.off()
## png 
##   2

Gráficos en 3D

library(plot3D)
## Warning: package 'plot3D' was built under R version 4.1.3
# Reload mtcars and move the car names from column `X` into the row names.
mtcars <- read.csv(
  "C:/Users/LUIS 1/Desktop/MachineLearinngR/Visualizacion/mtcars.csv"
)

rownames(mtcars) <- mtcars[["X"]]
mtcars[["X"]] <- NULL
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# 3D scatter of displacement, weight and mpg.
scatter3D(x = mtcars$disp,
          y = mtcars$wt,
          z = mtcars$mpg,
          clab = c("Millas/Galón"),
          pch = 19,
          cex = 0.5,
          theta = 18, # azimuthal angle
          phi = 20, # colatitude
          main = "Coches de los 70'",
          xlab = "Desplazamiento (cu.in.)",
          ylab = "Peso (x1000lb)",
          zlab = "Millas por galón",
          bty = "g")

# Add the car names on top of the existing plot, without a second colour key.
text3D(x = mtcars$disp,
       y = mtcars$wt,
       z = mtcars$mpg,
       labels = rownames(mtcars),
       add = TRUE,
       colkey = FALSE,
       cex = 0.5)

# Built-in VADeaths matrix: age groups in rows, population groups in columns.
data(VADeaths)
head(VADeaths)
##       Rural Male Rural Female Urban Male Urban Female
## 50-54       11.7          8.7       15.4          8.4
## 55-59       18.1         11.7       24.3         13.6
## 60-64       26.9         20.3       37.0         19.3
## 65-69       41.0         30.9       54.6         35.1
## 70-74       66.0         54.3       71.1         50.0
# 3D histogram with one bar per matrix cell.
hist3D(z = VADeaths,
       scale = FALSE, expand = 0.01,
       bty = "g", phi = 30,
       col = "#1188CC", border = "black",
       shade = 0.2, ltheta = 80,
       space = 0.3,
       ticktype = "detailed")

# 3D scatter of displacement, weight and mpg drawn with type "h".
scatter3D(x = mtcars$disp,
          y = mtcars$wt,
          z = mtcars$mpg,
          type = "h")