Primero la Sección de Librerías de Funciones:
```{r INSTALACION LIBRERIAS }
Error: attempt to use zero-length variable name
Llamada a LIBRERIAS:
# Adicionales Octubre 2021:
library(sqldf) # Para SQL en R
Loading required package: gsubfn
Loading required package: proto
Loading required package: RSQLite
A partir de aquí la Sección de Importación de Datasets:
```{r DATA }
Error: attempt to use zero-length variable name
REVISION RAPIDA DEL DATAFRAME:
#View(database)
summary(database) # Summary Estadístico.
name year length_min genre average_rating cost_millions foreign age_restriction
Length:30 Min. :1936 Min. : 81.00 Length:30 Min. :5.200 Min. : 0.400 Min. :0.0 Min. : 0.00
Class :character 1st Qu.:1988 1st Qu.: 99.25 Class :character 1st Qu.:7.925 1st Qu.: 3.525 1st Qu.:0.0 1st Qu.:12.00
Mode :character Median :1998 Median :110.50 Mode :character Median :8.300 Median : 13.000 Median :0.0 Median :14.00
Mean :1996 Mean :116.80 Mean :8.103 Mean : 22.300 Mean :0.4 Mean :12.93
3rd Qu.:2008 3rd Qu.:124.25 3rd Qu.:8.500 3rd Qu.: 25.000 3rd Qu.:1.0 3rd Qu.:16.00
Max. :2015 Max. :179.00 Max. :9.300 Max. :165.000 Max. :1.0 Max. :18.00
head(database) # Primeros 6.
names(database) # Names de columnas.
[1] "name" "year" "length_min" "genre" "average_rating" "cost_millions" "foreign" "age_restriction"
print(is.data.frame(database))
[1] TRUE
#attach(database) #only if there is only 1 dataset
# CONTENIDO DE TABLA:
# database es la tabla con datos de películas.
ANALIZAMOS LA ESTRUCTURA DE LA TABLA:
Función str: structure Lab08-importingData
# Prints out the structure of your table.
str(database) # es la función structure
tibble [30 x 8] (S3: tbl_df/tbl/data.frame)
$ name : chr [1:30] "Toy Story" "Akira" "The Breakfast Club" "The Artist" ...
$ year : num [1:30] 1995 1998 1985 2011 1936 ...
$ length_min : num [1:30] 81 125 97 100 87 139 130 119 121 122 ...
$ genre : chr [1:30] "Animation" "Animation" "Drama" "Romance" ...
$ average_rating : num [1:30] 8.3 8.1 7.9 8 8.6 8.9 8.7 7.9 8.7 8.4 ...
$ cost_millions : num [1:30] 30 10.4 1 15 1.5 63 3.3 25 11 15 ...
$ foreign : num [1:30] 0 1 0 1 0 0 1 0 0 0 ...
$ age_restriction: num [1:30] 0 14 14 12 10 18 18 14 10 14 ...
A partir de aquí inicia el Cuerpo del Script:
EJERCICIO FUNCIONES PROPIAS EN R
# UDF - User Defined Functions
sum( 111 , 222 )
[1] 333
prod( 111 , 222 )
[1] 24642
log( 8 , 2 )
[1] 3
# library(ggplot2)
# ggplot()
#Incremento Porcentual
(
Importe <- 256
)
[1] 256
(
Impuesto <- 3.2/100
)
[1] 0.032
(
Precio.de.Venta <- Importe * ( 1+Impuesto )
)
[1] 264.192
(
Variación <- Precio.de.Venta/Importe - 1
)
[1] 0.032
# Increase_percentaje: applies a percentage surcharge to an amount.
#
# Arguments:
#   Importe      - base amount.
#   Prc_Impuesto - surcharge expressed in percent (3.2 means 3.2%).
#
# Prints a one-line summary of the operation and returns the increased
# amount rounded to 2 decimal places.
Increase_percentaje <- function(Importe, Prc_Impuesto) {
  importe_final <- round(Importe * (1 + Prc_Impuesto / 100), 2)
  mensaje <- paste0(
    "Increasing ", Importe,
    " en ", Prc_Impuesto,
    "% resulta en: ", importe_final, " USD."
  )
  print(mensaje)
  importe_final
}
#Llamar a la function
Precio.de.Venta.1 <- Increase_percentaje( 256 , 3.2 )
[1] "Increasing 256 en 3.2% resulta en: 264.19 USD."
Precio.de.Venta.1
[1] 264.19
OTRA FUNCION:
# Increase_percentaje2: applies a percentage surcharge to an amount, rejecting
# non-positive inputs.
#
# Arguments:
#   Importe      - base amount; must be > 0.
#   Prc_Impuesto - surcharge expressed in percent (3.2 means 3.2%); must be > 0.
#
# When either argument is <= 0 an error line is printed and NULL is returned.
# Otherwise a one-line summary is printed and the increased amount, rounded
# to 2 decimal places, is returned.
Increase_percentaje2 <- function(Importe, Prc_Impuesto) {
  # The amount is computed before validating, as the checks only decide
  # whether it gets reported and returned.
  importe_final <- round(Importe * (1 + Prc_Impuesto / 100), 2)

  # Guard clauses: the amount is checked first, then the percentage.
  if (Importe <= 0) {
    print("Error. Importe <= 0.")
    return(NULL)
  }
  if (Prc_Impuesto <= 0) {
    print("Error. Prc_Impuesto <= 0.")
    return(NULL)
  }

  mensaje <- paste0(
    "Increasing ", Importe,
    " en ", Prc_Impuesto,
    "% resulta en: ", importe_final, " USD."
  )
  print(mensaje)
  importe_final
}
#Llamar a la function
Precio.de.Venta.1 <- Increase_percentaje2( 256 , 3.2 )
[1] "Increasing 256 en 3.2% resulta en: 264.19 USD."
str(Precio.de.Venta.1)
num 264
#Precio.de.Venta.1
Precio.de.Venta.2 <- Increase_percentaje2( -256 , 3.2 )
[1] "Error. Importe <= 0."
str(Precio.de.Venta.2)
NULL
#Precio.de.Venta.2
Precio.de.Venta.3 <- Increase_percentaje2( 256 , -3.2 )
[1] "Error. Prc_Impuesto <= 0."
str(Precio.de.Venta.3)
NULL
#Precio.de.Venta.3
EJEMPLO BUCLE FOR:
for( i in 1:5){
print( i^2 )
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
Vector.1a5 <- c(1:5)
#Vector.1a5
Vector.1a5^2
[1] 1 4 9 16 25
for( i in Vector.1a5 ){
print( i^2 )
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
Dataframe.1a5 <- as.data.frame( c(1:5) )
#Dataframe.1a5
names(Dataframe.1a5)
[1] "c(1:5)"
Dataframe.1a5$c
[1] 1 2 3 4 5
for( i in Dataframe.1a5$c ){
print( i^2 )
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
sqldf("SELECT cyl , count( cyl ) as Cilindros FROM mtcars GROUP BY cyl ")
df_pivot_sql <- sqldf( " SELECT cyl ,
COUNT( cyl ) as Cilindros
FROM mtcars
GROUP BY cyl ")
df_pivot_sql
EJERCICIOS ADICIONALES CON mtcars:
#mtcars
tabla.1 <- table( mtcars$cyl )
#tabla.1
colores <- c( "orange" ,
"green" ,
"yellow" )
#colores
plot.1 <- barplot( tabla.1 ,
xlab = "Cilindros" ,
ylab = "Frequencia" ,
main = "Nro de Cilindros" ,
col = colores )
plot.1
[,1]
[1,] 0.7
[2,] 1.9
[3,] 3.1
CONTINUACION CON mtcars :
plot.2 <- ggplot( mtcars ,
aes( cyl )) +
geom_bar( fill = colores ) +
labs( x= "Cilindros" ,
y = "Frecuencias" ,
title = "Numero de Cilindros")
plot.2
NA
REPASO DE MATRICES:
matrix.1 <- matrix( 1:10 ,
nrow = 5 ,
ncol = 4 )
matrix.1
[,1] [,2] [,3] [,4]
[1,] 1 6 1 6
[2,] 2 7 2 7
[3,] 3 8 3 8
[4,] 4 9 4 9
[5,] 5 10 5 10
dim(matrix.1)
[1] 5 4
matrix.1[2,4]
[1] 7
matrix.1[2, ]
[1] 2 7 2 7
matrix.1[ ,4]
[1] 6 7 8 9 10
# Convert matrix.1 into a data frame.
# stringsAsFactors is given explicitly as FALSE (the default since R 4.0):
# the default.stringsAsFactors() helper is deprecated and fails in newer R
# releases, so it must not be called here.
df_matrix.1 <- as.data.frame(
  matrix.1,
  row.names = NULL,
  optional = FALSE,
  make.names = TRUE,
  stringsAsFactors = FALSE
)
#df_matrix.1
#df_matrix.1$V4
df_matrix.1['V4']
NA
EJERCICIOS:
summary(database)
name year length_min genre average_rating cost_millions foreign age_restriction
Length:30 Min. :1936 Min. : 81.00 Length:30 Min. :5.200 Min. : 0.400 Min. :0.0 Min. : 0.00
Class :character 1st Qu.:1988 1st Qu.: 99.25 Class :character 1st Qu.:7.925 1st Qu.: 3.525 1st Qu.:0.0 1st Qu.:12.00
Mode :character Median :1998 Median :110.50 Mode :character Median :8.300 Median : 13.000 Median :0.0 Median :14.00
Mean :1996 Mean :116.80 Mean :8.103 Mean : 22.300 Mean :0.4 Mean :12.93
3rd Qu.:2008 3rd Qu.:124.25 3rd Qu.:8.500 3rd Qu.: 25.000 3rd Qu.:1.0 3rd Qu.:16.00
Max. :2015 Max. :179.00 Max. :9.300 Max. :165.000 Max. :1.0 Max. :18.00
matrix_summary <- do.call(cbind, lapply(database, summary))
matrix_summary
name year length_min genre average_rating cost_millions foreign age_restriction
Min. "30" "1936" "81" "30" "5.2" "0.4" "0" "0"
1st Qu. "character" "1987.75" "99.25" "character" "7.925" "3.525" "0" "12"
Median "character" "1998.5" "110.5" "character" "8.3" "13" "0" "14"
Mean "30" "1995.5" "116.8" "30" "8.10333333333333" "22.3" "0.4" "12.9333333333333"
3rd Qu. "character" "2007.5" "124.25" "character" "8.5" "25" "1" "16"
Max. "character" "2015" "179" "character" "9.3" "165" "1" "18"
str(matrix_summary)
chr [1:6, 1:8] "30" "character" "character" "30" "character" "character" "1936" "1987.75" "1998.5" "1995.5" "2007.5" "2015" "81" "99.25" ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:6] "Min." "1st Qu." "Median" "Mean" ...
..$ : chr [1:8] "name" "year" "length_min" "genre" ...
# Convert the character summary matrix into a data frame; the matrix row
# names (the summary statistic labels) become the data frame row names.
# stringsAsFactors is given explicitly as FALSE (the default since R 4.0):
# the default.stringsAsFactors() helper is deprecated and fails in newer R
# releases, so it must not be called here.
df_summary <- as.data.frame(
  matrix_summary,
  row.names = NULL,
  optional = FALSE,
  make.names = TRUE,
  stringsAsFactors = FALSE
)
df_summary
summary( database$cost_millions )
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.400 3.525 13.000 22.300 25.000 165.000
# Retrieve a subset_dataframe of the data frame consisting of the "genre" columns
database['genre']
# Retrieve the data for the "genre" column in the data frame as a vector
database$genre
[1] "Animation" "Animation" "Drama" "Romance" "Comedy" "Drama" "Crime" "Drama" "Action" "Drama" "Drama" "Comedy"
[13] "Horror" "Comedy" "Comedy" "Horror" "Crime" "Crime" "Adventure" "Biography" "Biography" "Romance" "Thriller" "Sci-fi"
[25] "Thriller" "Drama" "Crime" "Fantasy" "Drama" "Comedy"
# Retrieve the third row of the data frame.
database[3,]
# Retrieve the third row of the data frame, but only the "name" and "length_min" columns.
database[3, c("name","length_min")]
summary(database)
name year length_min genre average_rating cost_millions foreign age_restriction
Length:30 Min. :1936 Min. : 81.00 Length:30 Min. :5.200 Min. : 0.400 Min. :0.0 Min. : 0.00
Class :character 1st Qu.:1988 1st Qu.: 99.25 Class :character 1st Qu.:7.925 1st Qu.: 3.525 1st Qu.:0.0 1st Qu.:12.00
Mode :character Median :1998 Median :110.50 Mode :character Median :8.300 Median : 13.000 Median :0.0 Median :14.00
Mean :1996 Mean :116.80 Mean :8.103 Mean : 22.300 Mean :0.4 Mean :12.93
3rd Qu.:2008 3rd Qu.:124.25 3rd Qu.:8.500 3rd Qu.: 25.000 3rd Qu.:1.0 3rd Qu.:16.00
Max. :2015 Max. :179.00 Max. :9.300 Max. :165.000 Max. :1.0 Max. :18.00
histograma <- hist(database$length_min ,col="yellow",breaks = 10)
histograma
$breaks
[1] 80 90 100 110 120 130 140 150 160 170 180
$counts
[1] 2 8 5 5 4 1 2 0 1 2
$density
[1] 0.006666667 0.026666667 0.016666667 0.016666667 0.013333333 0.003333333 0.006666667 0.000000000 0.003333333 0.006666667
$mids
[1] 85 95 105 115 125 135 145 155 165 175
$xname
[1] "database$length_min"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"
# Timestamp used in the output file name. format() yields a fixed
# "YYYY-MM-DD HH-MM-SS" layout (no ":" and no fractional seconds), independent
# of how as.character() renders date-times in the running R version.
time <- format(Sys.time(), "%Y-%m-%d %H-%M-%S")
# Export df_summary as a .csv file named "df_summary <timestamp>.csv" inside
# the "./output_databases/" subfolder of the project.
folder_path <- "./output_databases/"
filename <- "df_summary"
filetype <- ".csv"
# Create the output folder when it is missing so the write does not fail.
dir.create(folder_path, showWarnings = FALSE, recursive = TRUE)
path <- paste0(folder_path, filename, " ", time, filetype)
# write_csv() never writes row names, and df_summary keeps the statistic
# labels in its row names, so they are copied into a first column to avoid
# losing them in the exported file.
write_csv(cbind(statistic = rownames(df_summary), df_summary), path)
Hay varios packages que graban datos en formato .xls. Pero el más sencillo es el package xlsx. Veámoslo:
# install.packages("xlsx")
# library(xlsx)
write.xlsx(df_summary, "./output_databases/df_summary.xlsx")
La función write.xlsx() permite añadir datos a un archivo .xlsx preexistente; para ello tenemos que usar la opción append = TRUE:
# library(xlsx)
write.xlsx(df_summary, "./output_databases/df_summary.xlsx", sheetName = "summary", append = TRUE)
DEL EJERCICIO DE EJECUTAR SQL QUERY en UN R NOTEBOOK:
write.xlsx( df_pivot_sql ,
"./output_databases/df_pivot_sql.xlsx",
sheetName = "df_pivot_sql" )
GRAFICA
#**********************************************************************
# Publication-quality graphs require 600 dpi.
dpi <- 600 # resolution in dots per inch
carpeta <- "./output_images/"
archivo <- "histograma"
# Timestamp for the file name. format() yields a fixed "YYYY-MM-DD HH-MM-SS"
# layout (no ":" and no fractional seconds), independent of how
# as.character() renders date-times in the running R version.
time <- format(Sys.time(), "%Y-%m-%d %H-%M-%S")
# Create the output folder when it is missing so the device can be opened.
dir.create(carpeta, showWarnings = FALSE, recursive = TRUE)
carpeta_y_archivo <- paste0(carpeta, archivo, " ", time, ".tif")
nombre_de_tif <- carpeta_y_archivo
# Open the TIFF device; width/height are in pixels, i.e. a 6 x 5 inch canvas
# at the chosen resolution. The device stays open until dev.off() is called.
tiff(nombre_de_tif, width = 6 * dpi, height = 5 * dpi, res = dpi)
#**********************************************************************
histograma <- hist(database$length_min ,col="yellow",breaks = 10)
histograma
$breaks
[1] 80 90 100 110 120 130 140 150 160 170 180
$counts
[1] 2 8 5 5 4 1 2 0 1 2
$density
[1] 0.006666667 0.026666667 0.016666667 0.016666667 0.013333333 0.003333333 0.006666667 0.000000000 0.003333333 0.006666667
$mids
[1] 85 95 105 115 125 135 145 155 165 175
$xname
[1] "database$length_min"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"
#**********************************************************************
dev.off()
null device
1
print(paste("Finalizado procesamiento de",archivo," ",time, sep=""))
[1] "Finalizado procesamiento dehistograma 2021-10-03 16-59-51"
#**********************************************************************
citation()
To cite R in publications use:
R Core Team (2021). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna,
Austria. URL https://www.R-project.org/.
A BibTeX entry for LaTeX users is
@Manual{,
title = {R: A Language and Environment for Statistical Computing},
author = {{R Core Team}},
organization = {R Foundation for Statistical Computing},
address = {Vienna, Austria},
year = {2021},
url = {https://www.R-project.org/},
}
We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also
‘citation("pkgname")’ for citing R packages.
citation("readxl")
To cite package ‘readxl’ in publications use:
Hadley Wickham and Jennifer Bryan (2019). readxl: Read Excel Files. R package version 1.3.1.
https://CRAN.R-project.org/package=readxl
A BibTeX entry for LaTeX users is
@Manual{,
title = {readxl: Read Excel Files},
author = {Hadley Wickham and Jennifer Bryan},
year = {2019},
note = {R package version 1.3.1},
url = {https://CRAN.R-project.org/package=readxl},
}
#
help("readxl") # Documentacion de la library readxl