Primero la Sección de Librerías de Funciones:

# rownames(installed.packages())
list.of.packages <- c(
"arm" , 
"broom" , 
"corrplot" , 
"cowplot" , 
"datasets" , 
"datasets" , 
"dplyr" , 
"eeptools" , 
"estimatr" , 
"FinCal" , 
"formatR" , 
"ggfortify" , 
"ggpubr" , 
"haven" , 
"Hmisc" , 
"infer" , 
"knitr" , 
"lmtest" , 
"margins" , 
"nycflights13" , 
"psych" , 
"readxl" , 
"reshape2" , 
"rms" , 
"skimr" , 
"stargazer" , 
"stringr" , 
"survival" , 
"tableone" , 
"tidyr" , 
"tidyverse" , 
"TTR" , 
"wooldridge" , 
"xlsx",
# Adicionales Octubre 2021:
"sqldf", # Para SQL en R
"RODBC",  # Para Conexion SQL y R Studio
# Adicionales 9 Octubre 2021:
"readr",
"devtools",
"googledrive",
# Adicionales Domingo 10 Octubre 2021:
"lattice"
)
has   <- list.of.packages %in% rownames(installed.packages())
if(any(!has)) install.packages(list.of.packages[!has])

Llamada a LIBRERIAS:

# library(arm) 
# library(broom) 
# library(corrplot) 
# library(cowplot) 
# library(datasets) 
library(dplyr) 
# library(eeptools) 
# library(estimatr) 
library(FinCal) 
# library(formatR) 
# library(ggfortify) 
# library(ggpubr) 
 library(ggplot2) 
 library(haven) #para la lectura de archivos DTA de Stata
# library(Hmisc) 
# library(infer) 
# library(knitr) 
# library(lmtest) 
# library(margins) 
# library(nycflights13) 
# library(psych) 
library(readxl) 
library(reshape2) #para hacer ReShape (Pivot Tables)
# library(rms) 
# library(skimr) 
# library(stargazer) 
# library(stringr) 
# library(survival) 
# library(tableone) 
library(tidyr) #para hacer ReShape (Pivot Tables)
library(tidyverse) 
library(TTR) #para las graficas de series de tiempo
# library(wooldridge) 
library(xlsx) #para exportar a Excel file

# Adicionales Octubre 2021:
library(sqldf) # Para SQL en R
library(RODBC) # Para Conexion SQL y R Studio
# Adicionales 9 Octubre 2021:
library(readr)
library(devtools)
library(googledrive)

# Adicionales Domingo 10 Octubre 2021:
library(lattice)

A partir de aquí la Sección de Importación de Datasets:

print("Working Directory: "); getwd() #get to show me the current Working Directory 
[1] "Working Directory: "
[1] "I:/001.7 CURSO-02 Lenguaje R Semillero/21.10.13.Mie Grupo Javier Guillen de R"
### Cargando BBDD: n5ay5qadfe7e1nnsv5s01oe1x62mq51j.csv ####
# Version de BBDD: 2021.09.24 v1
# RUTA: https://ibm.box.com/shared/static/

# XLS file,  Download datasets
# download.file( "https://ibm.box.com/shared/static/nx0ohd9sq0iz3p871zg8ehc1m39ibpx6.xls" , 
#                destfile="movies-db.xls" )

# CSV file,  Download datasets
download.file("https://ibm.box.com/shared/static/n5ay5qadfe7e1nnsv5s01oe1x62mq51j.csv", 
              destfile="movies-db.csv")
trying URL 'https://ibm.box.com/shared/static/n5ay5qadfe7e1nnsv5s01oe1x62mq51j.csv'
Content type 'text/csv' length 1424 bytes
downloaded 1424 bytes
database_csv <- read.csv("movies-db.csv", header=TRUE, sep=",")

file.exists("movies-db.xlsx")
[1] TRUE
# Read data from the XLS file and attribute the table a dataframe.
database_xlsx <- read_excel("movies-db.xlsx")
database_xlsx

#movies_data
(
database <- database_xlsx
)

# Read data from the XLS file and attribute the table to a dataframe.
paises_001.xlsx <- read_excel("paises_001.xlsx")
paises_001.xlsx
NA
#movies_data
(
database <- paises_001.xlsx
)
NA

To download a Google Sheet online from Google Drive into a dataframe in R:

#################################################
#- GoogleSheet4
#################################################
#install.packages("devtools")
library(devtools)
#devtools::install_github("tidyverse/googlesheets4")

#install.packages("gargle")
library(googlesheets4)

Attaching package: 㤼㸱googlesheets4㤼㸲

The following objects are masked from 㤼㸱package:googledrive㤼㸲:

    request_generate, request_make
gs4_auth()
The googlesheets4 package is requesting access to your Google account.
Select a pre-authorised account or enter '0' to obtain a new token.
Press Esc/Ctrl + C to cancel.

1: ecanchor@unmsm.edu.pe
1
#help(gs4_auth)
#################################################
#- Read an existing Spreadsheet 
###############################################

data_to_write <- read_sheet("https://docs.google.com/spreadsheets/d/1Cm-VRHrUDlGFke4d_wkCiL-htt0ctB_p7oPn6Pl6YEc")
Auto-refreshing stale OAuth token.
v Reading from paises_001_GoogleSheet.
v Range Sheet1.
data_to_write

To download a CSV file from Google Drive (está viendose sin UTF-8(acentos, eñe) si lo abro en Excel, debe de insertarse con llamada de datos al file CSCV):

#################################################
#- Downloading a spreadsheet file as a csv 
#################################################
#library(googledrive)
#target <- drive_get( "https://docs.google.com/spreadsheets/d/1Cm-VRHrUDlGFke4d_wkCiL-htt0ctB_p7oPn6Pl6YEc" )
#drive_download( target , 
#                type= "csv" , 
#                path = "paises_001_GoogleSheet.csv" ,
#                overwrite = TRUE )

Read data from Google Drive (the ZIP file) and attribute the table to a dataframe.

# https://docs.google.com/spreadsheets/d/1PiNq7i0cXEhvXApRCvvh3FUDrF2kGeua/edit?usp=sharing&ouid=109636307569655661315&rtpof=true&sd=true
# Read data from Google Drive (the ZIP file) and attribute the table to a dataframe.
library(googledrive)
library(httpuv)
temp <- tempfile( fileext = ".zip" )
temp
[1] "C:\\Users\\user\\AppData\\Local\\Temp\\RtmpYDef6c\\file5c4034f4110f.zip"
dl <- drive_download(
         as_id( "1AiZda_1-2nwrxI8fLD0Y6e5rTg7aocv0" ) , 
         path = temp , overwrite = TRUE )
The googledrive package is requesting access to your Google account.
Select a pre-authorised account or enter '0' to obtain a new token.
Press Esc/Ctrl + C to cancel.

1: ecanchor@unmsm.edu.pe
1
Auto-refreshing stale OAuth token.
File downloaded:
* Archive.zip <id: 1AiZda_1-2nwrxI8fLD0Y6e5rTg7aocv0>
Saved locally as:
* C:\Users\user\AppData\Local\Temp\RtmpYDef6c\file5c4034f4110f.zip
dl
out <- unzip( temp , exdir = tempdir() )
out
 [1] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank/.DS_Store"                                     
 [2] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank/._.DS_Store"                          
 [3] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank/bank-full.csv"                                 
 [4] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank/._bank-full.csv"                      
 [5] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank/bank.csv"                                      
 [6] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank/._bank.csv"                           
 [7] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/._bank"                                    
 [8] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank-additional/.DS_Store"                          
 [9] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank-additional/._.DS_Store"               
[10] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank-additional/.Rhistory"                          
[11] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank-additional/._.Rhistory"               
[12] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank-additional/bank-additional-full.csv"           
[13] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank-additional/._bank-additional-full.csv"
[14] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/bank-additional/bank-additional.csv"                
[15] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/bank-additional/._bank-additional.csv"     
[16] "C:/Users/user/AppData/Local/Temp/RtmpYDef6c/__MACOSX/._bank-additional"                         
bank <- read.csv( out[14] , sep = ";" )
str(bank)
'data.frame':   4119 obs. of  21 variables:
 $ age           : int  30 39 25 38 47 32 32 41 31 35 ...
 $ job           : chr  "blue-collar" "services" "services" "services" ...
 $ marital       : chr  "married" "single" "married" "married" ...
 $ education     : chr  "basic.9y" "high.school" "high.school" "basic.9y" ...
 $ default       : chr  "no" "no" "no" "no" ...
 $ housing       : chr  "yes" "no" "yes" "unknown" ...
 $ loan          : chr  "no" "no" "no" "unknown" ...
 $ contact       : chr  "cellular" "telephone" "telephone" "telephone" ...
 $ month         : chr  "may" "may" "jun" "jun" ...
 $ day_of_week   : chr  "fri" "fri" "wed" "fri" ...
 $ duration      : int  487 346 227 17 58 128 290 44 68 170 ...
 $ campaign      : int  2 4 1 3 1 3 4 2 1 1 ...
 $ pdays         : int  999 999 999 999 999 999 999 999 999 999 ...
 $ previous      : int  0 0 0 0 0 2 0 0 1 0 ...
 $ poutcome      : chr  "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
 $ emp.var.rate  : num  -1.8 1.1 1.4 1.4 -0.1 -1.1 -1.1 -0.1 -0.1 1.1 ...
 $ cons.price.idx: num  92.9 94 94.5 94.5 93.2 ...
 $ cons.conf.idx : num  -46.2 -36.4 -41.8 -41.8 -42 -37.5 -37.5 -42 -42 -36.4 ...
 $ euribor3m     : num  1.31 4.86 4.96 4.96 4.19 ...
 $ nr.employed   : num  5099 5191 5228 5228 5196 ...
 $ y             : chr  "no" "no" "no" "no" ...
bank
NA

Reading file from a CSV file:

# Read data from the TXT  file and attribute the table to a dataframe.
paises_001.csv <- read.csv("prueba_csv01.csv", header=TRUE, sep=";")
paises_001.csv

Reading file from a TXT file:

# Read data from the TXT  file and attribute the table to a dataframe.
paises_001.txt <- read_csv("paises_001.txt")
Rows: 4 Columns: 3
-- Column specification ---------------------------------------------------------------
Delimiter: ","
chr (1): pais
dbl (2): poblacion, anio

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
paises_001.txt
#library(haven)
#BaseCOVID19.sav <- read_sav("BaseCOVID-19.sav")
#BaseCOVID19.sav
#library(haven)
#enaho01.dta <- read_dta("enaho01-2020-100.dta")
#enaho01.dta

REVISION RAPIDA DEL DATAFRAME:

#View(database)
summary(database) # Summary Estadístico.
       Nº            PAIS              CODIGO         
 Min.   : 1.00   Length:57          Length:57         
 1st Qu.:15.00   Class :character   Class :character  
 Median :30.00   Mode  :character   Mode  :character  
 Mean   :29.58                                        
 3rd Qu.:44.00                                        
 Max.   :58.00                                        
head(database) # Primeros 6.
names(database) # Names de columnas.
[1] "Nº"     "PAIS"   "CODIGO"
print(is.data.frame(database))
[1] TRUE
#attach(database) #only if there is only 1 dataset 
# CONTENIDO DE TABLA:
# database es la tabla con datos de películas.

ANALIZAMOS LA ESTRCUTURA DE LA TABLA:

Función str: structure Lab08-importingData


# Prints out the structure of your table.
str(database) # es la función structure
tibble [57 x 3] (S3: tbl_df/tbl/data.frame)
 $ Nº    : num [1:57] 1 2 3 4 5 6 7 8 9 10 ...
 $ PAIS  : chr [1:57] "Angola" "Argelia" "Benin" "Botswana" ...
 $ CODIGO: chr [1:57] "AGO" "DZA" "BEN" "BWA" ...

A partir de aquí inicia el Cuerpo del Script:

EJERCICIO FUNCIONES PROPIAS EN R

# UDF - User Defined Functions

sum( 111 , 222 )
[1] 333
prod( 111 , 222 )
[1] 24642
log( 8 , 2 )
[1] 3
# library(ggplot2)
# ggplot()


#Incremento Porcentual
( 
Importe <- 256
)
[1] 256
(
Impuesto <- 3.2/100
)
[1] 0.032
(
Precio.de.Venta <- Importe * ( 1+Impuesto )
)
[1] 264.192
(
Variación <- Precio.de.Venta/Importe - 1
)
[1] 0.032
# Function Increase_percentaje
Increase_percentaje <- function( Importe , Prc_Impuesto ){
  Precio.de.Venta <- Importe * ( 1+Prc_Impuesto/100 )
  Precio.de.Venta <- round( Precio.de.Venta , 2 )
  
  print( paste( "Increasing ", Importe ,
                " en ", Prc_Impuesto , 
                "% resulta en: ", Precio.de.Venta , " USD.",
                sep = "" ))
  return(Precio.de.Venta)
}

#Llamar a la function
Precio.de.Venta.1 <- Increase_percentaje( 256 , 3.2 ) 
[1] "Increasing 256 en 3.2% resulta en: 264.19 USD."
Precio.de.Venta.1
[1] 264.19

OTRA FUNCION:

# Function Increase_percentaje2
Increase_percentaje2 <- function( Importe , Prc_Impuesto ){
  Precio.de.Venta <- Importe * ( 1+Prc_Impuesto/100 )
  Precio.de.Venta <- round( Precio.de.Venta , 2 )
  
  if( Importe <= 0 ){
    print("Error. Importe <= 0.")
    return(NULL)
  }else if( Prc_Impuesto <= 0 ){
    print("Error. Prc_Impuesto <= 0.")
    return(NULL)
  }else{
  
  print( paste( "Increasing ", Importe ,
                " en ", Prc_Impuesto , 
                "% resulta en: ", Precio.de.Venta , " USD.",
                sep = "" ))
  return(Precio.de.Venta)
  }
}
#Llamar a la function
Precio.de.Venta.1 <- Increase_percentaje2( 256 , 3.2 ) 
[1] "Increasing 256 en 3.2% resulta en: 264.19 USD."
str(Precio.de.Venta.1)
 num 264
#Precio.de.Venta.1
Precio.de.Venta.2 <- Increase_percentaje2( -256 , 3.2 ) 
[1] "Error. Importe <= 0."
str(Precio.de.Venta.2)
 NULL
#Precio.de.Venta.2
Precio.de.Venta.3 <- Increase_percentaje2( 256 , -3.2 ) 
[1] "Error. Prc_Impuesto <= 0."
str(Precio.de.Venta.3)
 NULL
#Precio.de.Venta.2

EJEMPLO BUCLE FOR:

for( i in 1:5){
  print( i^2 )
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
Vector.1a5 <- c(1:5)
#Vector.1a5
Vector.1a5^2
[1]  1  4  9 16 25
for( i in Vector.1a5 ){
  print( i^2 )
}
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
Dataframe.1a5 <- as.data.frame( c(1:5) )
#Dataframe.1a5
names(Dataframe.1a5)
[1] "c(1:5)"
Dataframe.1a5$c
[1] 1 2 3 4 5
for( i in Dataframe.1a5$c ){
  print( i^2 )
}  
[1] 1
[1] 4
[1] 9
[1] 16
[1] 25
#help("sqldf")

mtcars
#View(mtcars)
df_query <- sqldf("select * from mtcars")
df_query
#str(df_query)
sqldf("SELECT cyl , count( cyl ) as Cilindros FROM mtcars GROUP BY cyl ")
df_pivot_sql <- sqldf( " SELECT cyl , 
                                COUNT( cyl ) as Cilindros 
                         FROM mtcars 
                         GROUP BY cyl ")
df_pivot_sql

EJERCICIOS ADICIONALES CON mtcars:

#mtcars
tabla.1 <- table( mtcars$cyl )
#tabla.1

colores <- c( "orange" , 
              "green" , 
              "yellow" )
#colores

plot.1 <- barplot( tabla.1 , 
                   xlab = "Cilindros" , 
                   ylab = "Frequencia" , 
                   main = "Nro de Cilindros" ,
                   col = colores )

plot.1
     [,1]
[1,]  0.7
[2,]  1.9
[3,]  3.1

Con library lattice:

#mtcars
tabla.1 <- table( mtcars$cyl )
#tabla.1

colores <- c( "orange" , 
              "green" , 
              "yellow" )
#colores

plot.3 <- barchart( tabla.1 , 
                    xlab = "Cilindros" , 
                    ylab = "Número de Cilindros" , 
                    main = "Número de Cilindros", 
                    col = colores )

plot.3

CONTINUACION CON mtcars :


plot.2 <- ggplot( mtcars ,
                  aes( cyl )) +
          geom_bar( fill = colores ) + 
          labs( x= "Cilindros" , 
                y = "Frecuencias" , 
                title = "Numero de Cilindros")
plot.2  

NA

REPASO DE MATRICES:

# PARA OCULTAR
matrix.1 <- matrix( 1:10 , 
            nrow = 5 , 
            ncol = 4 )
matrix.1

dim(matrix.1)

matrix.1[2,4]
matrix.1[2, ]
matrix.1[ ,4]
df_matrix.1 <- as.data.frame( matrix.1 , row.names = NULL, 
                              optional = FALSE , 
                              make.names = TRUE , 
               stringsAsFactors = default.stringsAsFactors() )
#df_matrix.1
#df_matrix.1$V4
df_matrix.1['V4']

EJERCICIOS:

summary(database)
       Nº            PAIS              CODIGO         
 Min.   : 1.00   Length:57          Length:57         
 1st Qu.:15.00   Class :character   Class :character  
 Median :30.00   Mode  :character   Mode  :character  
 Mean   :29.58                                        
 3rd Qu.:44.00                                        
 Max.   :58.00                                        
matrix_summary <- do.call(cbind, lapply(database, summary))
matrix_summary
        Nº                 PAIS        CODIGO     
Min.    "1"                "57"        "57"       
1st Qu. "15"               "character" "character"
Median  "30"               "character" "character"
Mean    "29.5789473684211" "57"        "57"       
3rd Qu. "44"               "character" "character"
Max.    "58"               "character" "character"
str(matrix_summary)
 chr [1:6, 1:3] "1" "15" "30" "29.5789473684211" "44" "58" "57" "character" ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:6] "Min." "1st Qu." "Median" "Mean" ...
  ..$ : chr [1:3] "Nº" "PAIS" "CODIGO"
df_summary <- as.data.frame(matrix_summary, row.names = NULL, optional = FALSE,
              make.names = TRUE, 
              stringsAsFactors = default.stringsAsFactors())
df_summary

summary( database$cost_millions )
Unknown or uninitialised column: `cost_millions`.
Length  Class   Mode 
     0   NULL   NULL 
# Retrieve a subset_dataframe of the data frame consisting of the "genre" columns
database['PAIS']

# Retrieve the data for the "name" column in the data frame in a vector
database$PAIS
 [1] "Angola"                                       
 [2] "Argelia"                                      
 [3] "Benin"                                        
 [4] "Botswana"                                     
 [5] "Burkina Faso"                                 
 [6] "Burundi"                                      
 [7] "Cabo Verde"                                   
 [8] "Camerún"                                      
 [9] "Chad"                                         
[10] "Comoras"                                      
[11] "Congo"                                        
[12] "Côte d'Ivoire"                                
[13] "Djibouti"                                     
[14] "Egipto"                                       
[15] "Eritrea"                                      
[16] "España, territorios vinculados en Africa"     
[17] "Etiopía"                                      
[18] "Francia, territorios vinculados en Africa"    
[19] "Gabón"                                        
[20] "Gambia"                                       
[21] "Ghana"                                        
[22] "Guinea"                                       
[23] "Guinea Bissau"                                
[24] "Guinea Ecuatorial"                            
[25] "Kenya"                                        
[26] "Lesotho"                                      
[27] "Liberia"                                      
[28] "Libia"                                        
[29] "Madagascar"                                   
[30] "Malawi"                                       
[31] "Malí"                                         
[32] "Marruecos"                                    
[33] "Mauricio"                                     
[34] "Mauritania"                                   
[35] "Mozambique"                                   
[36] "Namibia"                                      
[37] "Níger"                                        
[38] "Nigeria"                                      
[39] "Reino Unido, territorios vinculados en África"
[40] "República Centroafricana"                     
[41] "República Democrática del Congo"              
[42] "Rwanda"                                       
[43] "Santo Tomé y Príncipe"                        
[44] "Senegal"                                      
[45] "Seychelles"                                   
[46] "Sierra Leona"                                 
[47] "Somalia"                                      
[48] "Sudáfrica"                                    
[49] "Sudán"                                        
[50] "Sudán del Sur"                                
[51] "Swazilandia"                                  
[52] "Tanzanía"                                     
[53] "Togo"                                         
[54] "Túnez"                                        
[55] "Uganda"                                       
[56] "Zambia"                                       
[57] "Zimbabwe"                                     
# Retrieve the 3rd row of the data frame.
database[3,]

# Retrieve the third row of the data frame, but only the "name" and "length_min" columns.
database[3, c("PAIS","CODIGO")]

summary(database)
       Nº            PAIS              CODIGO         
 Min.   : 1.00   Length:57          Length:57         
 1st Qu.:15.00   Class :character   Class :character  
 Median :30.00   Mode  :character   Mode  :character  
 Mean   :29.58                                        
 3rd Qu.:44.00                                        
 Max.   :58.00                                        
histograma <- hist(database$Nº ,col="yellow",breaks = 10)

histograma
$breaks
 [1]  0  5 10 15 20 25 30 35 40 45 50 55 60

$counts
 [1] 5 5 5 5 4 5 5 5 5 5 5 3

$density
 [1] 0.01754386 0.01754386 0.01754386 0.01754386 0.01403509 0.01754386 0.01754386
 [8] 0.01754386 0.01754386 0.01754386 0.01754386 0.01052632

$mids
 [1]  2.5  7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5 47.5 52.5 57.5

$xname
[1] "database$Nº"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

time = gsub(":", "-", Sys.time())

#- exporta en formato .csv el df df_summary al fichero "df_summary.csv". Se guardará en la subcarpeta "datos/pruebas/" del proyecto
folder_path <- "./output_databases/"
filename <- "df_summary"
filetype <-".csv"
path <- paste(folder_path,filename," ",time,filetype, sep="")
write_csv(df_summary, path)

Hay varios packages que graban datos en formato .xls. Pero el más sencillo es el package xlsx. Veámoslo:

# install.packages("xlsx")
# library(xlsx)
write.xlsx(df_summary, "./output_databases/df_summary.xlsx", sheetName = "df_summary" )

La función write.xlsx() permite añadir datos a un archivo .xlsx preexistente; para ello tenemos que usar la opción append = TRUE:

# library(xlsx)
write.xlsx(df_summary, "./output_databases/df_summary.xlsx", sheetName = "summary", append = TRUE)

DEL EJERCICIO DE EJECUTAR SQL QUERY en UN R NOTEBOOK:

write.xlsx( df_pivot_sql , 
            "./output_databases/df_pivot_sql.xlsx", 
            sheetName = "df_pivot_sql" )

GRAFICA

#**********************************************************************
#*# Publication quality graphs require 600dpi
dpi=600    #pixels per square inch
carpeta = "./output_images/"
archivo = "histograma"
time = gsub(":", "-", Sys.time())
carpeta_y_archivo = paste(carpeta,archivo," ",time,".tif", sep="")
nombre_de_tif = carpeta_y_archivo
tiff(nombre_de_tif, width=6*dpi, height=5*dpi, res=dpi)
#**********************************************************************
histograma <- hist(database$Nº ,col="yellow",breaks = 10)
histograma
$breaks
 [1]  0  5 10 15 20 25 30 35 40 45 50 55 60

$counts
 [1] 5 5 5 5 4 5 5 5 5 5 5 3

$density
 [1] 0.01754386 0.01754386 0.01754386 0.01754386 0.01403509 0.01754386 0.01754386
 [8] 0.01754386 0.01754386 0.01754386 0.01754386 0.01052632

$mids
 [1]  2.5  7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5 47.5 52.5 57.5

$xname
[1] "database$Nº"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"
#**********************************************************************
dev.off()
null device 
          1 
print(paste("Finalizado procesamiento de ",archivo," ",time, sep=""))
[1] "Finalizado procesamiento de histograma 2021-10-13 20-42-17"
#**********************************************************************
citation()

To cite R in publications use:

  R Core Team (2021). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL
  https://www.R-project.org/.

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {R: A Language and Environment for Statistical Computing},
    author = {{R Core Team}},
    organization = {R Foundation for Statistical Computing},
    address = {Vienna, Austria},
    year = {2021},
    url = {https://www.R-project.org/},
  }

We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also ‘citation("pkgname")’ for citing R
packages.
citation("readxl") 

To cite package ‘readxl’ in publications use:

  Hadley Wickham and Jennifer Bryan (2019). readxl: Read Excel Files. R package version 1.3.1. https://CRAN.R-project.org/package=readxl

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {readxl: Read Excel Files},
    author = {Hadley Wickham and Jennifer Bryan},
    year = {2019},
    note = {R package version 1.3.1},
    url = {https://CRAN.R-project.org/package=readxl},
  }
# 
help("readxl") # Documentacion de la library readxl
---
title: "Read Google Sheet, GGplot2, Query SQL en  R Notebook (pivot table), bucle for & funciones, @ECR's Master R Notebook Template 2021-II"
author: "Ing. Ernesto Cancho-Rodriguez, MBA George Washington University"
email: "ernesto.cancho@unmsm.edu.pe"
date: "2021.10.13"
output:
  html_notebook: default
  pdf_document: default
  html_document:
    df_print: paged
  word_document: default
---

Primero la Sección de Librerías de Funciones: 

```{r INSTALACION LIBRERIAS }
# rownames(installed.packages())
list.of.packages <- c(
"arm" , 
"broom" , 
"corrplot" , 
"cowplot" , 
"datasets" , 
"datasets" , 
"dplyr" , 
"eeptools" , 
"estimatr" , 
"FinCal" , 
"formatR" , 
"ggfortify" , 
"ggpubr" , 
"haven" , 
"Hmisc" , 
"infer" , 
"knitr" , 
"lmtest" , 
"margins" , 
"nycflights13" , 
"psych" , 
"readxl" , 
"reshape2" , 
"rms" , 
"skimr" , 
"stargazer" , 
"stringr" , 
"survival" , 
"tableone" , 
"tidyr" , 
"tidyverse" , 
"TTR" , 
"wooldridge" , 
"xlsx",
# Adicionales Octubre 2021:
"sqldf", # Para SQL en R
"RODBC",  # Para Conexion SQL y R Studio
# Adicionales 9 Octubre 2021:
"readr",
"devtools",
"googledrive",
# Adicionales Domingo 10 Octubre 2021:
"lattice"
)
has   <- list.of.packages %in% rownames(installed.packages())
if(any(!has)) install.packages(list.of.packages[!has])

```


Llamada a LIBRERIAS:
```{r LLAMADA A LIBRERIAS}
# library(arm) 
# library(broom) 
# library(corrplot) 
# library(cowplot) 
# library(datasets) 
library(dplyr) 
# library(eeptools) 
# library(estimatr) 
library(FinCal) 
# library(formatR) 
# library(ggfortify) 
# library(ggpubr) 
 library(ggplot2) 
 library(haven) #para la lectura de archivos DTA de Stata
# library(Hmisc) 
# library(infer) 
# library(knitr) 
# library(lmtest) 
# library(margins) 
# library(nycflights13) 
# library(psych) 
library(readxl) 
library(reshape2) #para hacer ReShape (Pivot Tables)
# library(rms) 
# library(skimr) 
# library(stargazer) 
# library(stringr) 
# library(survival) 
# library(tableone) 
library(tidyr) #para hacer ReShape (Pivot Tables)
library(tidyverse) 
library(TTR) #para las graficas de series de tiempo
# library(wooldridge) 
library(xlsx) #para exportar a Excel file

# Adicionales Octubre 2021:
library(sqldf) # Para SQL en R
library(RODBC) # Para Conexion SQL y R Studio
# Adicionales 9 Octubre 2021:
library(readr)
library(devtools)
library(googledrive)

# Adicionales Domingo 10 Octubre 2021:
library(lattice)
```


A partir de aquí la Sección de Importación de Datasets:
```{r DATA }
print("Working Directory: "); getwd() #get to show me the current Working Directory 
### Cargando BBDD: n5ay5qadfe7e1nnsv5s01oe1x62mq51j.csv ####
# Version de BBDD: 2021.09.24 v1
# RUTA: https://ibm.box.com/shared/static/

# XLS file,  Download datasets
# download.file( "https://ibm.box.com/shared/static/nx0ohd9sq0iz3p871zg8ehc1m39ibpx6.xls" , 
#                destfile="movies-db.xls" )

# CSV file,  Download datasets
download.file("https://ibm.box.com/shared/static/n5ay5qadfe7e1nnsv5s01oe1x62mq51j.csv", 
              destfile="movies-db.csv")
database_csv <- read.csv("movies-db.csv", header=TRUE, sep=",")

file.exists("movies-db.xlsx")
# Read data from the XLS file and attribute the table a dataframe.
database_xlsx <- read_excel("movies-db.xlsx")
database_xlsx

#movies_data
(
database <- database_xlsx
)

# Read data from the XLS file and attribute the table to a dataframe.
paises_001.xlsx <- read_excel("paises_001.xlsx")
paises_001.xlsx

```


```{r Send to the dataframe database}
#movies_data
(
database <- paises_001.xlsx
)

```

To download a Google Sheet online from Google Drive into a dataframe in R:
```{r Read Google Sheet}
#################################################
#- GoogleSheet4
#################################################
#install.packages("devtools")
library(devtools)
#devtools::install_github("tidyverse/googlesheets4")

#install.packages("gargle")
library(googlesheets4)

gs4_auth()
#help(gs4_auth)
#################################################
#- Read an existing Spreadsheet 
###############################################

data_to_write <- read_sheet("https://docs.google.com/spreadsheets/d/1Cm-VRHrUDlGFke4d_wkCiL-htt0ctB_p7oPn6Pl6YEc")
data_to_write
```


To download a CSV file from Google Drive (está viendose sin UTF-8(acentos, eñe) si lo abro en Excel, debe de insertarse con llamada de datos al file CSCV):
```{r Read CSV from Drive}
#################################################
#- Downloading a spreadsheet file as a csv 
#################################################
#library(googledrive)
#target <- drive_get( "https://docs.google.com/spreadsheets/d/1Cm-VRHrUDlGFke4d_wkCiL-htt0ctB_p7oPn6Pl6YEc" )
#drive_download( target , 
#                type= "csv" , 
#                path = "paises_001_GoogleSheet.csv" ,
#                overwrite = TRUE )

```

Read data from Google Drive (the ZIP file) and attribute the table to a dataframe.
```{r Read ZIP with CSV file from Google Drive}
# https://docs.google.com/spreadsheets/d/1PiNq7i0cXEhvXApRCvvh3FUDrF2kGeua/edit?usp=sharing&ouid=109636307569655661315&rtpof=true&sd=true
# Read data from Google Drive (the ZIP file) and attribute the table to a dataframe.
# library(googledrive)
# library(httpuv)
# temp <- tempfile( fileext = ".zip" )
# temp
# dl <- drive_download(
#         as_id( "1AiZda_1-2nwrxI8fLD0Y6e5rTg7aocv0" ) , 
#         path = temp , overwrite = TRUE )
# dl
# out <- unzip( temp , exdir = tempdir() )
# out
# bank <- read.csv( out[14] , sep = ";" )
# str(bank)
# bank

```


Reading file from a CSV file:
```{r READ CSV}
# Read data from the TXT  file and attribute the table to a dataframe.
paises_001.csv <- read.csv("prueba_csv01.csv", header=TRUE, sep=";")
paises_001.csv
```

Reading file from a TXT file:
```{r READ TXT}
# Read data from the TXT  file and attribute the table to a dataframe.
paises_001.txt <- read_csv("paises_001.txt")
paises_001.txt
```

```{r Read SPSS file}
#library(haven)
#BaseCOVID19.sav <- read_sav("BaseCOVID-19.sav")
#BaseCOVID19.sav

```

```{r Read STATA files}
#library(haven)
#enaho01.dta <- read_dta("enaho01-2020-100.dta")
#enaho01.dta

```


REVISION RAPIDA DEL DATAFRAME:
```{r}
#View(database)
summary(database) # Summary Estadístico.
head(database) # Primeros 6.
names(database) # Names de columnas.
print(is.data.frame(database))
#attach(database) #only if there is only 1 dataset 
# CONTENIDO DE TABLA:
# database es la tabla con datos de películas.
```

ANALIZAMOS LA ESTRCUTURA DE LA TABLA:

Función str: structure
Lab08-importingData 
```{r CUERPO}

# Prints out the structure of your table.
str(database) # es la función structure


```


A partir de aquí inicia el Cuerpo del Script:

EJERCICIO FUNCIONES PROPIAS EN R
```{r}
# UDF - User Defined Functions #PARA OCULTAR

sum( 111 , 222 )
prod( 111 , 222 )
log( 8 , 2 )

# library(ggplot2)
# ggplot()


#Incremento Porcentual
( 
Importe <- 256
)
(
Impuesto <- 3.2/100
)
(
Precio.de.Venta <- Importe * ( 1+Impuesto )
)
(
Variación <- Precio.de.Venta/Importe - 1
)

# Function Increase_percentaje
Increase_percentaje <- function( Importe , Prc_Impuesto ){
  Precio.de.Venta <- Importe * ( 1+Prc_Impuesto/100 )
  Precio.de.Venta <- round( Precio.de.Venta , 2 )
  
  print( paste( "Increasing ", Importe ,
                " en ", Prc_Impuesto , 
                "% resulta en: ", Precio.de.Venta , " USD.",
                sep = "" ))
  return(Precio.de.Venta)
}

#Llamar a la function
Precio.de.Venta.1 <- Increase_percentaje( 256 , 3.2 ) 
Precio.de.Venta.1

```

OTRA FUNCION:
```{r}
# Function Increase_percentaje2
Increase_percentaje2 <- function( Importe , Prc_Impuesto ){
  Precio.de.Venta <- Importe * ( 1+Prc_Impuesto/100 )
  Precio.de.Venta <- round( Precio.de.Venta , 2 )
  
  if( Importe <= 0 ){
    print("Error. Importe <= 0.")
    return(NULL)
  }else if( Prc_Impuesto <= 0 ){
    print("Error. Prc_Impuesto <= 0.")
    return(NULL)
  }else{
  
  print( paste( "Increasing ", Importe ,
                " en ", Prc_Impuesto , 
                "% resulta en: ", Precio.de.Venta , " USD.",
                sep = "" ))
  return(Precio.de.Venta)
  }
}
#Llamar a la function
Precio.de.Venta.1 <- Increase_percentaje2( 256 , 3.2 ) 
str(Precio.de.Venta.1)
#Precio.de.Venta.1
Precio.de.Venta.2 <- Increase_percentaje2( -256 , 3.2 ) 
str(Precio.de.Venta.2)
#Precio.de.Venta.2
Precio.de.Venta.3 <- Increase_percentaje2( 256 , -3.2 ) 
str(Precio.de.Venta.3)
#Precio.de.Venta.2
```

EJEMPLO BUCLE FOR:
```{r}
for( i in 1:5){
  print( i^2 )
}

Vector.1a5 <- c(1:5)
#Vector.1a5
Vector.1a5^2

for( i in Vector.1a5 ){
  print( i^2 )
}

Dataframe.1a5 <- as.data.frame( c(1:5) )
#Dataframe.1a5
names(Dataframe.1a5)
Dataframe.1a5$c
for( i in Dataframe.1a5$c ){
  print( i^2 )
}  

```



```{r}
#help("sqldf")

mtcars
#View(mtcars)

```

```{r}
df_query <- sqldf("select * from mtcars")
df_query
#str(df_query)

```
```{r}
sqldf("SELECT cyl , count( cyl ) as Cilindros FROM mtcars GROUP BY cyl ")
```

```{r}
df_pivot_sql <- sqldf( " SELECT cyl , 
                                COUNT( cyl ) as Cilindros 
                         FROM mtcars 
                         GROUP BY cyl ")
df_pivot_sql
```

EJERCICIOS ADICIONALES CON mtcars:

```{r BARPLOT de R BASE}
#mtcars
tabla.1 <- table( mtcars$cyl )
#tabla.1

colores <- c( "orange" , 
              "green" , 
              "yellow" )
#colores

plot.1 <- barplot( tabla.1 , 
                   xlab = "Cilindros" , 
                   ylab = "Frequencia" , 
                   main = "Nro de Cilindros" ,
                   col = colores )
plot.1
```

Con library lattice:
```{r BARCAHRT de LATTICE}
#mtcars
tabla.1 <- table( mtcars$cyl )
#tabla.1

colores <- c( "orange" , 
              "green" , 
              "yellow" )
#colores

plot.3 <- barchart( tabla.1 , 
                    xlab = "Cilindros" , 
                    ylab = "Número de Cilindros" , 
                    main = "Número de Cilindros", 
                    col = colores )

plot.3
```

CONTINUACION CON mtcars : 

```{r}

plot.2 <- ggplot( mtcars ,
                  aes( cyl )) +
          geom_bar( fill = colores ) + 
          labs( x= "Cilindros" , 
                y = "Frecuencias" , 
                title = "Numero de Cilindros")
plot.2  
  
```



REPASO DE MATRICES:
```{r} 
# PARA OCULTAR
matrix.1 <- matrix( 1:10 , 
            nrow = 5 , 
            ncol = 4 )
matrix.1

dim(matrix.1)

matrix.1[2,4]
matrix.1[2, ]
matrix.1[ ,4]
df_matrix.1 <- as.data.frame( matrix.1 , row.names = NULL, 
                              optional = FALSE , 
                              make.names = TRUE , 
               stringsAsFactors = default.stringsAsFactors() )
#df_matrix.1
#df_matrix.1$V4
df_matrix.1['V4']

```



EJERCICIOS:


```{r}
summary(database)
matrix_summary <- do.call(cbind, lapply(database, summary))
matrix_summary
str(matrix_summary)
df_summary <- as.data.frame(matrix_summary, row.names = NULL, optional = FALSE,
              make.names = TRUE, 
              stringsAsFactors = default.stringsAsFactors())
df_summary

summary( database$cost_millions )

# Retrieve a subset_dataframe of the data frame consisting of the "genre" columns
database['PAIS']

# Retrieve the data for the "name" column in the data frame in a vector
database$PAIS

```

```{r}
# Retrieve the 3rd row of the data frame. # PARA OCULTAR
database[3,]

# Retrieve the third row of the data frame, but only the "name" and "length_min" columns.
database[3, c("PAIS","CODIGO")]
```
```{r}

summary(database)
histograma <- hist(database$Nº ,col="yellow",breaks = 10)
histograma
```
```{r}

time = gsub(":", "-", Sys.time())

#- exporta en formato .csv el df df_summary al fichero "df_summary.csv". Se guardará en la subcarpeta "datos/pruebas/" del proyecto
folder_path <- "./output_databases/"
filename <- "df_summary"
filetype <-".csv"
path <- paste(folder_path,filename," ",time,filetype, sep="")
write_csv(df_summary, path)


```
Hay varios packages que graban datos en formato .xls. Pero el más sencillo es el package xlsx. Veámoslo:
```{r}
# install.packages("xlsx")
# library(xlsx)
write.xlsx(df_summary, "./output_databases/df_summary.xlsx", sheetName = "df_summary" )

```

La función write.xlsx() permite añadir datos a un archivo .xlsx preexistente; para ello tenemos que usar la opción append = TRUE:
```{r}
# library(xlsx)
write.xlsx(df_summary, "./output_databases/df_summary.xlsx", sheetName = "summary", append = TRUE)
```


DEL EJERCICIO DE EJECUTAR SQL QUERY en UN R NOTEBOOK:
```{r}
write.xlsx( df_pivot_sql , 
            "./output_databases/df_pivot_sql.xlsx", 
            sheetName = "df_pivot_sql" )
```




GRAFICA
```{r}
#**********************************************************************
#*# Publication quality graphs require 600dpi
dpi=600    #pixels per square inch
carpeta = "./output_images/"
archivo = "histograma"
time = gsub(":", "-", Sys.time())
carpeta_y_archivo = paste(carpeta,archivo," ",time,".tif", sep="")
nombre_de_tif = carpeta_y_archivo
tiff(nombre_de_tif, width=6*dpi, height=5*dpi, res=dpi)
#**********************************************************************
histograma <- hist(database$Nº ,col="yellow",breaks = 10)
histograma
#**********************************************************************
dev.off()
print(paste("Finalizado procesamiento de ",archivo," ",time, sep=""))
#**********************************************************************
```

```{r}
citation()
citation("readxl") 
# 
help("readxl") # Documentacion de la library readxl

```