# Remove every object that ls() reports in the current environment so the
# script starts from a clean workspace.
rm(list = ls())
# Strongly favour fixed over scientific notation when numbers are printed.
options(scipen = 999)
library(readxl)
library(openxlsx)
library(data.table)
library(foreign)
library(ggplot2)
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(corrplot)
## corrplot 0.92 loaded
library(haven)
library(MASS)
library(kableExtra)
library(ggcorrplot)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.6     ✔ dplyr   1.0.7
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.0     ✔ forcats 0.5.1
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()    masks data.table::between()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks data.table::first()
## ✖ dplyr::group_rows() masks kableExtra::group_rows()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::last()       masks data.table::last()
## ✖ dplyr::select()     masks MASS::select()
## ✖ purrr::transpose()  masks data.table::transpose()
library(plm)
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
## The following object is masked from 'package:data.table':
## 
##     between
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
#library(psych)
# Load the bank data and keep only the rows whose Mes column equals 12.
Bancos <- read.xlsx("datos_tesis.xlsx")
Bancos <- subset(Bancos, Mes == 12)

# Natural logarithm of Activo.
Bancos$Ln_Act <- log(Bancos$Activo)

# Sanity check: number of missing values in Activo.
sum(is.na(Bancos$Activo))
## [1] 0
# Sanity check on the log transform. log(0) returns -Inf, which is.na() does
# not flag, so count every non-finite value (NA, NaN, Inf, -Inf) instead.
# A result of 0 means Ln_Act is usable in every row.
sum(!is.finite(Bancos$Ln_Act))

# Ratios over Activo.
Bancos$Int_Cartera <- Bancos$CarteraCrditosyOperLEA / Bancos$Activo
Bancos$Int_Capital <-
  (Bancos$ActivosMateriales + Bancos$ActNoCorrientMantVenta) / Bancos$Activo
Bancos$Fondos_Propios <- Bancos$PatBasicOrd / Bancos$Activo

# Natural logarithm of ProvCrdyOper.
# NOTE(review): any zero in ProvCrdyOper becomes -Inf here; confirm the
# column is strictly positive before Prov_Covid is used in a model.
Bancos$Prov_Covid <- log(Bancos$ProvCrdyOper)

# Dummy: 0 where year < 2021, 1 where year == 2021. Rows with any other
# (or a missing) year are not assigned a value by these two statements.
Bancos$Dummy[Bancos$year < 2021] <- 0
Bancos$Dummy[Bancos$year == 2021] <- 1



# Columns carried over from Bancos into the working data set BD.
varkeep <- c(
  "TIEM_C", "ROA", "year", "Mes", "Banco", "Ln_Act", "Int_Cartera",
  "Int_Capital", "Fondos_Propios", "Prov_Covid", "Dummy"
)
BD <- Bancos[, varkeep]

# Drop rows with a missing TIEM_C before counting rows per bank.
BD <- subset(BD, !is.na(TIEM_C))

# Conteo = number of remaining rows for each Banco. ungroup() removes the
# grouping right after the count so it does not leak into the filters below;
# the values of Conteo are unaffected.
BD <- BD %>%
  group_by(Banco) %>%
  mutate(Conteo = n()) %>%
  ungroup()

# Keep only banks with at least five rows.
BD <- subset(BD, Conteo >= 5)

# Keep only rows with a strictly positive TIEM_C.
# NOTE(review): Conteo was computed before this filter, so a bank can end up
# with fewer than five rows here; confirm that this is intended.
BD <- subset(BD, TIEM_C > 0)

# Restrict BD to the variables used in the correlation analysis and plots.
varkeep <- c(
  "TIEM_C", "ROA", "Ln_Act", "Int_Cartera", "Int_Capital", "Fondos_Propios"
)
BD <- BD[, varkeep]

# Pairwise plot matrix of every column in BD: smoothed scatter plots in the
# lower triangle, histograms on the diagonal, no axis labels.
ggpairs(
  BD,
  lower = list(continuous = "smooth"),
  diag = list(continuous = "barDiag"),
  axisLabels = "none"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Pearson correlation matrix of BD, computed once and reused for both the
# printed table and the correlation plot.
bd_cor_pearson <- cor(BD, method = "pearson")

# Print the matrix rounded to three decimals.
round(bd_cor_pearson, 3)
##                TIEM_C    ROA Ln_Act Int_Cartera Int_Capital Fondos_Propios
## TIEM_C          1.000 -0.102 -0.470       0.165      -0.190          0.289
## ROA            -0.102  1.000 -0.109       0.169       0.104          0.624
## Ln_Act         -0.470 -0.109  1.000      -0.257      -0.341         -0.529
## Int_Cartera     0.165  0.169 -0.257       1.000       0.382          0.255
## Int_Capital    -0.190  0.104 -0.341       0.382       1.000          0.055
## Fondos_Propios  0.289  0.624 -0.529       0.255       0.055          1.000

# Draw the same (unrounded) matrix as a circle-style correlation plot.
ggcorrplot(bd_cor_pearson, method = "circle")

# Scatter plots of TIEM_C against each of the other variables in BD.
# NOTE(review): the 2 x 6 layout has twelve cells while five plots are drawn
# here, and par() is not reset afterwards; confirm both are intended.
par(mfrow = c(2, 6))
for (predictor in c("ROA", "Ln_Act", "Int_Cartera", "Int_Capital",
                    "Fondos_Propios")) {
  # Builds the formula TIEM_C ~ <predictor> and plots it from BD.
  plot(reformulate(predictor, response = "TIEM_C"), data = BD)
}