Most data in R are stored in a data frame or tibble format. Working with these formats simplifies your data analysis and makes it easier to use many R libraries.
library(readr)# read cvs
library(readxl) #read xls
library(dplyr)
library(knitr) # web widget
library(tidyverse) # data manipulation
library(data.table) # fast file reading
library(kableExtra) # nice table html formating
library(gridExtra) # arranging ggplot in grid
library(caTools) # split
library(plotrix)
library(MASS)
Import data: Data used in this material are downloaded from this link: https://www.kaggle.com/sonujha090/bank-marketing
# Load the bank marketing data set into `data_bank`.
# The file uses ";" as its field separator, so `sep` is set explicitly.
data_bank <- read.csv(
  file = "~/ALDA DOC/ALDA 2020/Tirana bank-desktop/Tirana bank-trajnimi qershor Alda/Tirana Bank-R materials/Data _bank dataset/bank-full.csv",
  sep = ";"
)
#ggplot library
Libraries helpful for importing and reading the data.
library(readxl)
library(httr)
Applied to data_bank
# Base plot object for the age variable; reused by the box plot further down.
d.age <- ggplot(data_bank)

# Histogram of client age (5-year bins) with a red vertical line at the mean.
g1 <- d.age +
  geom_histogram(
    aes(x = age),
    color = "black", fill = "white", binwidth = 5
  ) +
  ggtitle("Age histogram (red line=mean)") +
  ylab("Frequence") +
  xlab("Age") +
  # The colour is set outside aes(): inside aes() the string "red" is mapped
  # as a one-level discrete variable instead of being used as the line colour.
  # A constant xintercept also draws the line once rather than once per row.
  geom_vline(xintercept = mean(data_bank$age), color = "red") +
  scale_x_continuous(breaks = seq(0, 100, 5)) +
  # annotate() places a single label; geom_text() with constant coordinates
  # would draw the same label once for every row of data_bank.
  annotate(
    "text",
    x = 45, y = 7500, label = "Mean", color = "red", size = 6
  ) +
  theme(legend.position = "none")
g1
Box Plot
# Box plot of client age. All clients are placed in a single category
# labelled "Clients" on the x axis. Relies on the `d.age` base plot.
g2 <- d.age +
  geom_boxplot(
    aes(x = "Clients", y = age),
    color = "red",
    fill = "yellow"
  ) +
  ggtitle("Box-Plot age") +
  ylab("age")
g2
Grouping by a categorical variable (example “y” variable in data_bank)
# Age histogram (2-year bins) filled by marital status, with one panel
# per level of `y`.
g3 <- data_bank %>%
  ggplot(aes(x = age, fill = marital)) +
  geom_histogram(binwidth = 2, alpha = 0.7) +
  facet_grid(cols = vars(y)) +
  # Force the x axis to cover 0-100 in every panel.
  expand_limits(x = c(0, 100)) +
  scale_x_continuous(breaks = seq(0, 100, 10)) +
  labs(title = "Age and civil status", x = "Age", y = "Frequence")
g3
# Same age histogram filled by marital status, this time with one panel
# per education level. Overwrites the previous `g3`.
g3 <- data_bank %>%
  ggplot(aes(x = age, fill = marital)) +
  geom_histogram(binwidth = 2, alpha = 0.7) +
  facet_grid(cols = vars(education)) +
  # Force the x axis to cover 0-100 in every panel.
  expand_limits(x = c(0, 100)) +
  scale_x_continuous(breaks = seq(0, 100, 10)) +
  labs(title = "Age and civil status", x = "Age", y = "Frequence")
g3
Grouping by the “y” variable (which has two categories, “yes” and “no”)
# Average age for each level of `y`. `.groups = "drop"` states explicitly that
# the result is ungrouped, which also silences the summarise() regrouping
# message.
m.1 <- data_bank %>%
  group_by(y) %>%
  summarize(grp.mean = mean(age), .groups = "drop")
`summarise()` ungrouping output (override with `.groups` argument)
# Open the per-group mean table in the data viewer.
View(m.1)

# Age histogram (5-year bins) split into one panel per level of `y`,
# with a dashed red line at each group's mean age taken from `m.1`.
g4 <- data_bank %>%
  ggplot(aes(x = age)) +
  geom_histogram(color = "blue", fill = "orange", binwidth = 5) +
  facet_grid(cols = vars(y)) +
  labs(title = "Age distribution", x = "Age", y = "Frequence") +
  scale_x_continuous(breaks = seq(0, 100, 5)) +
  geom_vline(
    data = m.1,
    aes(xintercept = grp.mean),
    color = "red",
    linetype = "dashed"
  )
g4
A higher education level appears to be associated with a higher rate of term deposit subscription. Most clients who subscribe have a ‘secondary’ or ‘tertiary’ education level. Tertiary-educated clients have a higher subscription rate (15% of the total clients called).
# Bar chart of client counts per education level, with bars filled by `y`.
g6 <- data_bank %>%
  ggplot(aes(x = education, fill = y)) +
  geom_bar() +
  labs(title = "Education vs Y", x = " Education level") +
  guides(fill = guide_legend(title = "Y"))
g6
Clients that subscribe to term deposits have lower loan balances?
# Average balance for each level of `y`. `.groups = "drop"` states explicitly
# that the result is ungrouped, which also silences the summarise() regrouping
# message.
m.2 <- data_bank %>%
  group_by(y) %>%
  summarize(grp.mean2 = mean(balance), .groups = "drop")
`summarise()` ungrouping output (override with `.groups` argument)
# Balance histogram split into one panel per level of `y`, with a dashed red
# line at each group's mean balance taken from `m.2`.
g5 <- ggplot(data_bank, aes(x = balance)) +
  # The bin count is set on the histogram layer itself. Adding a separate
  # stat_bin() layer would draw a second histogram on top of a default
  # 30-bin one instead of changing the bins of this layer.
  geom_histogram(color = "blue", fill = "blue", bins = 3) +
  facet_grid(cols = vars(y)) +
  ggtitle("Balance histogram") +
  ylab("Frequency") +
  xlab("Balance") +
  geom_vline(
    data = m.2,
    aes(xintercept = grp.mean2),
    color = "red", linetype = "dashed"
  )
g5
Try changing the number of bins in the histogram above (for example, bins = 30) and see how the plot changes.
# Bar chart of client counts per marital status, with bars filled by `y`
# (shown in the legend as "Deposit").
g7 <- data_bank %>%
  ggplot(aes(x = marital, fill = y)) +
  geom_bar() +
  labs(title = "Civil status vs Deposit", x = " Civil status", y = "Freq") +
  guides(fill = guide_legend(title = "Deposit"))
g7
# Scatter plot of duration (y axis) against age (x axis), one panel per level
# of `y`.
data_bank %>%
  ggplot(aes(x = age, y = duration)) +
  geom_point(color = "blue") +
  facet_grid(cols = vars(y)) +
  scale_x_continuous(breaks = seq(0, 100, 10)) +
  # Axis labels now match the mapping: age is on x and duration on y
  # (they were swapped before).
  xlab("Age") +
  ylab("Duration") +
  ggtitle("Scatterplot")
# Scatter plot of duration against campaign, one panel per level of `y`.
# Only rows with campaign < 63 are kept.
# NOTE(review): presumably this drops an extreme campaign value -- confirm.
ggplot(
  filter(data_bank, campaign < 63),
  aes(x = campaign, y = duration)
) +
  geom_point(color = "red") +
  facet_grid(cols = vars(y)) +
  labs(x = "Campain", y = "Duration", title = "Scatterplot")
We will use the COVID-19 data set, which is stored on the ECDC site and updated automatically every day.
Reference : https://rpubs.com/eraldagjika/601456
Some libraries are:
library(ggplot2)
library(gganimate)
library(quantmod)
library(directlabels)
library(hrbrthemes)
library(viridis)
library(dplyr)
library(dbplyr)
library(gifski)
Import the data and save it in an object named `data` (i.e. `data <- ...`, using the import function that matches your file format).
# Countries to compare in the plot.
lista <- c(
  "Romania", "Albania", "Italy", "France",
  "United_States_of_America", "Germany", "United_Kingdom"
)

# Keep only the rows of `data` that belong to the selected countries.
# Columns are referenced by bare name (data masking) instead of `data$...`.
europa <- filter(data, countriesAndTerritories %in% lista)

# Line + point plot of `cases` over `dateRep`, one coloured series per country.
# Columns are referenced by bare name inside aes(): `europa$...` bypasses the
# layer data and triggers ggplot2's "Use of `x$y` is discouraged" warning.
# The default colour legend title changes accordingly
# (it no longer carries the `europa$` prefix).
p2.1 <- ggplot(
  europa,
  aes(
    dateRep, cases,
    group = countriesAndTerritories,
    color = factor(countriesAndTerritories)
  )
) +
  geom_line() +
  geom_point() +
  ylab("Cases") +
  xlab("Data") +
  scale_color_brewer(palette = "Dark2") +
  ggtitle("New cases (31 Dec 2019-13 Dec 2020)") +
  # Direct labels at the last point of each series, shifted left by 1.2.
  geom_dl(
    aes(label = countriesAndTerritories),
    method = list(dl.trans(x = x - 1.2), "last.points")
  ) +
  theme(legend.position = "top")
p2.1
# Narrow the comparison to three countries.
lista <- c("Romania", "Albania", "Kosovo")

# Keep only the rows of `data` that belong to the selected countries.
# Columns are referenced by bare name (data masking) instead of `data$...`.
europa <- filter(data, countriesAndTerritories %in% lista)
europa

# Line + point plot of `cases` over `dateRep`, one coloured series per country.
# Columns are referenced by bare name inside aes(): `europa$...` bypasses the
# layer data and triggers ggplot2's "Use of `x$y` is discouraged" warning.
# The default colour legend title changes accordingly
# (it no longer carries the `europa$` prefix).
p2.1 <- ggplot(
  europa,
  aes(
    dateRep, cases,
    group = countriesAndTerritories,
    color = factor(countriesAndTerritories)
  )
) +
  geom_line() +
  geom_point() +
  ylab("Cases") +
  xlab("Data") +
  scale_color_brewer(palette = "Dark2") +
  ggtitle("New cases (31 Dec 2019-13 Dec 2020)") +
  # Direct labels at the last point of each series, shifted left by 1.2.
  geom_dl(
    aes(label = countriesAndTerritories),
    method = list(dl.trans(x = x - 1.2), "last.points")
  ) +
  theme(legend.position = "top")
p2.1

# One panel per country.
p2.1 + facet_wrap(~countriesAndTerritories)

# Animated version: the series are revealed progressively along `day`.
# NOTE(review): confirm that `day` is a running time index in `data`; if it is
# only the day of the month, the reveal order will not follow the calendar.
p2.1 + transition_reveal(day)
Use of `europa$dateRep` is discouraged. Use `dateRep` instead.Use of `europa$cases` is discouraged. Use `cases` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa$dateRep` is discouraged. Use `dateRep` instead.Use of `europa$cases` is discouraged. Use `cases` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa$dateRep` is discouraged. Use `dateRep` instead.Use of `europa$cases` is discouraged. Use `cases` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.
Frame 1 (1%)
Frame 2 (2%)
Frame 3 (3%)
Frame 4 (4%)
Frame 5 (5%)
Frame 6 (6%)
Frame 7 (7%)
Frame 8 (8%)
Frame 9 (9%)
Frame 10 (10%)
Frame 11 (11%)
Frame 12 (12%)
Frame 13 (13%)
Frame 14 (14%)
Frame 15 (15%)
Frame 16 (16%)
Frame 17 (17%)
Frame 18 (18%)
Frame 19 (19%)
Frame 20 (20%)
Frame 21 (21%)
Frame 22 (22%)
Frame 23 (23%)
Frame 24 (24%)
Frame 25 (25%)
Frame 26 (26%)
Frame 27 (27%)
Frame 28 (28%)
Frame 29 (29%)
Frame 30 (30%)
Frame 31 (31%)
Frame 32 (32%)
Frame 33 (33%)
Frame 34 (34%)
Frame 35 (35%)
Frame 36 (36%)
Frame 37 (37%)
Frame 38 (38%)
Frame 39 (39%)
Frame 40 (40%)
Frame 41 (41%)
Frame 42 (42%)
Frame 43 (43%)
Frame 44 (44%)
Frame 45 (45%)
Frame 46 (46%)
Frame 47 (47%)
Frame 48 (48%)
Frame 49 (49%)
Frame 50 (50%)
Frame 51 (51%)
Frame 52 (52%)
Frame 53 (53%)
Frame 54 (54%)
Frame 55 (55%)
Frame 56 (56%)
Frame 57 (57%)
Frame 58 (58%)
Frame 59 (59%)
Frame 60 (60%)
Frame 61 (61%)
Frame 62 (62%)
Frame 63 (63%)
Frame 64 (64%)
Frame 65 (65%)
Frame 66 (66%)
Frame 67 (67%)
Frame 68 (68%)
Frame 69 (69%)
Frame 70 (70%)
Frame 71 (71%)
Frame 72 (72%)
Frame 73 (73%)
Frame 74 (74%)
Frame 75 (75%)
Frame 76 (76%)
Frame 77 (77%)
Frame 78 (78%)
Frame 79 (79%)
Frame 80 (80%)
Frame 81 (81%)
Frame 82 (82%)
Frame 83 (83%)
Frame 84 (84%)
Frame 85 (85%)
Frame 86 (86%)
Frame 87 (87%)
Frame 88 (88%)
Frame 89 (89%)
Frame 90 (90%)
Frame 91 (91%)
Frame 92 (92%)
Frame 93 (93%)
Frame 94 (94%)
Frame 95 (95%)
Frame 96 (96%)
Frame 97 (97%)
Frame 98 (98%)
Frame 99 (99%)
Frame 100 (100%)
Finalizing encoding... done!
# Western Balkan countries to compare.
lista.11 <- c(
  "Albania", "Kosovo", "Bosnia_and_Herzegovina",
  "Montenegro", "Serbia", "North_Macedonia"
)

# Keep only the rows of `data` that belong to the selected countries.
# Columns are referenced by bare name (data masking) instead of `data$...`.
europa.11 <- filter(data, countriesAndTerritories %in% lista.11)
europa.11

# Line + point plot of `cases` over `dateRep`, one coloured series per country,
# with the y axis limited to 0-1000.
# Columns are referenced by bare name inside aes(): `europa.11$...` bypasses
# the layer data and triggers ggplot2's "Use of `x$y` is discouraged" warning.
# The default colour legend title changes accordingly
# (it no longer carries the `europa.11$` prefix).
p2.11 <- ggplot(
  europa.11,
  aes(
    dateRep, cases,
    group = countriesAndTerritories,
    color = factor(countriesAndTerritories)
  )
) +
  geom_line() +
  geom_point() +
  ylab("New Cases") +
  xlab("Time") +
  ylim(0, 1000) +
  scale_color_brewer(palette = "Dark2") +
  ggtitle("New Cases Western Balkan (31 Dec 2019-13 Dec 2020)") +
  # Direct labels at the last point of each series, shifted left by 1.2.
  geom_dl(
    aes(label = countriesAndTerritories),
    method = list(dl.trans(x = x - 1.2), "last.points")
  ) +
  theme(legend.position = "top")
p2.11

# One panel per country.
p2.11 + facet_wrap(~countriesAndTerritories)
# Western Balkan countries to compare.
lista.11 <- c(
  "Albania", "Kosovo", "Bosnia_and_Herzegovina",
  "Montenegro", "Serbia", "North_Macedonia"
)

# Keep only the rows of `data` that belong to the selected countries.
# Columns are referenced by bare name (data masking) instead of `data$...`.
europa.11 <- filter(data, countriesAndTerritories %in% lista.11)
europa.11

# Line + point plot of `deaths` over `dateRep`, one coloured series per
# country, with the y axis limited to 0-50.
# Columns are referenced by bare name inside aes(): `europa.11$...` bypasses
# the layer data and triggers ggplot2's "Use of `x$y` is discouraged" warning.
# The default colour legend title changes accordingly
# (it no longer carries the `europa.11$` prefix).
p2.11 <- ggplot(
  europa.11,
  aes(
    dateRep, deaths,
    group = countriesAndTerritories,
    color = factor(countriesAndTerritories)
  )
) +
  geom_line() +
  geom_point() +
  ylab("Deaths") +
  xlab("Time") +
  ylim(0, 50) +
  scale_color_brewer(palette = "Dark2") +
  ggtitle("Deaths Western Balkan (31 Dec 2019-13 Dec 2020)") +
  # Direct labels at the last point of each series, shifted left by 1.2.
  geom_dl(
    aes(label = countriesAndTerritories),
    method = list(dl.trans(x = x - 1.2), "last.points")
  ) +
  theme(legend.position = "top")
p2.11

# One panel per country.
p2.11 + facet_wrap(~countriesAndTerritories)

# Animated version: the series are revealed progressively along `day`.
# NOTE(review): confirm that `day` is a running time index in `data`; if it is
# only the day of the month, the reveal order will not follow the calendar.
p2.11 + transition_reveal(day)
Use of `europa.11$dateRep` is discouraged. Use `dateRep` instead.Use of `europa.11$deaths` is discouraged. Use `deaths` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa.11$dateRep` is discouraged. Use `dateRep` instead.Use of `europa.11$deaths` is discouraged. Use `deaths` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa.11$dateRep` is discouraged. Use `dateRep` instead.Use of `europa.11$deaths` is discouraged. Use `deaths` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.Use of `europa.11$countriesAndTerritories` is discouraged. Use `countriesAndTerritories` instead.
Frame 1 (1%)
Frame 2 (2%)
Frame 3 (3%)
Frame 4 (4%)
Frame 5 (5%)
Frame 6 (6%)
Frame 7 (7%)
Frame 8 (8%)
Frame 9 (9%)
Frame 10 (10%)
Frame 11 (11%)
Frame 12 (12%)
Frame 13 (13%)
Frame 14 (14%)
Frame 15 (15%)
Frame 16 (16%)
Frame 17 (17%)
Frame 18 (18%)
Frame 19 (19%)
Frame 20 (20%)
Frame 21 (21%)
Frame 22 (22%)
Frame 23 (23%)
Frame 24 (24%)
Frame 25 (25%)
Frame 26 (26%)
Frame 27 (27%)
Frame 28 (28%)
Frame 29 (29%)
Frame 30 (30%)
Frame 31 (31%)
Frame 32 (32%)
Frame 33 (33%)
Frame 34 (34%)
Frame 35 (35%)
Frame 36 (36%)
Frame 37 (37%)
Frame 38 (38%)
Frame 39 (39%)
Frame 40 (40%)
Frame 41 (41%)
Frame 42 (42%)
Frame 43 (43%)
Frame 44 (44%)
Frame 45 (45%)
Frame 46 (46%)
Frame 47 (47%)
Frame 48 (48%)
Frame 49 (49%)
Frame 50 (50%)
Frame 51 (51%)
Frame 52 (52%)
Frame 53 (53%)
Frame 54 (54%)
Frame 55 (55%)
Frame 56 (56%)
Frame 57 (57%)
Frame 58 (58%)
Frame 59 (59%)
Frame 60 (60%)
Frame 61 (61%)
Frame 62 (62%)
Frame 63 (63%)
Frame 64 (64%)
Frame 65 (65%)
Frame 66 (66%)
Frame 67 (67%)
Frame 68 (68%)
Frame 69 (69%)
Frame 70 (70%)
Frame 71 (71%)
Frame 72 (72%)
Frame 73 (73%)
Frame 74 (74%)
Frame 75 (75%)
Frame 76 (76%)
Frame 77 (77%)
Frame 78 (78%)
Frame 79 (79%)
Frame 80 (80%)
Frame 81 (81%)
Frame 82 (82%)
Frame 83 (83%)
Frame 84 (84%)
Frame 85 (85%)
Frame 86 (86%)
Frame 87 (87%)
Frame 88 (88%)
Frame 89 (89%)
Frame 90 (90%)
Frame 91 (91%)
Frame 92 (92%)
Frame 93 (93%)
Frame 94 (94%)
Frame 95 (95%)
Frame 96 (96%)
Frame 97 (97%)
Frame 98 (98%)
Frame 99 (99%)
Frame 100 (100%)
Finalizing encoding... done!
https://daviddalpiaz.github.io/appliedstats/multiple-linear-regression.html
https://www.r-graph-gallery.com/ggplot2-package.html
https://cran.r-project.org/web/packages/Rcmdr/Rcmdr.pdf
https://www.tutorialspoint.com/r/r_pie_charts.htm
https://www.tutorialspoint.com/r/r_chi_square_tests.htm
https://www.tutorialspoint.com/r/r_analysis_of_covariance.htm
https://www.tutorialspoint.com/r/r_logistic_regression.htm
https://gregor-mathes.netlify.app/2021/01/01/rethinking-chapter-4/
https://www.r-bloggers.com/2013/02/collinearity-and-stepwise-vif-selection/