R cannot read an excel dataset directly. So, we need a package (just
like addins, that enhance the base functionalities of R). To do that,
first install the readxl package, then load it to use its
read_excel() function by library(readxl).
To install the package or library, consol: install.packages(‘readxl’), or Tools>Install Packages, or Packages in the Files pane.
To insert a command line: Click (C+), or Ctrl+Alt+I
library(readxl)
df = read_excel('datasets.xlsx', sheet = 'wrangling', range = 'B7:N507')
head(df)
## # A tibble: 6 × 13
## sr. jan feb mar apr may jun jul aug sep oct nov dec
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 1 1 0 0 0 0 0 0 0 0 0
## 2 2 0 1 1 1 0 0 0 0 0 0 0 0
## 3 3 1 0 1 0 1 0 0 0 0 0 0 0
## 4 4 0 0 1 1 0 1 0 0 1 0 0 0
## 5 5 1 0 0 0 0 0 0 0 0 1 0 0
## 6 6 0 1 0 0 0 0 0 0 0 0 0 0
tail(df)
## # A tibble: 6 × 13
## sr. jan feb mar apr may jun jul aug sep oct nov dec
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 495 0 1 0 1 0 0 1 0 0 0 0 0
## 2 496 0 1 1 0 0 0 0 0 0 0 0 0
## 3 497 1 0 1 1 1 0 0 0 0 0 0 0
## 4 498 0 0 0 1 1 0 1 0 0 0 0 0
## 5 499 1 0 0 0 0 0 0 0 0 0 0 1
## 6 500 0 0 0 1 0 0 0 0 0 0 0 0
Calculating the column sums, excluding the serial [column no. 1], and only for 1 to 50 rows.
colSums(df[c(1:50) , c(2:13)])
## jan feb mar apr may jun jul aug sep oct nov dec
## 13 22 20 20 8 15 7 4 3 2 2 3
To see the full dataset containing the money scarcity information:
scarcity=colSums(df[-1])*100/nrow(df)
scarcity
## jan feb mar apr may jun jul aug sep oct nov dec
## 28.6 35.4 39.2 35.8 22.4 21.8 18.0 9.0 8.2 8.0 7.6 4.2
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# convert to dataframe
df_scarcity = as.data.frame(scarcity)
#transfer the rownames to columns as 'Month'
df_plot = rownames_to_column(df_scarcity, var = 'Month')
str(df_plot)
## 'data.frame': 12 obs. of 2 variables:
## $ Month : chr "jan" "feb" "mar" "apr" ...
## $ scarcity: num 28.6 35.4 39.2 35.8 22.4 21.8 18 9 8.2 8 ...
df_plot
## Month scarcity
## 1 jan 28.6
## 2 feb 35.4
## 3 mar 39.2
## 4 apr 35.8
## 5 may 22.4
## 6 jun 21.8
## 7 jul 18.0
## 8 aug 9.0
## 9 sep 8.2
## 10 oct 8.0
## 11 nov 7.6
## 12 dec 4.2
df_plot$Month = factor(df_plot$Month,
levels = c('jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'), labels = c('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'))
ggplot(df_plot)+
aes(x= Month, y = scarcity, color = Month, fill = Month)+
geom_col(Width = 0.7) +
labs(x= '', y = 'Money scarcity') +
geom_text(aes(label = scarcity,y=scarcity +2), color='grey') +
theme_test() +
theme(legend.position = 'top')
## Warning in geom_col(Width = 0.7): Ignoring unknown parameters: `Width`
ggsave('my plot.png',dpi=600, height = 4.5, width = 8, units = 'in')