Lesson 3: R Markdown and Basic Functions

Read the ‘datasets.xlsx’

R cannot read an excel dataset directly. So, we need a package (just like addins, that enhance the base functionalities of R). To do that, first install the readxl package, then load it to use its read_excel() function by library(readxl).

To install the package or library, consol: install.packages(‘readxl’), or Tools>Install Packages, or Packages in the Files pane.

To insert a command line: Click (C+), or Ctrl+Alt+I

library(readxl)
df = read_excel('datasets.xlsx', sheet = 'wrangling', range = 'B7:N507')
head(df)
## # A tibble: 6 × 13
##     sr.   jan   feb   mar   apr   may   jun   jul   aug   sep   oct   nov   dec
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1     0     1     1     0     0     0     0     0     0     0     0     0
## 2     2     0     1     1     1     0     0     0     0     0     0     0     0
## 3     3     1     0     1     0     1     0     0     0     0     0     0     0
## 4     4     0     0     1     1     0     1     0     0     1     0     0     0
## 5     5     1     0     0     0     0     0     0     0     0     1     0     0
## 6     6     0     1     0     0     0     0     0     0     0     0     0     0
tail(df)
## # A tibble: 6 × 13
##     sr.   jan   feb   mar   apr   may   jun   jul   aug   sep   oct   nov   dec
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   495     0     1     0     1     0     0     1     0     0     0     0     0
## 2   496     0     1     1     0     0     0     0     0     0     0     0     0
## 3   497     1     0     1     1     1     0     0     0     0     0     0     0
## 4   498     0     0     0     1     1     0     1     0     0     0     0     0
## 5   499     1     0     0     0     0     0     0     0     0     0     0     1
## 6   500     0     0     0     1     0     0     0     0     0     0     0     0

Calculating the column sums, excluding the serial [column no. 1], and only for 1 to 50 rows.

colSums(df[c(1:50) , c(2:13)])
## jan feb mar apr may jun jul aug sep oct nov dec 
##  13  22  20  20   8  15   7   4   3   2   2   3

To see the full dataset containing the money scarcity information:

scarcity=colSums(df[-1])*100/nrow(df)
scarcity
##  jan  feb  mar  apr  may  jun  jul  aug  sep  oct  nov  dec 
## 28.6 35.4 39.2 35.8 22.4 21.8 18.0  9.0  8.2  8.0  7.6  4.2
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# convert to dataframe
df_scarcity = as.data.frame(scarcity)

#transfer the rownames to columns as 'Month'
df_plot = rownames_to_column(df_scarcity, var = 'Month')


str(df_plot)
## 'data.frame':    12 obs. of  2 variables:
##  $ Month   : chr  "jan" "feb" "mar" "apr" ...
##  $ scarcity: num  28.6 35.4 39.2 35.8 22.4 21.8 18 9 8.2 8 ...
df_plot 
##    Month scarcity
## 1    jan     28.6
## 2    feb     35.4
## 3    mar     39.2
## 4    apr     35.8
## 5    may     22.4
## 6    jun     21.8
## 7    jul     18.0
## 8    aug      9.0
## 9    sep      8.2
## 10   oct      8.0
## 11   nov      7.6
## 12   dec      4.2
df_plot$Month = factor(df_plot$Month,
                       levels = c('jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'), labels = c('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'))
ggplot(df_plot)+
  aes(x= Month, y = scarcity, color = Month, fill = Month)+
  geom_col(Width = 0.7) +
  labs(x= '', y = 'Money scarcity') + 
  geom_text(aes(label = scarcity,y=scarcity +2), color='grey') +
  theme_test() +
  theme(legend.position = 'top')
## Warning in geom_col(Width = 0.7): Ignoring unknown parameters: `Width`

ggsave('my plot.png',dpi=600, height = 4.5, width = 8, units = 'in')