library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
# Load the built-in `volcano` dataset and show its structure
# (a numeric matrix, as the str() output below reports).
data(volcano)
str(object = volcano)
## num [1:87, 1:61] 100 101 102 103 104 105 105 106 107 108 ...
# summary() on a matrix reports one set of statistics per column.
summary(object = volcano)
## V1 V2 V3 V4 V5
## Min. : 97.0 Min. : 97.0 Min. : 97 Min. : 98.0 Min. : 98.0
## 1st Qu.:106.5 1st Qu.:107.5 1st Qu.:108 1st Qu.:108.5 1st Qu.:109.0
## Median :111.0 Median :113.0 Median :114 Median :115.0 Median :116.0
## Mean :110.6 Mean :111.8 Mean :113 Mean :114.1 Mean :115.1
## 3rd Qu.:115.0 3rd Qu.:116.0 3rd Qu.:117 3rd Qu.:118.0 3rd Qu.:119.0
## Max. :124.0 Max. :128.0 Max. :131 Max. :134.0 Max. :136.0
## V6 V7 V8 V9
## Min. : 99.0 Min. : 99.0 Min. : 99.0 Min. :100.0
## 1st Qu.:109.5 1st Qu.:109.5 1st Qu.:108.5 1st Qu.:109.0
## Median :117.0 Median :118.0 Median :120.0 Median :122.0
## Mean :116.1 Mean :117.1 Mean :118.1 Mean :119.4
## 3rd Qu.:120.0 3rd Qu.:121.5 3rd Qu.:123.0 3rd Qu.:125.0
## Max. :138.0 Max. :140.0 Max. :142.0 Max. :144.0
## V10 V11 V12 V13 V14
## Min. :100.0 Min. :100.0 Min. : 99.0 Min. : 99.0 Min. : 99
## 1st Qu.:108.5 1st Qu.:110.5 1st Qu.:112.5 1st Qu.:114.5 1st Qu.:115
## Median :124.0 Median :126.0 Median :128.0 Median :132.0 Median :135
## Mean :121.3 Mean :123.3 Mean :125.4 Mean :127.6 Mean :130
## 3rd Qu.:128.0 3rd Qu.:132.0 3rd Qu.:135.0 3rd Qu.:138.5 3rd Qu.:141
## Max. :147.0 Max. :149.0 Max. :151.0 Max. :154.0 Max. :157
## V15 V16 V17 V18
## Min. : 99.0 Min. : 99.0 Min. :100.0 Min. :100.0
## 1st Qu.:115.5 1st Qu.:116.5 1st Qu.:117.5 1st Qu.:118.5
## Median :139.0 Median :142.0 Median :145.0 Median :148.0
## Mean :132.7 Mean :135.0 Mean :137.1 Mean :139.1
## 3rd Qu.:144.0 3rd Qu.:147.0 3rd Qu.:150.0 3rd Qu.:150.0
## Max. :161.0 Max. :165.0 Max. :168.0 Max. :172.0
## V19 V20 V21 V22
## Min. :100.0 Min. :100.0 Min. :100.0 Min. :100.0
## 1st Qu.:120.5 1st Qu.:122.5 1st Qu.:125.0 1st Qu.:126.5
## Median :150.0 Median :150.0 Median :153.0 Median :153.0
## Mean :141.1 Mean :143.2 Mean :145.2 Mean :147.1
## 3rd Qu.:153.5 3rd Qu.:157.0 3rd Qu.:161.0 3rd Qu.:166.0
## Max. :175.0 Max. :178.0 Max. :181.0 Max. :184.0
## V23 V24 V25 V26
## Min. :101.0 Min. :101.0 Min. :100.0 Min. :100.0
## 1st Qu.:126.5 1st Qu.:127.5 1st Qu.:127.5 1st Qu.:126.0
## Median :153.0 Median :153.0 Median :154.0 Median :155.0
## Mean :148.9 Mean :150.0 Mean :150.8 Mean :151.3
## 3rd Qu.:170.0 3rd Qu.:173.5 3rd Qu.:176.5 3rd Qu.:176.5
## Max. :187.0 Max. :190.0 Max. :190.0 Max. :190.0
## V27 V28 V29 V30
## Min. :100.0 Min. :100.0 Min. :100.0 Min. :100.0
## 1st Qu.:126.5 1st Qu.:126.5 1st Qu.:125.5 1st Qu.:124.5
## Median :154.0 Median :155.0 Median :154.0 Median :155.0
## Mean :151.1 Mean :150.4 Mean :149.4 Mean :148.4
## 3rd Qu.:173.5 3rd Qu.:170.5 3rd Qu.:169.0 3rd Qu.:167.0
## Max. :191.0 Max. :192.0 Max. :193.0 Max. :194.0
## V31 V32 V33 V34
## Min. :100.0 Min. :100.0 Min. :100.0 Min. :100.0
## 1st Qu.:125.0 1st Qu.:124.0 1st Qu.:122.5 1st Qu.:123.0
## Median :153.0 Median :152.0 Median :150.0 Median :149.0
## Mean :147.5 Mean :146.4 Mean :145.3 Mean :144.3
## 3rd Qu.:166.0 3rd Qu.:164.5 3rd Qu.:164.0 3rd Qu.:163.0
## Max. :195.0 Max. :194.0 Max. :193.0 Max. :192.0
## V35 V36 V37 V38 V39
## Min. : 99.0 Min. : 99 Min. : 98.0 Min. : 97.0 Min. : 97.0
## 1st Qu.:122.0 1st Qu.:121 1st Qu.:121.0 1st Qu.:120.5 1st Qu.:119.5
## Median :148.0 Median :146 Median :145.0 Median :142.0 Median :140.0
## Mean :143.6 Mean :143 Mean :142.5 Mean :141.8 Mean :141.2
## 3rd Qu.:162.0 3rd Qu.:162 3rd Qu.:161.0 3rd Qu.:161.5 3rd Qu.:163.5
## Max. :192.0 Max. :191 Max. :191.0 Max. :191.0 Max. :190.0
## V40 V41 V42 V43
## Min. : 97.0 Min. : 96.0 Min. : 96.0 Min. : 96.0
## 1st Qu.:118.0 1st Qu.:116.5 1st Qu.:114.5 1st Qu.:113.0
## Median :140.0 Median :139.0 Median :137.0 Median :133.0
## Mean :140.4 Mean :139.4 Mean :138.1 Mean :136.4
## 3rd Qu.:165.0 3rd Qu.:163.0 3rd Qu.:161.5 3rd Qu.:158.5
## Max. :190.0 Max. :189.0 Max. :186.0 Max. :185.0
## V44 V45 V46 V47
## Min. : 95.0 Min. : 95.0 Min. : 95.0 Min. : 95.0
## 1st Qu.:111.5 1st Qu.:110.0 1st Qu.:108.5 1st Qu.:108.5
## Median :130.0 Median :127.0 Median :124.0 Median :122.0
## Mean :134.7 Mean :132.6 Mean :130.3 Mean :128.4
## 3rd Qu.:157.0 3rd Qu.:154.5 3rd Qu.:151.0 3rd Qu.:148.5
## Max. :184.0 Max. :181.0 Max. :181.0 Max. :180.0
## V48 V49 V50 V51
## Min. : 94.0 Min. : 94.0 Min. : 94.0 Min. : 94.0
## 1st Qu.:107.5 1st Qu.:107.0 1st Qu.:104.5 1st Qu.:102.5
## Median :119.0 Median :117.0 Median :115.0 Median :115.0
## Mean :126.2 Mean :124.3 Mean :122.0 Mean :119.5
## 3rd Qu.:146.0 3rd Qu.:143.0 3rd Qu.:139.0 3rd Qu.:133.5
## Max. :179.0 Max. :175.0 Max. :171.0 Max. :162.0
## V52 V53 V54 V55
## Min. : 94.0 Min. : 94.0 Min. : 94.0 Min. : 94.0
## 1st Qu.:100.5 1st Qu.:101.5 1st Qu.:102.0 1st Qu.:102.5
## Median :113.0 Median :112.0 Median :110.0 Median :110.0
## Mean :117.0 Mean :114.9 Mean :112.8 Mean :111.0
## 3rd Qu.:128.5 3rd Qu.:125.0 3rd Qu.:120.0 3rd Qu.:116.0
## Max. :154.0 Max. :149.0 Max. :142.0 Max. :136.0
## V56 V57 V58 V59
## Min. : 94.0 Min. : 94.0 Min. : 94.0 Min. : 94.0
## 1st Qu.:102.0 1st Qu.:101.5 1st Qu.:100.5 1st Qu.:100.0
## Median :109.0 Median :108.0 Median :106.0 Median :106.0
## Mean :109.0 Mean :107.4 Mean :105.8 Mean :104.6
## 3rd Qu.:113.5 3rd Qu.:111.0 3rd Qu.:110.0 3rd Qu.:108.0
## Max. :130.0 Max. :124.0 Max. :119.0 Max. :116.0
## V60 V61
## Min. : 94.0 Min. : 94.0
## 1st Qu.:100.0 1st Qu.:100.0
## Median :105.0 Median :104.0
## Mean :103.8 Mean :103.2
## 3rd Qu.:107.0 3rd Qu.:107.0
## Max. :113.0 Max. :110.0
# `volcano` is a plain numeric matrix without column names, so asking for its
# variable names only yields NULL. Report its dimensions (rows, columns)
# instead, which is the informative shape description for a matrix.
dim(volcano)
## [1] 87 61
# Read the two source tables used in the rest of the script.
# NOTE(review): both paths point at the user's home directory; confirm the
# files live there on every machine that runs this script.
df1 <- read.csv("~/world_population.csv")
df2 <- read.csv("~/CO2_emission.csv")

# View() opens a data viewer window, which only makes sense when a person is
# at the console. Guard it so non-interactive runs (Rscript, knitting) are not
# interrupted by the viewer call.
if (interactive()) {
  View(df1)
  View(df2)
}
## Checking the variable names of each dataset
# List the column names of the population table.
colnames(df1)
## [1] "Rank" "CCA3"
## [3] "Country.Territory" "Capital"
## [5] "Continent" "X2022.Population"
## [7] "X2020.Population" "X2015.Population"
## [9] "X2010.Population" "X2000.Population"
## [11] "X1990.Population" "X1980.Population"
## [13] "X1970.Population" "Area..km.."
## [15] "Density..per.km.." "Growth.Rate"
## [17] "World.Population.Percentage"
# List the column names of the CO2 emission table.
colnames(df2)
## [1] "Country.Name" "country_code" "Region" "Indicator.Name"
## [5] "X1990" "X1991" "X1992" "X1993"
## [9] "X1994" "X1995" "X1996" "X1997"
## [13] "X1998" "X1999" "X2000" "X2001"
## [17] "X2002" "X2003" "X2004" "X2005"
## [21] "X2006" "X2007" "X2008" "X2009"
## [25] "X2010" "X2011" "X2012" "X2013"
## [29] "X2014" "X2015" "X2016" "X2017"
## [33] "X2018" "X2019" "X2019.1"
# Combine the population table (x) with the CO2 table (y). Rows are matched
# when both the continent/region label and the country name agree; rows
# without a match on either side are dropped (all = FALSE).
# NOTE(review): this assumes `Region` uses the same labels as `Continent` and
# that country names are spelled identically in both files -- verify, or
# consider matching on CCA3 / country_code instead.
merged_data <- merge(
  x = df1,
  y = df2,
  by.x = c("Continent", "Country.Territory"),
  by.y = c("Region", "Country.Name"),
  all = FALSE
)
# List the columns of the merged table.
colnames(merged_data)
## [1] "Continent" "Country.Territory"
## [3] "Rank" "CCA3"
## [5] "Capital" "X2022.Population"
## [7] "X2020.Population" "X2015.Population"
## [9] "X2010.Population" "X2000.Population"
## [11] "X1990.Population" "X1980.Population"
## [13] "X1970.Population" "Area..km.."
## [15] "Density..per.km.." "Growth.Rate"
## [17] "World.Population.Percentage" "country_code"
## [19] "Indicator.Name" "X1990"
## [21] "X1991" "X1992"
## [23] "X1993" "X1994"
## [25] "X1995" "X1996"
## [27] "X1997" "X1998"
## [29] "X1999" "X2000"
## [31] "X2001" "X2002"
## [33] "X2003" "X2004"
## [35] "X2005" "X2006"
## [37] "X2007" "X2008"
## [39] "X2009" "X2010"
## [41] "X2011" "X2012"
## [43] "X2013" "X2014"
## [45] "X2015" "X2016"
## [47] "X2017" "X2018"
## [49] "X2019" "X2019.1"
# Open the merged table in the data viewer only when a user is at the console;
# View() is an interactive tool and should not run in scripted/knitted runs.
if (interactive()) {
  View(merged_data)
}
In R, `group_by()` and the pipe operator `%>%` are commonly used with the dplyr package for data manipulation and analysis.

### 3.1. The Pipe Operator %>%

We use `%>%` because it makes code cleaner, easier to read, and easier to understand.
# Average 2022 population per continent, written as a pipeline:
# take df1, group its rows by continent, then reduce each group to its mean.
summary_data <- df1 %>%
  group_by(Continent) %>%
  summarise(Avg_Population = mean(X2022.Population))
# This is easier to read because the operations flow step by step.
print(summary_data)
## # A tibble: 6 × 2
## Continent Avg_Population
## <chr> <dbl>
## 1 Africa 25030367.
## 2 Asia 94427665.
## 3 Europe 14862951.
## 4 North America 15007403.
## 5 Oceania 1958198
## 6 South America 31201186.
`group_by()` divides the data into groups so that subsequent calculations are performed on each group separately.
# Finding the average population per continent using group_by()
continent_population <- df1 %>% # pass dataset df1 to the next operation
  group_by(Continent) %>% # group all countries according to continent
  summarize(
    Avg_Population = mean(X2022.Population) # average population per continent
  )

# View() is an interactive viewer; skip it when the script runs unattended
# (Rscript, knitting) so the run is not interrupted.
if (interactive()) {
  View(continent_population)
}
Debugging is the process of finding and fixing errors in a program. R provides debugging tools such as trace() and recover() to help programmers identify problems in functions and code execution.
trace() is used to monitor function execution, insert debugging messages, and understand how functions work.
#Example
# Add two values.
#   a, b: the operands (anything `+` accepts).
# Returns a + b.
addition <- function(a, b) {
  a + b
}
# Attach a tracer to addition(): the quoted expression is evaluated each time
# the function is entered.
trace(what = addition, tracer = quote(print("Function is running")))
## [1] "addition"
addition(5, 3)
## Tracing addition(5, 3) on entry
## [1] "Function is running"
## [1] 8
# Remove the tracer so addition() runs without the extra message again.
untrace(what = addition)
recover() is an interactive debugger that pauses execution when an error occurs and lets you inspect the call stack and variable environments at each frame.
#Example
# Divide a by b.
#   a: numerator.
#   b: denominator.
# Returns the quotient a / b.
div <- function(a, b) {
  quotient <- a / b
  # print(x)  # NOTE(review): disabled line; `x` is not defined in this
  #           # function -- presumably kept to provoke an error for the
  #           # recover() demonstration. Confirm before re-enabling.
  quotient
}
# Enable recover mode: install recover() as the error handler. options()
# returns the previous values, which are kept so they can be restored exactly.
old_opts <- options(error = recover)
div(10, 2)
## [1] 5
# Disable recover mode by restoring whatever error handler was active before,
# instead of unconditionally resetting it to NULL (which would discard a
# handler configured earlier in the session).
options(old_opts)
# Arithmetic mean of a numeric vector, with input validation.
#
# Arguments:
#   x      Numeric vector to average.
#   na.rm  If TRUE, missing values are removed before averaging. Defaults to
#          FALSE, in which case any NA in `x` makes the result NA (the
#          original behaviour).
#
# Returns:
#   A single number: sum(x) / length(x).
#
# Errors:
#   "Input must be a numeric vector" when `x` is not numeric.
#   "Input vector cannot be empty" when `x` has no elements (including the
#   case where na.rm = TRUE removed every element).
data_mean <- function(x, na.rm = FALSE) {
  # Input validation
  if (!is.numeric(x)) {
    stop("Input must be a numeric vector", call. = FALSE)
  }
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  # Checked after NA removal so an all-NA input does not end in 0 / 0.
  if (length(x) == 0) {
    stop("Input vector cannot be empty", call. = FALSE)
  }
  sum(x) / length(x)
}
# Average of the 2020 population column.
data_mean(df1[["X2020.Population"]])
## [1] 33501071
# Example: square each number with sapply()
numbers <- c(2, 4, 6, 8, 10)
result <- sapply(X = numbers, FUN = function(value) value^2)
print(result)
## [1] 4 16 36 64 100
# Example: get string lengths safely -- vapply() checks every result against
# the template given in FUN.VALUE (one number per element).
f_var <- c("apple", "banana", "kiwi", "mango")
res <- vapply(X = f_var, FUN = nchar, FUN.VALUE = numeric(1))
print(res)
## apple banana kiwi mango
## 5 6 4 5
# Example: a basic map() call -- the result is a list with one element per
# input element.
library(purrr)
nums <- list(4, 9, 16, 25)
res <- map(.x = nums, .f = sqrt)
print(res)
## [[1]]
## [1] 2
##
## [[2]]
## [1] 3
##
## [[3]]
## [1] 4
##
## [[4]]
## [1] 5
# Example: add two vectors element by element with mapply()
x <- c(1, 2, 3, 4, 5)
y <- c(10, 20, 30, 40, 50)
res <- mapply(FUN = function(lhs, rhs) lhs + rhs, x, y)
print(res)
## [1] 11 22 33 44 55