assignment.knit

library(pdftools)

## Using poppler version 26.04.0

library(readxl)
library(DBI)
library(RSQLite)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(magrittr)

Assignment 1

Extracting from database

# Open the SQLite database file and print every row of the `fishes` table.
con <- dbConnect(
  drv = RSQLite::SQLite(),
  dbname = "R-database.db"
)
dbGetQuery(conn = con, statement = "select * from fishes")

##    id     fish_name    habitat average_weight_kg lifespan_years        diet
## 1   1       Tilapia Freshwater             171.0             20   Herbivore
## 2   2        Salmon   Brackish             213.0             19    Omnivore
## 3   3          Tuna   Brackish             275.0             24    Omnivore
## 4   4      Goldfish   Brackish              37.5             26   Carnivore
## 5   5       Catfish Freshwater             243.5             12    Omnivore
## 6   6     Clownfish     Marine             124.6             37    Omnivore
## 7   7 Rainbow Trout Freshwater             225.9             33 Planktivore
## 8   8       Sardine Freshwater              28.6             24   Herbivore
## 9   9       Piranha Freshwater             281.7             13   Carnivore
## 10 10     Blue Tang     Marine             194.2             25   Carnivore
## 11 11      Mackerel     Marine             246.9             38 Planktivore
## 12 12     Swordfish     Marine             223.6             24   Carnivore
## 13 13       Snapper     Marine             291.0             27    Omnivore
## 14 14          Carp     Marine               7.5              9    Omnivore
## 15 15     Barracuda Freshwater             161.2             23   Herbivore
## 16 16         Guppy     Marine             136.7             14 Planktivore
## 17 17          Bass Freshwater              58.4             33 Planktivore
## 18 18         Shark Freshwater              84.2              5   Herbivore
## 19 19           Eel     Marine             230.9             21   Carnivore
## 20 20       Anchovy   Brackish             212.6             29   Herbivore

# Close the SQLite connection once the query result has been printed.
dbDisconnect(con)

extracting from CSV file

# Load the fish measurements from CSV and preview the first ten rows.
fishes <- read.csv(file = "Fish.csv")
head(x = fishes, n = 10)

##    Species Weight Length1 Length2 Length3  Height  Width
## 1    Bream    242    23.2    25.4    30.0 11.5200 4.0200
## 2    Bream    290    24.0    26.3    31.2 12.4800 4.3056
## 3    Bream    340    23.9    26.5    31.1 12.3778 4.6961
## 4    Bream    363    26.3    29.0    33.5 12.7300 4.4555
## 5    Bream    430    26.5    29.0    34.0 12.4440 5.1340
## 6    Bream    450    26.8    29.7    34.7 13.6024 4.9274
## 7    Bream    500    26.8    29.7    34.5 14.1795 5.2785
## 8    Bream    390    27.6    30.0    35.0 12.6700 4.6900
## 9    Bream    450    27.6    30.0    35.1 14.0049 4.8438
## 10   Bream    500    28.5    30.7    36.2 14.2266 4.9594

Extracting from excel file

# Load the same kind of table from an Excel workbook (returned as a tibble)
# and preview the first ten rows.
fish_excel <- read_excel(path = "fish_excel.xlsx")
head(x = fish_excel, n = 10)

## # A tibble: 10 × 7
##    Species Weight Length1 Length2 Length3 Height Width
##    <chr>    <dbl>   <dbl>   <dbl>   <dbl>  <dbl> <dbl>
##  1 Bream      242    23.2    25.4    30     11.5  4.02
##  2 Bream      290    24      26.3    31.2   12.5  4.31
##  3 Bream      340    23.9    26.5    31.1   12.4  4.70
##  4 Bream      363    26.3    29      33.5   12.7  4.46
##  5 Bream      430    26.5    29      34     12.4  5.13
##  6 Bream      450    26.8    29.7    34.7   13.6  4.93
##  7 Bream      500    26.8    29.7    34.5   14.2  5.28
##  8 Bream      390    27.6    30      35     12.7  4.69
##  9 Bream      450    27.6    30      35.1   14.0  4.84
## 10 Bream      500    28.5    30.7    36.2   14.2  4.96

extracting from pdf file

# Extract the text of the PDF; pdf_text() returns one character string per page.
fishes_pdf <- pdf_text("fish_pdf.pdf")
# Preview the PDF extraction itself (not the CSV data frame `fishes`):
# split the first page into lines and show the first 20 of them.
head(strsplit(fishes_pdf[1], "\n", fixed = TRUE)[[1]], 20)

##    Species Weight Length1 Length2 Length3  Height  Width
## 1    Bream    242    23.2    25.4    30.0 11.5200 4.0200
## 2    Bream    290    24.0    26.3    31.2 12.4800 4.3056
## 3    Bream    340    23.9    26.5    31.1 12.3778 4.6961
## 4    Bream    363    26.3    29.0    33.5 12.7300 4.4555
## 5    Bream    430    26.5    29.0    34.0 12.4440 5.1340
## 6    Bream    450    26.8    29.7    34.7 13.6024 4.9274
## 7    Bream    500    26.8    29.7    34.5 14.1795 5.2785
## 8    Bream    390    27.6    30.0    35.0 12.6700 4.6900
## 9    Bream    450    27.6    30.0    35.1 14.0049 4.8438
## 10   Bream    500    28.5    30.7    36.2 14.2266 4.9594
## 11   Bream    475    28.4    31.0    36.2 14.2628 5.1042
## 12   Bream    500    28.7    31.0    36.2 14.3714 4.8146
## 13   Bream    500    29.1    31.5    36.4 13.7592 4.3680
## 14   Bream    340    29.5    32.0    37.3 13.9129 5.0728
## 15   Bream    600    29.4    32.0    37.2 14.9544 5.1708
## 16   Bream    600    29.4    32.0    37.2 15.4380 5.5800
## 17   Bream    700    30.4    33.0    38.3 14.8604 5.2854
## 18   Bream    700    30.4    33.0    38.5 14.9380 5.1975
## 19   Bream    610    30.9    33.5    38.6 15.6330 5.1338
## 20   Bream    650    31.0    33.5    38.7 14.4738 5.7276

Assignment 2

Merging datasets on 2 to 3 variables

# Read the GDP table (columns: country, year, gdp_billion_usd) and preview it.
gdp <- read.csv(file = "countries_gdp.csv")
head(x = gdp)

##         country year gdp_billion_usd
## 1 United States 2018          635.02
## 2         China 2020         6129.85
## 3       Germany 2019        18414.42
## 4         Japan 2022         2182.60
## 5         India 2021          804.25
## 6        Brazil 2018         5473.76

# Read the population table (columns: country, year, population) and preview it.
population <- read.csv(file = "countries_population.csv")
head(x = population)

##         country year population
## 1 United States 2020 1350993687
## 2         China 2022  777605304
## 3       Germany 2022  413936598
## 4         India 2018   99407116
## 5        Brazil 2019  622455910
## 6        Canada 2018  500914620

# There are different ways to merge the two datasets:

#merge
#inner_join
#left_join
#right_join
#full_join

 # Base R inner join: keep only country/year pairs present in both tables.
 merge(x = gdp, y = population, by = c("country", "year"))

##     country year gdp_billion_usd population
## 1 Australia 2020          6954.0 1468907436
## 2    Rwanda 2019         17456.5 1440190226

 # dplyr inner join: same matching rows as above, kept in the order of `gdp`.
 gdp %>%
   inner_join(population, by = c("country", "year"))

##     country year gdp_billion_usd population
## 1    Rwanda 2019         17456.5 1440190226
## 2 Australia 2020          6954.0 1468907436

 # Base R left join: keep every `gdp` row; unmatched population becomes NA.
 merge(
   x = gdp,
   y = population,
   by = c("country", "year"),
   all.x = TRUE
 )

##          country year gdp_billion_usd population
## 1      Argentina 2018        18246.00         NA
## 2      Australia 2020         6954.00 1468907436
## 3         Brazil 2018         5473.76         NA
## 4         Canada 2022        15054.45         NA
## 5          China 2020         6129.85         NA
## 6          Egypt 2022         3129.41         NA
## 7         France 2022         4978.95         NA
## 8        Germany 2019        18414.42         NA
## 9          India 2021          804.25         NA
## 10     Indonesia 2020        15097.11         NA
## 11         Italy 2020         2564.23         NA
## 12         Japan 2022         2182.60         NA
## 13         Kenya 2018        18972.60         NA
## 14        Mexico 2019        23930.75         NA
## 15       Nigeria 2021        14735.75         NA
## 16        Norway 2021         1979.22         NA
## 17        Rwanda 2019        17456.50 1440190226
## 18  South Africa 2022        10493.80         NA
## 19         Spain 2021         2426.94         NA
## 20 United States 2018          635.02         NA

 # dplyr left join: every `gdp` row is kept, in its original order.
 gdp %>%
   left_join(population, by = c("country", "year"))

##          country year gdp_billion_usd population
## 1  United States 2018          635.02         NA
## 2          China 2020         6129.85         NA
## 3        Germany 2019        18414.42         NA
## 4          Japan 2022         2182.60         NA
## 5          India 2021          804.25         NA
## 6         Brazil 2018         5473.76         NA
## 7         Canada 2022        15054.45         NA
## 8         France 2022         4978.95         NA
## 9   South Africa 2022        10493.80         NA
## 10       Nigeria 2021        14735.75         NA
## 11         Kenya 2018        18972.60         NA
## 12        Rwanda 2019        17456.50 1440190226
## 13     Australia 2020         6954.00 1468907436
## 14        Mexico 2019        23930.75         NA
## 15         Italy 2020         2564.23         NA
## 16         Spain 2021         2426.94         NA
## 17     Indonesia 2020        15097.11         NA
## 18     Argentina 2018        18246.00         NA
## 19         Egypt 2022         3129.41         NA
## 20        Norway 2021         1979.22         NA

 # Base R right join: keep every `population` row; unmatched GDP becomes NA.
 merge(
   x = gdp,
   y = population,
   by = c("country", "year"),
   all.y = TRUE
 )

##          country year gdp_billion_usd population
## 1      Australia 2020          6954.0 1468907436
## 2     Bangladesh 2022              NA  472619987
## 3         Brazil 2019              NA  622455910
## 4         Canada 2018              NA  500914620
## 5          China 2022              NA  777605304
## 6         France 2018              NA  817314859
## 7        Germany 2022              NA  413936598
## 8          India 2018              NA   99407116
## 9      Indonesia 2021              NA  815874363
## 10         Italy 2019              NA 1148056643
## 11         Kenya 2020              NA  763938025
## 12        Mexico 2018              NA 1309098873
## 13       Nigeria 2020              NA  350297012
## 14      Pakistan 2020              NA 1375389012
## 15        Rwanda 2019         17456.5 1440190226
## 16  South Africa 2020              NA  974691209
## 17         Spain 2019              NA  351904183
## 18        Sweden 2019              NA   69934737
## 19        Uganda 2020              NA  121125830
## 20 United States 2020              NA 1350993687

 # dplyr right join: every `population` row is kept.
 gdp %>%
   right_join(population, by = c("country", "year"))

##          country year gdp_billion_usd population
## 1         Rwanda 2019         17456.5 1440190226
## 2      Australia 2020          6954.0 1468907436
## 3  United States 2020              NA 1350993687
## 4          China 2022              NA  777605304
## 5        Germany 2022              NA  413936598
## 6          India 2018              NA   99407116
## 7         Brazil 2019              NA  622455910
## 8         Canada 2018              NA  500914620
## 9         France 2018              NA  817314859
## 10  South Africa 2020              NA  974691209
## 11       Nigeria 2020              NA  350297012
## 12         Kenya 2020              NA  763938025
## 13        Mexico 2018              NA 1309098873
## 14         Italy 2019              NA 1148056643
## 15         Spain 2019              NA  351904183
## 16     Indonesia 2021              NA  815874363
## 17      Pakistan 2020              NA 1375389012
## 18    Bangladesh 2022              NA  472619987
## 19        Uganda 2020              NA  121125830
## 20        Sweden 2019              NA   69934737

 # Base R full join: keep all rows from both tables, filling gaps with NA.
 merge(
   x = gdp,
   y = population,
   by = c("country", "year"),
   all = TRUE
 )

##          country year gdp_billion_usd population
## 1      Argentina 2018        18246.00         NA
## 2      Australia 2020         6954.00 1468907436
## 3     Bangladesh 2022              NA  472619987
## 4         Brazil 2018         5473.76         NA
## 5         Brazil 2019              NA  622455910
## 6         Canada 2018              NA  500914620
## 7         Canada 2022        15054.45         NA
## 8          China 2020         6129.85         NA
## 9          China 2022              NA  777605304
## 10         Egypt 2022         3129.41         NA
## 11        France 2018              NA  817314859
## 12        France 2022         4978.95         NA
## 13       Germany 2019        18414.42         NA
## 14       Germany 2022              NA  413936598
## 15         India 2018              NA   99407116
## 16         India 2021          804.25         NA
## 17     Indonesia 2020        15097.11         NA
## 18     Indonesia 2021              NA  815874363
## 19         Italy 2019              NA 1148056643
## 20         Italy 2020         2564.23         NA
## 21         Japan 2022         2182.60         NA
## 22         Kenya 2018        18972.60         NA
## 23         Kenya 2020              NA  763938025
## 24        Mexico 2018              NA 1309098873
## 25        Mexico 2019        23930.75         NA
## 26       Nigeria 2020              NA  350297012
## 27       Nigeria 2021        14735.75         NA
## 28        Norway 2021         1979.22         NA
## 29      Pakistan 2020              NA 1375389012
## 30        Rwanda 2019        17456.50 1440190226
## 31  South Africa 2020              NA  974691209
## 32  South Africa 2022        10493.80         NA
## 33         Spain 2019              NA  351904183
## 34         Spain 2021         2426.94         NA
## 35        Sweden 2019              NA   69934737
## 36        Uganda 2020              NA  121125830
## 37 United States 2018          635.02         NA
## 38 United States 2020              NA 1350993687

 # dplyr full join: all rows from both tables, `gdp` rows first.
 gdp %>%
   full_join(population, by = c("country", "year"))

##          country year gdp_billion_usd population
## 1  United States 2018          635.02         NA
## 2          China 2020         6129.85         NA
## 3        Germany 2019        18414.42         NA
## 4          Japan 2022         2182.60         NA
## 5          India 2021          804.25         NA
## 6         Brazil 2018         5473.76         NA
## 7         Canada 2022        15054.45         NA
## 8         France 2022         4978.95         NA
## 9   South Africa 2022        10493.80         NA
## 10       Nigeria 2021        14735.75         NA
## 11         Kenya 2018        18972.60         NA
## 12        Rwanda 2019        17456.50 1440190226
## 13     Australia 2020         6954.00 1468907436
## 14        Mexico 2019        23930.75         NA
## 15         Italy 2020         2564.23         NA
## 16         Spain 2021         2426.94         NA
## 17     Indonesia 2020        15097.11         NA
## 18     Argentina 2018        18246.00         NA
## 19         Egypt 2022         3129.41         NA
## 20        Norway 2021         1979.22         NA
## 21 United States 2020              NA 1350993687
## 22         China 2022              NA  777605304
## 23       Germany 2022              NA  413936598
## 24         India 2018              NA   99407116
## 25        Brazil 2019              NA  622455910
## 26        Canada 2018              NA  500914620
## 27        France 2018              NA  817314859
## 28  South Africa 2020              NA  974691209
## 29       Nigeria 2020              NA  350297012
## 30         Kenya 2020              NA  763938025
## 31        Mexico 2018              NA 1309098873
## 32         Italy 2019              NA 1148056643
## 33         Spain 2019              NA  351904183
## 34     Indonesia 2021              NA  815874363
## 35      Pakistan 2020              NA 1375389012
## 36    Bangladesh 2022              NA  472619987
## 37        Uganda 2020              NA  121125830
## 38        Sweden 2019              NA   69934737

Assignment 3

How group_by() works

group_by() is used to split data into groups so you can perform calculations separately for each group. %>% is called the pipe operator in R. The pipe passes the result from one step into the next step.

# Small example table: one revenue figure per country and year.
sales <- data.frame(
  country = rep(c("Rwanda", "Kenya", "Uganda"), times = c(2, 2, 1)),
  year = c(2020, 2021, 2020, 2021, 2021),
  revenue = c(100, 150, 200, 250, 300)
)

# Split the rows by country, then collapse each group to one summary row.
sales %>%
  group_by(country) %>%
  summarise(
    total_revenue = sum(revenue),
    min_revenue = min(revenue),
    max_revenue = max(revenue),
    mean_revenue = mean(revenue)
  )

## # A tibble: 3 × 5
##   country total_revenue min_revenue max_revenue mean_revenue
##   <chr>           <dbl>       <dbl>       <dbl>        <dbl>
## 1 Kenya             450         200         250          225
## 2 Rwanda            250         100         150          125
## 3 Uganda            300         300         300          300

Assignment 4

How to use trace() & recover ()

They help you:

see where an error happened
inspect function calls
debug complex code

These are built into base R.

trace() in R is a debugging tool used to temporarily insert code into an existing function without modifying the original source code.

It is useful for:

Debugging package functions
Monitoring function calls
Inspecting arguments
Logging execution flow

Basic Idea

Think of trace() as:

"Run extra code whenever this function executes."

Basic Syntax

trace(function_name)

Example:

trace(mean)

Now whenever mean() runs, R enters tracing mode.

Remove Trace

untrace(mean)

Example 1 — Simple Trace

# Switch on default tracing for mean(): R announces each call to it.
trace(mean)

# This call is reported with a "trace:" line before its result is printed.
mean(c(1,2,3))

## trace: mean(c(1, 2, 3))

## [1] 2

Example 2 — Insert Custom Message

# Trace mean() with a custom tracer. quote() keeps the print() call
# unevaluated so that it runs on each call to mean(), not right now.
trace(
  mean,
  tracer = quote(print("mean() was called"))
)

## Tracing function "mean" in package "base"

## [1] "mean"

Now run:

mean(c(1,2,3))

## Tracing mean(c(1, 2, 3)) on entry 
## [1] "mean() was called"

## [1] 2

Understanding `tracer`

tracer contains the code that R should execute when the function runs.

Usually written with:

quote(...)

because R needs unevaluated code.

Example 3 — Inspect Arguments

# Tracer that prints mean()'s own argument `x` on entry, so the input of
# every call can be inspected.
trace(
  mean,
  tracer = quote(print(x))
)

## Tracing function "mean" in package "base"

## [1] "mean"

mean(c(10,20,30))

## Tracing mean(c(10, 20, 30)) on entry 
## [1] 10 20 30

## [1] 20

Example 4 — Trace Your Own Function

# Multiply `price` by `quantity` and return the product.
calculate_total <- function(price, quantity) {
  total <- price * quantity
  total
}

Trace it:

# Print both arguments each time calculate_total() is entered. The braces
# inside quote() let the tracer hold more than one expression.
trace(
  calculate_total,
  tracer = quote({
    print(price)
    print(quantity)
  })
)

## [1] "calculate_total"

Run:

calculate_total(100, 5)

## Tracing calculate_total(100, 5) on entry 
## [1] 100
## [1] 5

## [1] 500

Example 5 — Trace Specific Position

# `at` selects where the tracer is inserted: it runs just before the given
# step of the function body instead of only on entry.
trace(
  calculate_total,
  tracer = quote(print("Before multiplication")),
  at = 1
)

## [1] "calculate_total"

The `at` Parameter

Value	Meaning
`at = 1`	Beginning of function
`at = 2`	Second expression
etc.	Specific expression position

Common Use Cases

1. Debug Package Functions

You can inspect package functions without editing the package source code.

2. Log Function Calls

trace(
  my_function,
  tracer = quote(cat("Function executed\n"))
)

3. Inspect Variables Mid-Execution

trace(
  my_function,
  tracer = quote(print(my_variable))
)

Difference Between `trace()` and `debug()`

trace()	debug()
Inject custom code	Step through interactively
Good for logging	Good for detailed debugging
Non-interactive	Interactive

Difference Between `trace()` and `traceback()`

Tool	Purpose
`trace()`	Add debugging behavior
`traceback()`	Show error call stack

Difference Between `trace()` and `recover()`

Tool	Purpose
`trace()`	Observe function execution
`recover()`	Enter failed function environments

Real Workflow Example

# Drop missing values from `data`, then return the mean of what remains.
process_data <- function(data) {
  mean(na.omit(data))
}

Add trace:

# Print the `data` argument whenever process_data() is entered.
trace(
  process_data,
  tracer = quote(print(data))
)

## [1] "process_data"

Now every call prints the input data.

Important Note

trace() modifies function behavior temporarily during the session.

It does not permanently change the original function source code.

Best Practice

Always remove traces after debugging:

untrace(process_data)

2. recover()

recover() lets you enter the environment where the error happened and inspect variables interactively.

Enable Recover Mode

options(error = recover)

Now whenever an error occurs, R enters debugging mode.

Example

# Divide `x` by `y` and return the natural logarithm of the quotient.
divide <- function(x, y) {
  log(x / y)
}

# Deliberately failing caller used to demonstrate recover(): passing the
# string "a" as the divisor makes `x / y` inside divide() raise an error.
calculate <- function() {
  divide(10, "a")
}

calculate()

## Error in x/y: non-numeric argument to binary operator

What Happens

Instead of stopping normally, R shows:

Enter a frame number, or 0 to exit

Example:

1: calculate()
2: divide(10, "a")

You can type the frame number, for example:

2

to inspect the divide() function environment.

Inside Recover Mode

You can inspect variables:

x
y

Check objects:

ls()

##  [1] "calculate"       "calculate_total" "con"             "divide"         
##  [5] "fish_excel"      "fishes"          "fishes_pdf"      "gdp"            
##  [9] "population"      "process_data"    "sales"

Run expressions:

class(y)

Exit Recover

Inside a frame's browser, type:

c

to return to the frame menu, and at the frame menu enter:

0

to leave recover().

Disable Recover Mode

options(error = NULL)

Assignment 5

lapply()
sapply()
vapply()

These functions are used for iteration and functional programming in R.

1. `lapply()` — Returns a List

# lapply() applies the function to every element and always returns a list.
numbers <- list(1, 2, 3, 4)
result <- lapply(X = numbers, FUN = function(value) value * 2)
print(result)

## [[1]]
## [1] 2
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 6
## 
## [[4]]
## [1] 8

2. `sapply()` — Simplifies Output

sapply() simplifies the result into vectors, matrices, or arrays whenever possible.

Example — Producing a Vector

# Each call returns a single number, so sapply() simplifies the result
# to a plain numeric vector.
numbers <- list(1, 2, 3, 4)
result <- sapply(X = numbers, FUN = function(value) value * 2)
print(result)

## [1] 2 4 6 8

Example — Producing a Matrix

# Each call returns two numbers (the value and its square), so sapply()
# binds the results column-wise into a 2-row matrix.
numbers <- list(1, 2, 3)
result <- sapply(
  X = numbers,
  FUN = function(value) c(value, value^2)
)
print(result)

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    1    4    9

3. `vapply()` — Safer Version of `sapply()`

vapply() requires specifying the expected output type.

Syntax

vapply(X, FUN, FUN.VALUE)

Example — Numeric Vector

# FUN.VALUE = numeric(1) declares that every call must return exactly one
# number; vapply() stops with an error otherwise.
numbers <- list(1, 2, 3, 4)
result <- vapply(
  X = numbers,
  FUN = function(value) value * 2,
  FUN.VALUE = numeric(1)
)
print(result)

## [1] 2 4 6 8

Example — Character Output

# FUN.VALUE = character(1) declares one string per element as the
# expected result of toupper().
words <- list("cat", "dog", "fish")
result <- vapply(X = words, FUN = toupper, FUN.VALUE = character(1))
print(result)

## [1] "CAT"  "DOG"  "FISH"

“CAT” “DOG” “FISH”

Difference Between `sapply()` and `vapply()`

sapply()	vapply()
Automatically guesses output type	Requires fixed output type
Easier to write	Safer and more predictable
May return unexpected structures	Consistent output

Assignment 6

Mean
Median
Mode
Quantiles

without using R built-in statistical functions such as:

mean()
median()
quantile()
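Mode() (note: base R has no built-in statistical mode function; mode() only reports an object's storage type)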

We will use: - sapply() - lapply() - vapply()

and core R logic.

Sample Data

numbers <- c(10, 20, 30, 40, 50, 20, 30, 30)

1. Custom Mean Function

Formula

\[ Mean = \frac{\sum x}{n} \]

Implementation

# Arithmetic mean computed without mean().
#
# Args:
#   x: Numeric (or logical) vector, or a list of length-one numbers.
#
# Returns:
#   The sum of the elements divided by length(x). Empty input gives NaN
#   (0 / 0) and any NA element makes the result NA.
custom_mean <- function(x) {
  # Fold the elements into a running total starting from 0. This avoids
  # accumulating through `<<-` side effects inside an apply call.
  total <- Reduce(`+`, x, 0)

  total / length(x)
}

Test

custom_mean(numbers)

## [1] 28.75

2. Custom Median Function

Steps

Sort values
Find middle position
If even:
- average middle two values
If odd:
- return middle value

Implementation

# Median computed without median().
#
# Args:
#   x: Vector of sortable values (numeric for even-length input, because
#      the two middle values are averaged).
#
# Returns:
#   The middle value of the sorted data, or the average of the two middle
#   values when the count is even. NA_real_ when there is no data.
custom_median <- function(x) {
  # sort() drops NA values, so the median is taken over non-missing data.
  sorted <- sort(x)
  n <- length(sorted)

  # Nothing to take a median of: return NA rather than a zero-length value.
  if (n == 0) {
    return(NA_real_)
  }

  if (n %% 2 == 0) {
    # Even count: average the two values around the centre.
    upper <- n %/% 2 + 1
    (sorted[upper - 1] + sorted[upper]) / 2
  } else {
    # Odd count: the single central value.
    sorted[(n + 1) %/% 2]
  }
}

Test

custom_median(numbers)

## [1] 30

3. Custom Mode Function

Mode

The most frequently occurring value.

Implementation

# Statistical mode: the most frequently occurring value in `x`.
#
# Args:
#   x: Atomic vector (numeric, character, logical); may contain NA.
#
# Returns:
#   The value with the highest count. Ties go to the value that appears
#   first in `x`. Empty input returns a zero-length vector.
custom_mode <- function(x) {
  unique_values <- unique(x)

  # Count each distinct value with a vectorised comparison. NA is counted
  # through is.na() because `NA == value` yields NA, which cannot be used
  # as a condition.
  frequencies <- vapply(
    unique_values,
    function(value) {
      if (is.na(value)) {
        sum(is.na(x))
      } else {
        sum(!is.na(x) & x == value)
      }
    },
    integer(1)
  )

  # which.max() returns the first maximum, which gives the tie rule above.
  unique_values[which.max(frequencies)]
}

Test

custom_mode(numbers)

## [1] 30

4. Custom Quantile Function

Quantiles

Quantiles divide ordered data into intervals.

Example: - 0.25 = first quartile - 0.50 = median - 0.75 = third quartile

Formula

\[ Position = (n - 1)p + 1 \]

where: - \(n\) = number of observations - \(p\) = probability

Implementation

# Quantiles by linear interpolation between order statistics, computed
# without quantile().
#
# Args:
#   x:     Numeric vector; NA values are dropped by sort().
#   probs: Numeric vector of probabilities, each between 0 and 1.
#
# Returns:
#   A numeric vector with one quantile per element of `probs` (length zero
#   when `probs` is empty; all NA when `x` has no data).
#
# Raises:
#   An error when `probs` contains NA or a value outside [0, 1].
custom_quantile <- function(x, probs) {
  if (anyNA(probs) || any(probs < 0 | probs > 1)) {
    stop("'probs' must be between 0 and 1", call. = FALSE)
  }

  sorted <- sort(x)
  n <- length(sorted)

  # No data: every requested quantile is undefined.
  if (n == 0) {
    return(rep(NA_real_, length(probs)))
  }

  # vapply() guarantees a numeric vector even when `probs` is empty.
  vapply(
    probs,
    function(p) {
      # Fractional 1-based position of the quantile in the sorted data.
      position <- (n - 1) * p + 1
      lower <- floor(position)
      upper <- ceiling(position)

      if (lower == upper) {
        # The position falls exactly on an observation.
        sorted[lower]
      } else {
        # Interpolate between the two neighbouring observations.
        lower_value <- sorted[lower]
        upper_value <- sorted[upper]
        lower_value + (position - lower) * (upper_value - lower_value)
      }
    },
    numeric(1)
  )
}

Test

custom_quantile(
  numbers,
  probs = c(0.25, 0.5, 0.75)
)

## [1] 20.0 30.0 32.5

5. Using `vapply()` for Safer Computation

Example:

# Square every element of `x`. vapply() with numeric(1) enforces that each
# element produces exactly one number.
safe_square <- function(x) {
  square_one <- function(value) value^2
  vapply(X = x, FUN = square_one, FUN.VALUE = numeric(1))
}

Test

safe_square(c(1,2,3,4))

## [1]  1  4  9 16

6. Using `lapply()`

lapply() always returns a list.

Example

# Double every element of `numbers`; the result is a list with one entry
# per input element.
lapply(X = numbers, FUN = function(value) value * 2)

## [[1]]
## [1] 20
## 
## [[2]]
## [1] 40
## 
## [[3]]
## [1] 60
## 
## [[4]]
## [1] 80
## 
## [[5]]
## [1] 100
## 
## [[6]]
## [1] 40
## 
## [[7]]
## [1] 60
## 
## [[8]]
## [1] 60

Summary Table

Function	Purpose	Returns
`lapply()`	Iteration	List
`sapply()`	Simplified iteration	Vector/Matrix
`vapply()`	Type-safe iteration	Fixed type
`custom_mean()`	Average	Numeric
`custom_median()`	Middle value	Numeric
`custom_mode()`	Most frequent value	Numeric
`custom_quantile()`	Quartiles/percentiles	Vector

Full Example

numbers <- c(10, 20, 30, 40, 50, 20, 30, 30)

custom_mean(numbers)

## [1] 28.75

custom_median(numbers)

## [1] 30

custom_mode(numbers)

## [1] 30

custom_quantile(
  numbers,
  probs = c(0.25, 0.5, 0.75)
)

## [1] 20.0 30.0 32.5

ADVENTIST UNIVERSITY OF CENTRAL AFRICA

MASTER OF IT IN BIGDATA ANALYTICS

Nestor Ngabonziza

20251MBI022

ALL ASSIGNMENT OF R PROGRAMMING

2026-05-24

Assignment 1

Extracting from database

extracting from CSV file

Extracting from excel file

extracting from pdf file

Assignment 2

Merging datasets on 2 to 3 variables

Assignment 3

How group_by() works

Assignment 4

How to use trace() & recover ()

Basic Idea

Basic Syntax

Remove Trace

Example 1 — Simple Trace

Example 2 — Insert Custom Message

Understanding tracer

Example 3 — Inspect Arguments

Example 4 — Trace Your Own Function

Example 5 — Trace Specific Position

The at Parameter

Common Use Cases

1. Debug Package Functions

2. Log Function Calls

3. Inspect Variables Mid-Execution

Difference Between trace() and debug()

Difference Between trace() and traceback()

Difference Between trace() and recover()

Real Workflow Example

Important Note

Best Practice

2. recover()

Assignment 5

1. lapply() — Returns a List

2. sapply() — Simplifies Output

Example — Producing a Vector

Example — Producing a Matrix

3. vapply() — Safer Version of sapply()

Syntax

Example — Numeric Vector

Example — Character Output

Difference Between sapply() and vapply()

Assignment 6

Sample Data

1. Custom Mean Function

Formula

Implementation

Test

2. Custom Median Function

Steps

Implementation

Test

3. Custom Mode Function

Mode

Implementation

Test

4. Custom Quantile Function

Quantiles

Formula

Implementation

Test

5. Using vapply() for Safer Computation

Test

6. Using lapply()

Example

Summary Table

Full Example

Understanding `tracer`

The `at` Parameter

Difference Between `trace()` and `debug()`

Difference Between `trace()` and `traceback()`

Difference Between `trace()` and `recover()`

1. `lapply()` — Returns a List

2. `sapply()` — Simplifies Output

3. `vapply()` — Safer Version of `sapply()`

Difference Between `sapply()` and `vapply()`

5. Using `vapply()` for Safer Computation

6. Using `lapply()`