All_assignment.knit

These are all the libraries I have used in this assignment.

library(haven)
library(readr)
library(DBI)
library(RSQLite)
library(readr)
library(pdftools)

## Using poppler version 26.01.0

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(magrittr)

ASSIGNMENT 1

This shows how to import different datasets with different file extensions.

# Import a CSV file, an Excel workbook and the text of a PDF document.
# NOTE: these absolute paths point at the author's machine; adjust them
# before running the script elsewhere.
data_1 <- read.csv(
  file = "C:/Users/ELOHOME/Documents/AUCA Doc/R Programming/energydata_complete.csv"
)
data_2 <- read_excel(
  path = "C:/Users/ELOHOME/Documents/AUCA Doc/R Programming/sample1.xlsx"
)
data_3 <- pdf_text(
  pdf = "C:/Users/ELOHOME/Documents/AUCA Doc/R Programming/sample1.pdf"
)

# To display the data we have imported, we can use:

#View(data_1)
#View(data_2)
#View(data_3)

This shows how to import data from a database.

# Open a connection to the SQLite database stored in the file energy.db.
con <- DBI::dbConnect(drv = RSQLite::SQLite(), dbname = "energy.db")

# To display every row of the table, run:
# dbGetQuery(con, "SELECT * FROM energy_readings")

# Count the number of rows in the energy_readings table.
DBI::dbGetQuery(conn = con, statement = "SELECT COUNT(*) FROM energy_readings")

##   COUNT(*)
## 1    19735

# Close the database connection once the queries are done.
DBI::dbDisconnect(conn = con)

ASSIGNMENT 2

This shows how to merge datasets on 2 to 3 variables.

# Two small example tables sharing the key columns country and year:
# df_a carries gdp figures, df_b carries population figures.
df_a <- data.frame(
  country = c("Rwanda", "Kenya", "Uganda", "Tanzania"),
  year = rep(2020, times = 4),
  gdp = c(10.3, 95.5, 27.5, 63.2)
)

df_b <- data.frame(
  country = c("Rwanda", "Kenya", "Uganda", "Ethiopia"),
  year = rep(2020, times = 4),
  population = c(13.2, 54.0, 47.1, 120.3)
)

# There are different ways to do the merging. They are:

#merge
#inner_join
#left_join
#right_join
#full_join

 # Inner join: keep only the (country, year) keys present in both df_a and df_b.
 # Base R version; in the output below the rows come back ordered by country.
 merge(df_a, df_b, by = c("country", "year"))

##   country year  gdp population
## 1   Kenya 2020 95.5       54.0
## 2  Rwanda 2020 10.3       13.2
## 3  Uganda 2020 27.5       47.1

 # dplyr version of the same inner join.
 inner_join(df_a, df_b, by = c("country", "year"))

##   country year  gdp population
## 1  Rwanda 2020 10.3       13.2
## 2   Kenya 2020 95.5       54.0
## 3  Uganda 2020 27.5       47.1

 # Left join: keep every row of df_a; population is NA where df_b has no match
 # (Tanzania in the output below). Base R version via all.x = TRUE.
 merge(df_a, df_b, by = c("country", "year"), all.x = TRUE)

##    country year  gdp population
## 1    Kenya 2020 95.5       54.0
## 2   Rwanda 2020 10.3       13.2
## 3 Tanzania 2020 63.2         NA
## 4   Uganda 2020 27.5       47.1

 # dplyr version of the same left join.
 left_join(df_a, df_b, by = c("country", "year"))

##    country year  gdp population
## 1   Rwanda 2020 10.3       13.2
## 2    Kenya 2020 95.5       54.0
## 3   Uganda 2020 27.5       47.1
## 4 Tanzania 2020 63.2         NA

 # Right join: keep every row of df_b; gdp is NA where df_a has no match
 # (Ethiopia in the output below). Base R version via all.y = TRUE.
 merge(df_a, df_b, by = c("country", "year"), all.y = TRUE)

##    country year  gdp population
## 1 Ethiopia 2020   NA      120.3
## 2    Kenya 2020 95.5       54.0
## 3   Rwanda 2020 10.3       13.2
## 4   Uganda 2020 27.5       47.1

 # dplyr version of the same right join.
 right_join(df_a, df_b, by = c("country", "year"))

##    country year  gdp population
## 1   Rwanda 2020 10.3       13.2
## 2    Kenya 2020 95.5       54.0
## 3   Uganda 2020 27.5       47.1
## 4 Ethiopia 2020   NA      120.3

 # Full join: keep the rows of both tables; columns with no match are NA
 # (Ethiopia's gdp and Tanzania's population below). Base R version via all = TRUE.
 merge(df_a, df_b, by = c("country", "year"), all = TRUE)

##    country year  gdp population
## 1 Ethiopia 2020   NA      120.3
## 2    Kenya 2020 95.5       54.0
## 3   Rwanda 2020 10.3       13.2
## 4 Tanzania 2020 63.2         NA
## 5   Uganda 2020 27.5       47.1

 # dplyr version of the same full join.
 full_join(df_a, df_b, by = c("country", "year"))

##    country year  gdp population
## 1   Rwanda 2020 10.3       13.2
## 2    Kenya 2020 95.5       54.0
## 3   Uganda 2020 27.5       47.1
## 4 Tanzania 2020 63.2         NA
## 5 Ethiopia 2020   NA      120.3

ASSIGNMENT 3

This question is about how to use group_by() and %>%.

# Example table for grouping: Rwanda appears twice, so it forms a group of two.
abc <- data.frame(
  country = c("Rwanda", "Rwanda", "Uganda", "Tanzania"),
  year = rep(2020, times = 4),
  gdp = c(10.3, 95.5, 27.5, 63.2)
)

# The pipe (%>%) passes the table on its left into the function on its right.
# Here it feeds abc into tally(), which counts the rows in the table.

abc %>% tally()

##   n
## 1 4

# Group the rows that share the same country, then compute one summary row
# per country: the mean, maximum and minimum of gdp within each group.
abc %>%
  group_by(country) %>%
  summarise(
    average_gdp = mean(gdp),
    max_gdp = max(gdp),
    min_gdp = min(gdp)
  )

## # A tibble: 3 × 4
##   country  average_gdp max_gdp min_gdp
##   <chr>          <dbl>   <dbl>   <dbl>
## 1 Rwanda          52.9    95.5    10.3
## 2 Tanzania        63.2    63.2    63.2
## 3 Uganda          27.5    27.5    27.5

ASSIGNMENT 4

This question is about how to use trace() and recover() in R programming.

# Example of trace() and untrace():
# attach a tracer expression to sum() so that it runs whenever sum() is called.
trace(sum, tracer = quote(cat("sum was called!\n")))

## Tracing function "sum" in package "base"

## [1] "sum"

# Calling sum() now prints the tracer message first (see the output below)
# and then returns the usual result.
sum(1:5)

## Tracing sum(1:5) on entry 
## sum was called!

## [1] 15

# Remove the tracer so that sum() goes back to its normal behaviour.
untrace(sum)

## Untracing function "sum" in package "base"

# Example of recover(): install it as the error handler so that, in an
# interactive session, R offers to browse the active function calls after
# an error. options() returns the previous settings, which are kept so the
# earlier handler can be restored instead of being overwritten with NULL.
old_error_option <- options(error = recover)
add_one <- function(x) x + 1
# NOTE(review): this definition masks base::double() for the rest of the
# session; the name is kept so any later use of double() still works.
double  <- function(x) add_one(x) * 2

double("a")   # "a" is not numeric, so x + 1 inside add_one() fails

## Error in x + 1: non-numeric argument to binary operator

# Put back whatever error handler was active before this example.
options(old_error_option)

ASSIGNMENT 5

This question is about creating my own functions that calculate summary statistics.

# ── Mean ──────────────────────────────────────────────
# Arithmetic mean: the total of the values divided by how many there are.
# data: a numeric vector. Returns a single number.
my_mean <- function(data) {
  total <- sum(data)
  count <- length(data)
  total / count
}

# ── Median ────────────────────────────────────────────
# Median computed by hand from the sorted values.
# data: a numeric vector. Missing values are ignored, because sort() drops
#   NA by default.
# Returns a single number: the middle value when the count is odd, or the
#   average of the two middle values when it is even. Returns NA_real_ when
#   no values remain (empty or all-NA input).
my_median <- function(data) {
  sorted_data <- sort(data)
  n <- length(sorted_data)

  # Without this guard an empty input would index position 0 and produce a
  # zero-length result, which makes sprintf()/cat() print nothing at all.
  if (n == 0) {
    return(NA_real_)
  }

  mid <- n %/% 2
  if (n %% 2 == 0) {
    (sorted_data[mid] + sorted_data[mid + 1]) / 2
  } else {
    sorted_data[mid + 1]
  }
}

# ── Minimum ───────────────────────────────────────────
# Smallest value, found by scanning the vector once.
# data: a numeric vector. Returns the smallest element seen.
my_min <- function(data) {
  # Start from the first element and replace it whenever a smaller one appears.
  smallest <- data[1]
  for (candidate in data) {
    if (smallest > candidate) {
      smallest <- candidate
    }
  }
  smallest
}

# ── Maximum ───────────────────────────────────────────
# Largest value, found by scanning the vector once.
# data: a numeric vector. Returns the largest element seen.
my_max <- function(data) {
  # Start from the first element and replace it whenever a larger one appears.
  largest <- data[1]
  for (candidate in data) {
    if (largest < candidate) {
      largest <- candidate
    }
  }
  largest
}

# ── Standard Deviation (Population) ──────────────────
# Population standard deviation: the square root of the average squared
# distance from the mean. The divisor is length(data), not length(data) - 1.
# data: a numeric vector. Returns a single number.
my_sd <- function(data) {
  deviations <- data - my_mean(data)
  sqrt(sum(deviations^2) / length(data))
}

# ── All-in-one Summary Function ───────────────────────
# Print a small report with the mean, median, minimum and maximum of data,
# each formatted to four decimal places.
# data: a numeric vector. Called for its printed output.
summary_statistics <- function(data) {
  rule <- "================================\n"
  # Format one labelled value and write it straight to the console.
  emit <- function(template, value) cat(sprintf(template, value))

  cat(rule)
  cat("       SUMMARY STATISTICS       \n")
  cat(rule)
  emit("Mean:               %.4f\n", my_mean(data))
  emit("Median:             %.4f\n", my_median(data))
  emit("Minimum:            %.4f\n", my_min(data))
  emit("Maximum:            %.4f\n", my_max(data))
  # The standard deviation row is left switched off, as in the original:
  # emit("Standard Deviation: %.4f\n", my_sd(data))
  cat(rule)
}

# ── Try it on a sample vector ─────────────────────────
data <- c(12, 7, 3, 14, 6, 11, 5, 4, 13, 8)
summary_statistics(data = data)

## ================================
##        SUMMARY STATISTICS       
## ================================
## Mean:               8.3000
## Median:             7.5000
## Minimum:            3.0000
## Maximum:            14.0000
## ================================

ASSIGNMENT 6

These functions are used for iteration and functional programming in R.

lapply()
sapply()
vapply()

1. lapply() — Returns a List

# Double every element; lapply() hands the results back as a list.
numbers <- as.list(c(1, 2, 3, 4))

result <- lapply(X = numbers, FUN = function(value) value * 2)

print(result)

## [[1]]
## [1] 2
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 6
## 
## [[4]]
## [1] 8

2. sapply() — Simplifies Output

sapply() simplifies the result into vectors, matrices, or arrays whenever possible.

Example — Producing a Vector

# Same doubling as before; sapply() collapses the results into a vector.
numbers <- as.list(c(1, 2, 3, 4))

result <- sapply(X = numbers, FUN = function(value) value * 2)

print(result)

## [1] 2 4 6 8

Example — Producing a Matrix

# Each call returns two values (the number and its square), so sapply()
# binds the results into a matrix with one column per input.
numbers <- as.list(c(1, 2, 3))

result <- sapply(X = numbers, FUN = function(value) c(value, value^2))

print(result)

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    1    4    9

3. vapply() — Safer Version of sapply()

vapply() requires specifying the expected output type.

Syntax

vapply(X, FUN, FUN.VALUE)

Example — Numeric Vector

# vapply() checks that every call returns exactly one numeric value.
numbers <- as.list(c(1, 2, 3, 4))

result <- vapply(
  X = numbers,
  FUN = function(value) value * 2,
  FUN.VALUE = numeric(1)
)

print(result)

## [1] 2 4 6 8

Example — Character Output

# vapply() checks that every call returns exactly one character string.
words <- as.list(c("cat", "dog", "fish"))

result <- vapply(
  X = words,
  FUN = toupper,
  FUN.VALUE = character(1)
)

print(result)

## [1] "CAT"  "DOG"  "FISH"

ADVENTIST UNIVERSITY OF CENTRAL AFRICA

MASTER OF IT IN BIGDATA ANALYTICS

SHEMA DELPHIN

20251MBI056

ALL ASSIGNMENT OF R PROGRAMMING

Delphin

2026-05-24