These are all the libraries I have used in this assignment.
library(haven)
library(readr)
library(DBI)
library(RSQLite)
library(readr)
library(pdftools)
## Using poppler version 26.01.0
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
This is how to import different datasets with different file extensions (CSV, Excel, and PDF).
# Load the three sample files, one per file format. The paths are absolute
# and specific to one machine, so adjust them before running elsewhere.
# CSV file, read with base R's read.csv().
data_1 <- read.csv("C:/Users/ELOHOME/Documents/AUCA Doc/R Programming/energydata_complete.csv")
# Excel workbook, read with readxl's read_excel().
data_2 <- read_excel("C:/Users/ELOHOME/Documents/AUCA Doc/R Programming/sample1.xlsx")
# PDF document, whose text is extracted with pdftools' pdf_text().
data_3 <- pdf_text("C:/Users/ELOHOME/Documents/AUCA Doc/R Programming/sample1.pdf")
When you want to display the data we have imported, you can use View(), replacing Variable_name with the name of the imported object (for example, data_1):
View(Variable_name)
This is how to import data from a database.
# Open a connection to the SQLite database file "energy.db"
# (a relative path, so it is resolved against the working directory).
con <- dbConnect(RSQLite::SQLite(), "energy.db")
# Query that would return every row of the energy_readings table
# (left commented out here):
#dbGetQuery(con, "SELECT * FROM energy_readings")
# Count the number of rows stored in the energy_readings table.
dbGetQuery(con, "SELECT COUNT(*) FROM energy_readings")
## COUNT(*)
## 1 19735
# Close the database connection once the queries are done.
dbDisconnect(con)
This is how to merge data frames (here, two data frames matched on the variables country and year).
# Two small example tables that share the key columns `country` and `year`.
# Tanzania is listed only in df_a and Ethiopia only in df_b, so the two
# tables do not match completely.

# GDP per country.
df_a <- data.frame(
  country = c("Rwanda", "Kenya", "Uganda", "Tanzania"),
  year = rep(2020, times = 4),
  gdp = c(10.3, 95.5, 27.5, 63.2)
)

# Population per country.
df_b <- data.frame(
  country = c("Rwanda", "Kenya", "Uganda", "Ethiopia"),
  year = rep(2020, times = 4),
  population = c(13.2, 54.0, 47.1, 120.3)
)
# There are different ways to merge two data frames. The calls below pair
# each base R merge() variant with the matching dplyr join:
#   merge()                 <-> inner_join()
#   merge(all.x = TRUE)     <-> left_join()
#   merge(all.y = TRUE)     <-> right_join()
#   merge(all = TRUE)       <-> full_join()
# In the printed results, merge() orders the rows by the key columns, while
# the dplyr joins keep the row order of df_a (rows found only in df_b come last).

# Inner join with merge(): only countries present in both tables.
merge(df_a, df_b, by = c("country", "year"))
## country year gdp population
## 1 Kenya 2020 95.5 54.0
## 2 Rwanda 2020 10.3 13.2
## 3 Uganda 2020 27.5 47.1
# Inner join with dplyr: same rows, in df_a order.
inner_join(df_a, df_b, by = c("country", "year"))
## country year gdp population
## 1 Rwanda 2020 10.3 13.2
## 2 Kenya 2020 95.5 54.0
## 3 Uganda 2020 27.5 47.1
# Left join with merge(): every row of df_a; population is NA for Tanzania.
merge(df_a, df_b, by = c("country", "year"), all.x = TRUE)
## country year gdp population
## 1 Kenya 2020 95.5 54.0
## 2 Rwanda 2020 10.3 13.2
## 3 Tanzania 2020 63.2 NA
## 4 Uganda 2020 27.5 47.1
# Left join with dplyr.
left_join(df_a, df_b, by = c("country", "year"))
## country year gdp population
## 1 Rwanda 2020 10.3 13.2
## 2 Kenya 2020 95.5 54.0
## 3 Uganda 2020 27.5 47.1
## 4 Tanzania 2020 63.2 NA
# Right join with merge(): every row of df_b; gdp is NA for Ethiopia.
merge(df_a, df_b, by = c("country", "year"), all.y = TRUE)
## country year gdp population
## 1 Ethiopia 2020 NA 120.3
## 2 Kenya 2020 95.5 54.0
## 3 Rwanda 2020 10.3 13.2
## 4 Uganda 2020 27.5 47.1
# Right join with dplyr.
right_join(df_a, df_b, by = c("country", "year"))
## country year gdp population
## 1 Rwanda 2020 10.3 13.2
## 2 Kenya 2020 95.5 54.0
## 3 Uganda 2020 27.5 47.1
## 4 Ethiopia 2020 NA 120.3
# Full join with merge(): every row of both tables, NA where there is no match.
merge(df_a, df_b, by = c("country", "year"), all = TRUE)
## country year gdp population
## 1 Ethiopia 2020 NA 120.3
## 2 Kenya 2020 95.5 54.0
## 3 Rwanda 2020 10.3 13.2
## 4 Tanzania 2020 63.2 NA
## 5 Uganda 2020 27.5 47.1
# Full join with dplyr.
full_join(df_a, df_b, by = c("country", "year"))
## country year gdp population
## 1 Rwanda 2020 10.3 13.2
## 2 Kenya 2020 95.5 54.0
## 3 Uganda 2020 27.5 47.1
## 4 Tanzania 2020 63.2 NA
## 5 Ethiopia 2020 NA 120.3
This question is about how to use group_by() and the pipe operator %>%.
# Example table for the grouping demo: Rwanda appears twice, so grouping by
# country produces one group with two rows and two groups with one row each.
abc <- data.frame(
  country = c("Rwanda", "Rwanda", "Uganda", "Tanzania"),
  year = rep(2020, times = 4),
  gdp = c(10.3, 95.5, 27.5, 63.2)
)
# The pipe %>% passes `abc` as the first argument of tally(); it is tally()
# that counts the rows of the table (4 here).
abc %>% tally()
## n
## 1 4
# Group the rows that share the same country, then compute the average,
# maximum and minimum gdp within each group (one output row per country).
abc %>%
group_by(country) %>%
summarise(
average_gdp = mean(gdp),
max_gdp = max(gdp),
min_gdp = min(gdp)
)
## # A tibble: 3 × 4
## country average_gdp max_gdp min_gdp
## <chr> <dbl> <dbl> <dbl>
## 1 Rwanda 52.9 95.5 10.3
## 2 Tanzania 63.2 63.2 63.2
## 3 Uganda 27.5 27.5 27.5
This question is about how to use trace() and recover() in R programming.
# Example of trace() and untrace():
# attach a tracer expression to sum() so that it is evaluated on every call.
trace(sum, tracer = quote(cat("sum was called!\n")))
## Tracing function "sum" in package "base"
## [1] "sum"
# Calling the traced function prints the tracer message before the result.
sum(1:5)
## Tracing sum(1:5) on entry
## sum was called!
## [1] 15
# Remove the tracer so that sum() is no longer traced.
untrace(sum)
## Untracing function "sum" in package "base"
# Example of recover(): install recover as the error handler, so that it is
# invoked whenever an error occurs.
options(error = recover)
add_one <- function(x) x + 1
# NOTE(review): naming this helper `double` masks base::double() in the
# workspace; consider a different name.
double <- function(x) add_one(x) * 2
double("a") # "a" is not numeric, so x + 1 inside add_one() fails
## Error in x + 1: non-numeric argument to binary operator
# Reset the error handler to NULL.
# NOTE(review): this does not restore whatever handler was set before.
options(error = NULL)
This question is about creating my own functions that calculate summary statistics.
# ── Mean ──────────────────────────────────────────────
# Arithmetic mean computed by hand: the sum of the values divided by their count.
#
# Arguments:
#   data   Numeric vector.
#   na.rm  If TRUE, missing values are removed before averaging. The default
#          (FALSE) keeps the previous behaviour: any NA makes the result NA.
# Returns:
#   A single number. An empty vector gives NaN (0 / 0).
my_mean <- function(data, na.rm = FALSE) {
  if (na.rm) {
    data <- data[!is.na(data)]
  }
  sum(data) / length(data)
}
# ── Median ────────────────────────────────────────────
# Median computed by hand from the sorted values.
#
# Arguments:
#   data  Numeric vector.
# Returns:
#   The middle value for an odd count, or the average of the two middle
#   values for an even count. Returns NA_real_ when no values are left to
#   summarise (empty input, or input made only of missing values), so the
#   result always has length one.
my_median <- function(data) {
  # sort() drops missing values by default, so NA entries are ignored here.
  sorted_data <- sort(data)
  n <- length(sorted_data)

  # Without this guard the indexing below would return a zero-length vector.
  if (n == 0) {
    return(NA_real_)
  }

  mid <- n %/% 2
  if (n %% 2 == 0) {
    (sorted_data[mid] + sorted_data[mid + 1]) / 2
  } else {
    sorted_data[mid + 1]
  }
}
# ── Minimum ───────────────────────────────────────────
# Minimum computed by hand with a linear scan.
#
# Arguments:
#   data   Vector of comparable values (typically numeric).
#   na.rm  If TRUE, missing values are removed before scanning.
# Returns:
#   The smallest value. If `data` contains a missing value and na.rm is
#   FALSE, the result is NA (previously the comparison inside the loop
#   stopped with "missing value where TRUE/FALSE needed"). An empty vector
#   gives NA, because data[1] is NA when there is no first element.
my_min <- function(data, na.rm = FALSE) {
  missing_mask <- is.na(data)
  if (na.rm) {
    data <- data[!missing_mask]
  } else if (any(missing_mask)) {
    # Propagate the missing value, keeping the type of the input.
    return(data[missing_mask][1])
  }

  minimum <- data[1]
  for (x in data) {
    if (x < minimum) {
      minimum <- x
    }
  }
  minimum
}
# ── Maximum ───────────────────────────────────────────
# Maximum computed by hand with a linear scan.
#
# Arguments:
#   data   Vector of comparable values (typically numeric).
#   na.rm  If TRUE, missing values are removed before scanning.
# Returns:
#   The largest value. If `data` contains a missing value and na.rm is
#   FALSE, the result is NA (previously the comparison inside the loop
#   stopped with "missing value where TRUE/FALSE needed"). An empty vector
#   gives NA, because data[1] is NA when there is no first element.
my_max <- function(data, na.rm = FALSE) {
  missing_mask <- is.na(data)
  if (na.rm) {
    data <- data[!missing_mask]
  } else if (any(missing_mask)) {
    # Propagate the missing value, keeping the type of the input.
    return(data[missing_mask][1])
  }

  maximum <- data[1]
  for (x in data) {
    if (x > maximum) {
      maximum <- x
    }
  }
  maximum
}
# ── Standard Deviation (Population) ──────────────────
# Population standard deviation: the square root of the mean squared
# deviation from the mean. The divisor is the number of values (n), not
# n - 1, so results differ from base R's sd().
my_sd <- function(data) {
  deviations <- data - my_mean(data)
  sqrt(sum(deviations^2) / length(data))
}
# ── All-in-one Summary Function ───────────────────────
# Print a formatted report (mean, median, minimum, maximum) for `data`
# using the hand-written helpers my_mean(), my_median(), my_min() and
# my_max(). The function is called for its printed output only.
summary_statistics <- function(data) {
  rule <- "================================\n"
  # Formats one statistic with its template and prints it straight away.
  show_stat <- function(template, value) cat(sprintf(template, value))

  cat(rule)
  cat(" SUMMARY STATISTICS \n")
  cat(rule)
  show_stat("Mean: %.4f\n", my_mean(data))
  show_stat("Median: %.4f\n", my_median(data))
  show_stat("Minimum: %.4f\n", my_min(data))
  show_stat("Maximum: %.4f\n", my_max(data))
  # The standard-deviation line is currently disabled:
  # cat(sprintf("Standard Deviation: %.4f\n", my_sd(data)))
  cat(rule)
}
# ── Test it ───────────────────────────────────────────
# Sample vector of ten values used to try out the summary function.
data <- c(12, 7, 3, 14, 6, 11, 5, 4, 13, 8)
# Print the formatted report for the sample vector.
summary_statistics(data)
## ================================
## SUMMARY STATISTICS
## ================================
## Mean: 8.3000
## Median: 7.5000
## Minimum: 3.0000
## Maximum: 14.0000
## ================================
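The apply-family functions lapply(), sapply() and vapply() are used for iteration and functional programming in R. lapply() applies a function to each element and always returns a list.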
# Double every element of the list; lapply() returns the results as a list
# with one entry per input element.
numbers <- list(1, 2, 3, 4)
result <- lapply(X = numbers, FUN = function(value) 2 * value)
print(result)
## [[1]]
## [1] 2
##
## [[2]]
## [1] 4
##
## [[3]]
## [1] 6
##
## [[4]]
## [1] 8
sapply() simplifies the result into vectors, matrices, or arrays whenever possible.
# Each call returns a single number, so sapply() simplifies the results
# into a plain numeric vector.
numbers <- list(1, 2, 3, 4)
result <- sapply(X = numbers, FUN = function(value) 2 * value)
print(result)
## [1] 2 4 6 8
# Each call returns two numbers (the value and its square), so sapply()
# simplifies the results into a matrix with one column per input element.
numbers <- list(1, 2, 3)
result <- sapply(X = numbers, FUN = function(value) c(value, value^2))
print(result)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 1 4 9
vapply() requires specifying the expected output type through its FUN.VALUE argument. Its general form is:
vapply(X, FUN, FUN.VALUE)
# Double every element; FUN.VALUE = numeric(1) declares that each call must
# return exactly one number.
numbers <- list(1, 2, 3, 4)
result <- vapply(X = numbers, FUN = function(value) 2 * value, FUN.VALUE = numeric(1))
print(result)
## [1] 2 4 6 8
#### Example — Character Output
# Upper-case every word; FUN.VALUE = character(1) declares that each call
# must return exactly one string.
words <- list("cat", "dog", "fish")
result <- vapply(X = words, FUN = toupper, FUN.VALUE = character(1))
print(result)
## [1] "CAT" "DOG" "FISH"