All class assignment in R

Using Built-in Datasets

library(readr)
# Load the built-in airquality data set into the workspace.
data("airquality")
# View() opens a spreadsheet-style window, so it is only called when a user is
# at the console; knitted and batch runs skip it.
if (interactive()) {
  View(airquality)
}
# Column names of the data frame.
variable.names(airquality)

## [1] "Ozone"   "Solar.R" "Wind"    "Temp"    "Month"   "Day"

str(airquality)

## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

###IMPORTING DATASETS FROM DBMS

library(DBI)
library(RMySQL)

# Open a connection to the local MySQL database.
# NOTE(review): the root user with an empty password is hard-coded here; that
# looks like a local test setup and should be confirmed before reuse elsewhere.
con <- dbConnect(
  drv = MySQL(),
  host = "127.0.0.1",
  dbname = "r_testing",
  user = "root",
  password = ""
)

# Read the whole "students" table into a data frame.
datatable <- dbReadTable(conn = con, name = "students")

# Report whether the connection is still usable.
dbIsValid(con)

## [1] TRUE

dbListTables(con)

## [1] "students"

# View() needs a user at the screen, so skip it in knitted or batch runs.
if (interactive()) {
  View(datatable)
}
# Column means of the two numeric columns. colMeans() works on the data frame
# directly, whereas apply() first converts it to a matrix.
colMeans(datatable[, c("ages", "score")])
# Release the database connection now that the table has been read.
invisible(dbDisconnect(con))

##  ages score 
## 21.75 16.25

##Importing datasets from kaggle

ai_student_impact_dataset_1_ <- read_csv("ai_student_impact_dataset (1).csv")

## Rows: 50000 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Major_Category, Year_of_Study, Primary_Use_Case, Prompt_Engineering...
## dbl (9): Student_ID, Pre_Semester_GPA, Weekly_GenAI_Hours, Tool_Diversity, T...
## lgl (1): Paid_Subscription
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# View() needs a user at the screen, so skip it in knitted or batch runs.
if (interactive()) {
  View(ai_student_impact_dataset_1_)
}

##===assignmt 2 Merging datasets using 2&3 variables===##

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ purrr     1.2.2
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Read the two CSV files used in the merge below.
# NOTE: the name CO2 masks R's built-in datasets::CO2 data set for the rest of
# the session; it is kept because later sections refer to it.
CO2 <- read.csv("~/myprojectstudy/CO2_emission.csv")
population <- read.csv("~/myprojectstudy/world_population.csv")

# View() needs a user at the screen, so skip it in knitted or batch runs.
if (interactive()) {
  View(CO2)
  View(population)
}

variable.names(CO2)

##  [1] "Country.Name"   "country_code"   "Region"         "Indicator.Name"
##  [5] "X1990"          "X1991"          "X1992"          "X1993"         
##  [9] "X1994"          "X1995"          "X1996"          "X1997"         
## [13] "X1998"          "X1999"          "X2000"          "X2001"         
## [17] "X2002"          "X2003"          "X2004"          "X2005"         
## [21] "X2006"          "X2007"          "X2008"          "X2009"         
## [25] "X2010"          "X2011"          "X2012"          "X2013"         
## [29] "X2014"          "X2015"          "X2016"          "X2017"         
## [33] "X2018"          "X2019"          "X2019.1"

variable.names(population)

##  [1] "Rank"                        "CCA3"                       
##  [3] "Country.Territory"           "Capital"                    
##  [5] "Continent"                   "X2022.Population"           
##  [7] "X2020.Population"            "X2015.Population"           
##  [9] "X2010.Population"            "X2000.Population"           
## [11] "X1990.Population"            "X1980.Population"           
## [13] "X1970.Population"            "Area..km.."                 
## [15] "Density..per.km.."           "Growth.Rate"                
## [17] "World.Population.Percentage"

#MERGING
# Inner join on two key pairs: Continent = Region and
# Country.Territory = Country.Name. Rows without a match on both keys are dropped.
# NOTE(review): the printed result contains only three North America rows, so
# the Continent and Region columns probably use different labels for the other
# regions. Joining on CCA3 / country_code may keep more countries; confirm
# against the data.

merged <- merge(
  x = population,
  y = CO2,
  by.x = c("Continent", "Country.Territory"),
  by.y = c("Region", "Country.Name"),
  all = FALSE
)

colnames(merged)

##  [1] "Continent"                   "Country.Territory"          
##  [3] "Rank"                        "CCA3"                       
##  [5] "Capital"                     "X2022.Population"           
##  [7] "X2020.Population"            "X2015.Population"           
##  [9] "X2010.Population"            "X2000.Population"           
## [11] "X1990.Population"            "X1980.Population"           
## [13] "X1970.Population"            "Area..km.."                 
## [15] "Density..per.km.."           "Growth.Rate"                
## [17] "World.Population.Percentage" "country_code"               
## [19] "Indicator.Name"              "X1990"                      
## [21] "X1991"                       "X1992"                      
## [23] "X1993"                       "X1994"                      
## [25] "X1995"                       "X1996"                      
## [27] "X1997"                       "X1998"                      
## [29] "X1999"                       "X2000"                      
## [31] "X2001"                       "X2002"                      
## [33] "X2003"                       "X2004"                      
## [35] "X2005"                       "X2006"                      
## [37] "X2007"                       "X2008"                      
## [39] "X2009"                       "X2010"                      
## [41] "X2011"                       "X2012"                      
## [43] "X2013"                       "X2014"                      
## [45] "X2015"                       "X2016"                      
## [47] "X2017"                       "X2018"                      
## [49] "X2019"                       "X2019.1"

head(merged)

##       Continent Country.Territory Rank CCA3          Capital X2022.Population
## 1 North America           Bermuda  206  BMU         Hamilton            64184
## 2 North America            Canada   39  CAN           Ottawa         38454327
## 3 North America     United States    3  USA Washington, D.C.        338289857
##   X2020.Population X2015.Population X2010.Population X2000.Population
## 1            64031            63144            63447            61371
## 2         37888705         35732126         33963412         30683313
## 3        335942003        324607776        311182845        282398554
##   X1990.Population X1980.Population X1970.Population Area..km..
## 1            57470            53565            52019         54
## 2         27657204         24511510         21434577    9984670
## 3        248083732        223140018        200328340    9372610
##   Density..per.km.. Growth.Rate World.Population.Percentage country_code
## 1         1188.5926      1.0000                        0.00          BMU
## 2            3.8513      1.0078                        0.48          CAN
## 3           36.0935      1.0038                        4.24          USA
##                           Indicator.Name    X1990    X1991    X1992    X1993
## 1 CO2 emissions (metric tons per capita)       NA       NA       NA       NA
## 2 CO2 emissions (metric tons per capita) 15.14889 14.74101 15.02823 14.71339
## 3 CO2 emissions (metric tons per capita) 19.40735 19.00340 19.02286 19.21833
##      X1994    X1995    X1996    X1997    X1998    X1999    X2000    X2001
## 1       NA       NA       NA       NA       NA       NA       NA       NA
## 2 15.06035 15.29060 15.59259 15.94399 16.07651 16.25852 16.75763 16.33157
## 3 19.25621 19.21691 19.57538 20.33086 20.26630 20.10113 20.46981 20.17153
##      X2002   X2003    X2004    X2005    X2006    X2007    X2008    X2009
## 1       NA      NA       NA       NA       NA       NA       NA       NA
## 2 16.72030 17.2083 16.79427 17.02749 16.59535 17.38057 16.55692 15.50215
## 3 19.44555 19.5065 19.59763 19.46926 18.94591 19.04291 18.27849 16.80870
##      X2010    X2011    X2012    X2013    X2014    X2015    X2016    X2017
## 1       NA       NA       NA       NA       NA       NA       NA       NA
## 2 15.79214 15.99594 15.73447 15.83846 15.84991 15.64859 15.42060 15.54457
## 3 17.43174 16.60422 15.78978 16.11118 16.04092 15.56003 15.14989 14.82326
##      X2018    X2019  X2019.1
## 1       NA       NA       NA
## 2 15.65058 15.43061 15.43061
## 3 15.22255 14.67341 14.67341

##=====assignmt 3.Use of group-by and %>% =====##

#Instead of nesting functions inside each other, the pipe (%>%) lets you write steps from left to right, top to bottom — the way you read. Each step receives the output of the previous step as its input data. Without the pipe, the same logic has to be written inside-out, which is hard to read.

#group_by(), for its part, tells every function that comes after it: “apply yourself within each group separately, not across the whole table.” By itself it does nothing useful — its power comes from what follows it.

# Summarise the merged data per continent and print the result, highest
# average first. The result is not stored in a variable.
merged %>%
  # Every summary below is computed once per Continent value.
  group_by(Continent) %>%
  summarise(
    # Mean of the X2019 column (CO2 emissions, metric tons per capita),
    # ignoring missing values.
    avg_CO2     = mean(X2019, na.rm = TRUE),
    # Total of the 2022 population column, ignoring missing values.
    total_pop     = sum(X2022.Population, na.rm = TRUE),
    # Number of rows (one per country) in the group.
    country_count = n()
  ) %>%
  # Sort from the highest to the lowest average.
  arrange(desc(avg_CO2))

## # A tibble: 1 × 4
##   Continent     avg_CO2 total_pop country_count
##   <chr>           <dbl>     <int>         <int>
## 1 North America    15.1 376808368             3

##====assignmt 4.using trace and recover debugging====##

Trace: Injects code into a function without editing it; the injected code runs every time that function is called. Recover: When an error occurs, drops you into any frame of the call stack so you can inspect variables at any level.

# A simple function: adds x and y with `+` (element-wise for vectors) and
# returns the result.
add_numbers <- function(x, y) {
  total <- x + y
  total
}

# Inject a print at the start of add_numbers.
# quote() keeps the cat() call unevaluated, so trace() can run it inside
# add_numbers on every entry, where x and y are that call's arguments.
# untrace("add_numbers") removes the injected code again.
trace("add_numbers", quote(cat("Called with x =", x, "y =", y, "\n")))

## [1] "add_numbers"

add_numbers(3, 5)

## Tracing add_numbers(3, 5) on entry 
## Called with x = 3 y = 5

## [1] 8

add_numbers(10, -2)

## Tracing add_numbers(10, -2) on entry 
## Called with x = 10 y = -2

## [1] 8

 # When an error occurs, instead of just printing the error message and stopping, recover() pauses execution and shows you the entire call stack. You pick any frame to enter and inspect variables there — like rewinding time to any point before the crash 

# Three nested functions: outer_fn calls middle_fn, which calls inner_fn.

# Top of the chain: doubles whatever middle_fn returns for x + 1.
outer_fn <- function(x) {
  middle_fn(x + 1) * 2
}

# Middle of the chain: adds 5 to whatever inner_fn returns for x.
middle_fn <- function(x) {
  inner_fn(x) + 5
}

# Bottom of the chain: never uses x and always fails inside log().
inner_fn <- function(x) {
  log("not a number")   # BUG: passing a string to log()
}
# Turn on: errors will now open the call stack browser
options(error = recover)

# Trigger the failing call chain. recover() needs a user to choose a frame, so
# the call is only made in an interactive session. The call must sit on its own
# line: writing it on the same line as the next options() call is a syntax
# error ("unexpected symbol").
if (interactive()) {
  outer_fn(3)
}

# Turn off: back to normal error messages
options(error = NULL)

##====assignmt 5.creating a function to define a mean=====##

# Arithmetic mean of a numeric vector, computed as sum / count.
#
# Arguments:
#   x      Numeric (integer or double) vector with at least one element.
#   na.rm  If TRUE, missing values are dropped before averaging. The default,
#          FALSE, keeps them, so any NA in x makes the result NA.
# Returns: a single number, the sum of the kept values divided by how many
#          there are (NaN when na.rm = TRUE removes every value).
# Errors:  stops if x is not numeric or has length zero.
data_mean <- function(x, na.rm = FALSE) {
  # Input validation
  if (!is.numeric(x)) {
    stop("Input must be a numeric vector")
  }
  if (length(x) == 0) {
    stop("Input vector cannot be empty")
  }

  # Drop missing values only when the caller asks for it.
  if (isTRUE(na.rm)) {
    x <- x[!is.na(x)]
  }

  sum(x) / length(x)
}

data_mean(population$X2020.Population)

## [1] 33501071

##=====6. sapply() and vapply(), map() and mapply()====##

#sapply(): applies a function to each element of a vector, list, or data frame and tries to simplify the output into a vector or matrix.

vapply(): means vector apply.

It applies a function to each element and checks that every result matches the specified type and length, producing an error if any result differs.

map(): is from the purrr package in the tidyverse.

It applies a function to each element of a list or vector.

mapply() means multivariate apply.

It applies a function simultaneously to multiple vectors or lists

#sapply

# Column names for the years 2015 to 2019: "X2015", ..., "X2019".
years <- paste0("X", 2015:2019)

# Mean of each selected column, ignoring missing values.
sapply(X = CO2[years], FUN = mean, na.rm = TRUE)

##    X2015    X2016    X2017    X2018    X2019 
## 4.184129 4.195432 4.199802 4.164970 4.115138

# vapply
# Same per-column mean as the sapply() call, but vapply() also checks that
# every result is a single number (numeric(1)) and stops with an error otherwise.

vapply(CO2[years], mean, numeric(1), na.rm = TRUE)

# Load purrr
library(purrr)

# map
# map_dbl() is the purrr map() variant that returns a double vector instead of
# a list. Here it takes the maximum of each column named in `years`, ignoring
# missing values.


map_dbl(CO2[years], max, na.rm = TRUE)

##    X2015    X2016    X2017    X2018    X2019 
## 33.04351 32.74589 32.12799 31.06753 32.47447

# mapply
# Row-by-row total of the 2018 and 2019 columns: sum() is called once per pair
# of values. na.rm = TRUE is a constant option, not a third vector to iterate
# over, so it is passed through MoreArgs; this also keeps the call working when
# the columns are empty.
# NOTE(review): a row where both years are missing gives 0, not NA. Check
# whether the zeros in the result are real values.


mapply(
  sum,
  CO2$X2018,
  CO2$X2019,
  MoreArgs = list(na.rm = TRUE)
)

##   [1]  0.00000000  0.32311967  1.56981200  3.47498727 12.84419283 37.72024134
##   [7]  7.71642226  4.02051226  0.00000000 10.65141659 30.73179640 14.42704817
##  [13]  6.83523018  0.12418554 16.29192254  1.26464186  0.48248440  1.07364246
##  [19] 11.43289089 39.89730848 14.55241272 13.18210069 12.36774508  3.28415374
##  [25]  0.00000000  3.98652793  4.12966552  8.68106588 33.70955495  2.76781910
##  [31]  6.39013398  0.09986644 31.08119187  8.76103245  9.44545665 15.09326886
##  [37]  0.82760395  0.74473508  0.07409836  2.58683519  3.19239852  0.73651380
##  [43]  2.30376590  3.30665804  4.64191032  0.00000000  0.00000000 12.05296990
##  [49] 18.51485436 16.44781711  0.84854235  4.74086410 10.82633206  4.91726429
##  [55]  7.89775954  4.61098725  5.00286636  0.49781308 10.58392239 19.26098157
##  [61]  0.32165360 15.45549455  3.67650907  9.04464425  0.00000000  3.09080248
##  [67]  4.83240450 10.64826267  5.28568376  1.28636184  0.00000000  0.61135973
##  [73]  0.48826906  0.34252300  8.85775417 11.65420747  5.81764448  0.00000000
##  [79]  2.28366926  0.00000000  6.96563822  0.00000000  2.09686039  8.08294250
##  [85]  0.59054706  9.49316039  4.44599474  0.00000000  3.61031662 14.98246700
##  [91] 15.39088811  8.68560708  9.36811573 13.79580413 10.68818637  5.77943731
##  [97]  4.92482729 17.34266164 23.30825373  0.76410546  3.33591826  1.87864863
## [103]  1.54217441  9.49933649 24.02457120 44.11162679  5.35130983  8.12826444
## [109]  0.47348308 16.85859831  5.86797793  7.63746998  2.09159859  0.70855352
## [115]  8.35649858 30.63663413  7.99762404  0.00000000  0.00000000  3.76285984
## [121]  0.00000000  6.47797987  0.28032546  8.04605945  7.10799519  6.14325194
## [127]  7.51274743  0.58329238  6.49156175  1.29274132  8.19770229 13.95076821
## [133]  0.00000000  0.48261150  1.71079914  6.55863141  0.15389818 15.67927485
## [139]  3.41943488  0.00000000  0.17911969  1.13465816  1.59089814 17.22092463
## [145] 13.79881264  1.01078833 11.19316554 13.43368738 30.93630825  1.76255283
## [151]  5.54995831  3.45210191  2.64430891 27.84602029  1.74213133 15.98725756
## [157]  0.00000000  4.16114118  9.15315396  2.38306702  0.00000000  0.00000000
## [163] 63.54200206  7.67848605 23.29377418  0.21019163 30.75096950  0.98065155
## [169]  1.29377533 16.71330846  1.08888035  0.22760569  2.26926254  0.00000000
## [175]  0.08865660 13.22246498  0.29489586  1.36092522  8.39514600 11.75705173
## [181] 13.29654826  6.94402966  1.63700324  0.00000000 12.24248805  3.12253951
## [187]  0.00000000  0.28258706  0.58604045  7.53846236  1.96437611 24.74860902
## [193]  0.96054259  3.08154699 25.11568562  5.14204105  9.75694376  1.72718813
## [199]  0.42772795  0.26951408  8.09636661  3.77082691 29.89595826  6.88826626
## [205]  4.61935915  8.51357769  0.00000000  0.00000000  6.47745010  1.41779483
## [211]  3.00075030  0.72170176 15.02341470  0.82620618  1.66038979

Conclusion

These functions are important in R programming because they:

simplify repetitive tasks, improve efficiency, reduce manual loops, support data analysis and research.

In CO₂ emission analysis, they help calculate:

averages, trends, comparisons, growth rates, summary statistics quickly and efficiently.

All class assignment in R

UWIKUZO Theopiste 20251MBI003

2026-05-24

Using Built-in Datasets