ASSIGNMENT OF R PROGRAMMING FOR DATA SCIENCE

Q1. Importing Data from Different Sources in R

#loading all libraries that will be required
library(readxl)
library(haven)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# A. Import data from an Excel workbook (readxl)

data <- read_excel(
  path = "D:/AUCA Notes/R programming/datsets/power consumption.xlsx"
)
# Preview the first six rows of the imported sheet
head(data, n = 6L)

## # A tibble: 6 × 3
##   Date             operator `Meter Reading(in kwh)`
##   <chr>            <chr>                      <dbl>
## 1 11/2/2026@12h57  Emmanuel                 111236.
## 2 12/02/2026@12h58 Emmanuel                 111506.
## 3 13/02/2026@13:04 Emmanuel                 111761 
## 4 16/02/2026@13:11 Emmanuel                 112628.
## 5 17/02/2026@09:38 Emmanuel                 112889.
## 6 24/02/2026@12:27 Emmanuel                 115056.

# B. Import data from statistical packages (SPSS, SAS, Stata) with haven
# The calls below are commented-out templates; point them at real files to
# run them.

# data <- read_sav("data.sav")       # SPSS (.sav)
# data <- read_sas("data.sas7bdat")  # SAS (.sas7bdat)
# data <- read_dta("data.dta")       # Stata (.dta)

# C. Import data from text files
# 1. Importing a CSV file
data <- read.csv(
  file = "D:/AUCA Notes/R programming/datsets/world_population.csv"
)
# Preview the first six rows
head(data, n = 6L)

##   Rank CCA3 Country.Territory          Capital Continent X2022.Population
## 1   36  AFG       Afghanistan            Kabul      Asia         41128771
## 2  138  ALB           Albania           Tirana    Europe          2842321
## 3   34  DZA           Algeria          Algiers    Africa         44903225
## 4  213  ASM    American Samoa        Pago Pago   Oceania            44273
## 5  203  AND           Andorra Andorra la Vella    Europe            79824
## 6   42  AGO            Angola           Luanda    Africa         35588987
##   X2020.Population X2015.Population X2010.Population X2000.Population
## 1         38972230         33753499         28189672         19542982
## 2          2866849          2882481          2913399          3182021
## 3         43451666         39543154         35856344         30774621
## 4            46189            51368            54849            58230
## 5            77700            71746            71519            66097
## 6         33428485         28127721         23364185         16394062
##   X1990.Population X1980.Population X1970.Population Area..km..
## 1         10694796         12486631         10752971     652230
## 2          3295066          2941651          2324731      28748
## 3         25518074         18739378         13795915    2381741
## 4            47818            32886            27075        199
## 5            53569            35611            19860        468
## 6         11828638          8330047          6029700    1246700
##   Density..per.km.. Growth.Rate World.Population.Percentage
## 1           63.0587      1.0257                        0.52
## 2           98.8702      0.9957                        0.04
## 3           18.8531      1.0164                        0.56
## 4          222.4774      0.9831                        0.00
## 5          170.5641      1.0100                        0.00
## 6           28.5466      1.0315                        0.45

# 2. Importing a whitespace-delimited text file
# assignment.txt has no header row: its first line is already a meter
# reading. With header = TRUE that first record was consumed as the column
# names (shown as X10.03.2026.12.12, Emmanuel, X119230.7) and dropped from the
# data, so the file is read with header = FALSE and the columns are named
# explicitly.
data <- read.table(
  file = "D:/AUCA Notes/R programming/datsets/assignment.txt",
  header = FALSE,
  col.names = c("Date", "operator", "Meter_Reading_kwh")
)
head(data)

##               Date operator Meter_Reading_kwh
## 1 10/03/2026@12:12 Emmanuel          119230.7
## 2 11/03/2026@17:15 Emmanuel          119548.2
## 3 16/03/2026@12:23 Emmanuel          120885.9
## 4 17/03/2026@12:13 Emmanuel          121165.1
## 5 18/03/2026@12:54 Emmanuel          121449.8
## 6 19/03/2026@12:36 Emmanuel          121708.2

Q2.A. Merging Datasets by 2 Variables (ID and Name)

# First data set: each student's age, keyed by ID and Name
data1 <- data.frame(
  ID   = c(1, 2, 3),
  Name = c("emmy", "peter", "kevin"),
  Age  = c(20, 19, 22)
)

# Second data set: each student's marks, keyed by the same ID and Name
data2 <- data.frame(
  ID    = c(1, 2, 3),
  Name  = c("emmy", "peter", "kevin"),
  Marks = c(80, 90, 70)
)

# Merge data1 and data2 on two key variables (ID and Name); only rows whose
# keys match in both data sets are kept
merged_data <- merge(x = data1, y = data2, by = c("ID", "Name"))
head(merged_data)

##   ID  Name Age Marks
## 1  1  emmy  20    80
## 2  2 peter  19    90
## 3  3 kevin  22    70

Q2.B. Merging Datasets by 3 Variables (ID, Name and Age)

# First data set: age and gender, keyed by ID, Name and Age
data1 <- data.frame(
  ID     = c(1, 2, 3),
  Name   = c("emmy", "peter", "kevine"),
  Age    = c(20, 21, 22),
  Gender = c("M", "M", "F")
)

# Second data set: marks, carrying the same three key columns
data2 <- data.frame(
  ID    = c(1, 2, 3),
  Name  = c("emmy", "peter", "kevine"),
  Age   = c(20, 21, 22),
  Marks = c(80, 90, 70)
)

# Merge the data sets on three key variables (ID, Name and Age)
merged_data <- merge(x = data1, y = data2, by = c("ID", "Name", "Age"))
head(merged_data)

##   ID   Name Age Gender Marks
## 1  1   emmy  20      M    80
## 2  2  peter  21      M    90
## 3  3 kevine  22      F    70

Q3. Show how to use group_by(), $ and %>%

# Read the CO2 emission data set
co2 <- read.csv("D:/AUCA Notes/R programming/datsets/CO2_emission.csv")

# group_by() and %>%: the pipe passes co2 into group_by(), which groups the
# rows by country, and then into summarise(), which counts the rows in each
# group. The result is stored rather than printed; inspect it with
# head(country_rows).
country_rows <- co2 %>%
  group_by(Country.Name) %>%
  summarise(Rows = n(), .groups = "drop")

# $ extracts a single column as a vector; here every value of Country.Name
co2$Country.Name

##   [1] "Aruba"                          "Afghanistan"                   
##   [3] "Angola"                         "Albania"                       
##   [5] "Andorra"                        "United Arab Emirates"          
##   [7] "Argentina"                      "Armenia"                       
##   [9] "American Samoa"                 "Antigua and Barbuda"           
##  [11] "Australia"                      "Austria"                       
##  [13] "Azerbaijan"                     "Burundi"                       
##  [15] "Belgium"                        "Benin"                         
##  [17] "Burkina Faso"                   "Bangladesh"                    
##  [19] "Bulgaria"                       "Bahrain"                       
##  [21] "Bahamas, The"                   "Bosnia and Herzegovina"        
##  [23] "Belarus"                        "Belize"                        
##  [25] "Bermuda"                        "Bolivia"                       
##  [27] "Brazil"                         "Barbados"                      
##  [29] "Brunei Darussalam"              "Bhutan"                        
##  [31] "Botswana"                       "Central African Republic"      
##  [33] "Canada"                         "Switzerland"                   
##  [35] "Chile"                          "China"                         
##  [37] "Cote d'Ivoire"                  "Cameroon"                      
##  [39] "Congo, Dem. Rep."               "Congo, Rep."                   
##  [41] "Colombia"                       "Comoros"                       
##  [43] "Cabo Verde"                     "Costa Rica"                    
##  [45] "Cuba"                           "Curacao"                       
##  [47] "Cayman Islands"                 "Cyprus"                        
##  [49] "Czech Republic"                 "Germany"                       
##  [51] "Djibouti"                       "Dominica"                      
##  [53] "Denmark"                        "Dominican Republic"            
##  [55] "Algeria"                        "Ecuador"                       
##  [57] "Egypt, Arab Rep."               "Eritrea"                       
##  [59] "Spain"                          "Estonia"                       
##  [61] "Ethiopia"                       "Finland"                       
##  [63] "Fiji"                           "France"                        
##  [65] "Faroe Islands"                  "Micronesia, Fed. Sts."         
##  [67] "Gabon"                          "United Kingdom"                
##  [69] "Georgia"                        "Ghana"                         
##  [71] "Gibraltar"                      "Guinea"                        
##  [73] "Gambia, The"                    "Guinea-Bissau"                 
##  [75] "Equatorial Guinea"              "Greece"                        
##  [77] "Grenada"                        "Greenland"                     
##  [79] "Guatemala"                      "Guam"                          
##  [81] "Guyana"                         "Hong Kong SAR, China"          
##  [83] "Honduras"                       "Croatia"                       
##  [85] "Haiti"                          "Hungary"                       
##  [87] "Indonesia"                      "Isle of Man"                   
##  [89] "India"                          "Ireland"                       
##  [91] "Iran, Islamic Rep."             "Iraq"                          
##  [93] "Iceland"                        "Israel"                        
##  [95] "Italy"                          "Jamaica"                       
##  [97] "Jordan"                         "Japan"                         
##  [99] "Kazakhstan"                     "Kenya"                         
## [101] "Kyrgyz Republic"                "Cambodia"                      
## [103] "Kiribati"                       "St. Kitts and Nevis"           
## [105] "Korea, Rep."                    "Kuwait"                        
## [107] "Lao PDR"                        "Lebanon"                       
## [109] "Liberia"                        "Libya"                         
## [111] "St. Lucia"                      "Liechtenstein"                 
## [113] "Sri Lanka"                      "Lesotho"                       
## [115] "Lithuania"                      "Luxembourg"                    
## [117] "Latvia"                         "Macao SAR, China"              
## [119] "St. Martin (French part)"       "Morocco"                       
## [121] "Monaco"                         "Moldova"                       
## [123] "Madagascar"                     "Maldives"                      
## [125] "Mexico"                         "Marshall Islands"              
## [127] "North Macedonia"                "Mali"                          
## [129] "Malta"                          "Myanmar"                       
## [131] "Montenegro"                     "Mongolia"                      
## [133] "Northern Mariana Islands"       "Mozambique"                    
## [135] "Mauritania"                     "Mauritius"                     
## [137] "Malawi"                         "Malaysia"                      
## [139] "Namibia"                        "New Caledonia"                 
## [141] "Niger"                          "Nigeria"                       
## [143] "Nicaragua"                      "Netherlands"                   
## [145] "Norway"                         "Nepal"                         
## [147] "Nauru"                          "New Zealand"                   
## [149] "Oman"                           "Pakistan"                      
## [151] "Panama"                         "Peru"                          
## [153] "Philippines"                    "Palau"                         
## [155] "Papua New Guinea"               "Poland"                        
## [157] "Puerto Rico"                    "Korea, Dem. People's Rep."     
## [159] "Portugal"                       "Paraguay"                      
## [161] "West Bank and Gaza"             "French Polynesia"              
## [163] "Qatar"                          "Romania"                       
## [165] "Russian Federation"             "Rwanda"                        
## [167] "Saudi Arabia"                   "Sudan"                         
## [169] "Senegal"                        "Singapore"                     
## [171] "Solomon Islands"                "Sierra Leone"                  
## [173] "El Salvador"                    "San Marino"                    
## [175] "Somalia"                        "Serbia"                        
## [177] "South Sudan"                    "Sao Tome and Principe"         
## [179] "Suriname"                       "Slovak Republic"               
## [181] "Slovenia"                       "Sweden"                        
## [183] "Eswatini"                       "Sint Maarten (Dutch part)"     
## [185] "Seychelles"                     "Syrian Arab Republic"          
## [187] "Turks and Caicos Islands"       "Chad"                          
## [189] "Togo"                           "Thailand"                      
## [191] "Tajikistan"                     "Turkmenistan"                  
## [193] "Timor-Leste"                    "Tonga"                         
## [195] "Trinidad and Tobago"            "Tunisia"                       
## [197] "Turkiye"                        "Tuvalu"                        
## [199] "Tanzania"                       "Uganda"                        
## [201] "Ukraine"                        "Uruguay"                       
## [203] "United States"                  "Uzbekistan"                    
## [205] "St. Vincent and the Grenadines" "Venezuela, RB"                 
## [207] "British Virgin Islands"         "Virgin Islands (U.S.)"         
## [209] "Vietnam"                        "Vanuatu"                       
## [211] "Samoa"                          "Yemen, Rep."                   
## [213] "South Africa"                   "Zambia"                        
## [215] "Zimbabwe"

Q4. How to use trace() and recover() (used to see what happens inside a function)

# Load the world population data used in this section
population <- read.csv("D:/AUCA Notes/R programming/datsets/world_population.csv")

# Start tracing mean(): every call to mean() is now announced with a
# "trace:" line before its result is shown
trace(mean)

# Calculate the average population for 2022; the traced call is echoed first,
# followed by the value
mean(population$X2022.Population, na.rm = TRUE)

## trace: mean(population$X2022.Population, na.rm = TRUE)

## [1] 34074415

# Stop tracing mean()
untrace(mean)



# USING recover()

# Turn on recover mode: install recover() as the error handler and remember
# the previous setting so it can be restored exactly afterwards.
old_options <- options(error = recover)

# Intentional error for practice: sum() cannot add up a character column, so
# this call fails and control is handed to recover(). The previous attempt,
# mean(population$X2022.Population, na.rm = "TRUE"), returned 34074415 without
# raising any error, so recover() was never entered.
# Run this line in an interactive session: recover() then lists the active
# calls; enter a frame number to inspect it, or 0 to exit.
sum(population$Country.Territory)

# Turn off recover mode by restoring the previous error handler
options(old_options)

Q5. Creating our own summary() function

# Load the world population data set that the summary function is applied to
population <- read.csv(
  file = "D:/AUCA Notes/R programming/datsets/world_population.csv"
)

# Create our own summary() function
#
# my_summary() computes descriptive statistics for a vector after dropping
# its missing values.
#
# Args:
#   x: A numeric vector. NA values are removed before anything is computed.
#
# Returns:
#   A named list with the elements Count, Minimum, Maximum, Mean, Median,
#   Q1, Q3 and Sum. Q1 and Q3 keep the "25%" / "75%" names that quantile()
#   attaches. When no non-missing values remain, Minimum and Maximum are NA
#   instead of the Inf / -Inf (plus warnings) that min() and max() give for
#   empty input.
my_summary <- function(x) {
  # Remove missing values
  x <- x[!is.na(x)]

  # Count the values that are left
  n <- length(x)
  is_empty <- n == 0L

  list(
    Count = n,
    # min() / max() warn and return Inf / -Inf on empty input, so guard them
    Minimum = if (is_empty) NA_real_ else min(x),
    Maximum = if (is_empty) NA_real_ else max(x),
    Mean = mean(x),
    Median = median(x),
    Q1 = quantile(x, 0.25),
    Q3 = quantile(x, 0.75),
    # Sum in double precision: sum() on an integer vector returns NA (with a
    # warning) once the total exceeds .Machine$integer.max
    Sum = sum(as.numeric(x))
  )
}

# Summarise the 2022 population column with my_summary()
my_summary(x = population$X2022.Population)

## $Count
## [1] 234
## 
## $Minimum
## [1] 510
## 
## $Maximum
## [1] 1425887337
## 
## $Mean
## [1] 34074415
## 
## $Median
## [1] 5559945
## 
## $Q1
##      25% 
## 419738.5 
## 
## $Q3
##      75% 
## 22476505 
## 
## $Sum
## [1] 7973413042

# This shows summary statistics for 2022 population.

Q6. Apply Functions (lapply, sapply, vapply, mapply)

Q7. Apply geom_bar(), geom_col() (bar plot) and geom_smooth()

# Load the world population data set used for the plots
# (ggplot2 and dplyr are already attached at the top of the document)
population <- read.csv(
  file = "D:/AUCA Notes/R programming/datsets/world_population.csv"
)

# List the column names available for plotting
names(population)

##  [1] "Rank"                        "CCA3"                       
##  [3] "Country.Territory"           "Capital"                    
##  [5] "Continent"                   "X2022.Population"           
##  [7] "X2020.Population"            "X2015.Population"           
##  [9] "X2010.Population"            "X2000.Population"           
## [11] "X1990.Population"            "X1980.Population"           
## [13] "X1970.Population"            "Area..km.."                 
## [15] "Density..per.km.."           "Growth.Rate"                
## [17] "World.Population.Percentage"

# 1. geom_bar()
# Bar height is the number of rows per category, i.e. the number of countries
# in each continent

ggplot(data = population, mapping = aes(x = Continent)) +
  geom_bar() +
  ggtitle(label = "Number of Countries by Continent")

# 2. geom_col() (bar plot)
# Bar height is taken from a y value computed beforehand: the average 2022
# population of the countries in each continent

continent_pop <- population %>%
  group_by(Continent) %>%
  summarise(Average_Population = mean(X2022.Population, na.rm = TRUE))

ggplot(
  data = continent_pop,
  mapping = aes(x = Continent, y = Average_Population)
) +
  geom_col() +
  ggtitle(label = "Average Population by Continent")

# 3. geom_smooth()
# Scatter plot of area against 2022 population with a smoothed trend line
# drawn on top of the points

ggplot(
  data = population,
  mapping = aes(x = Area..km.., y = X2022.Population)
) +
  geom_point() +
  geom_smooth() +
  ggtitle(label = "Relationship Between Area and Population")

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

ASSIGNMENT OF R PROGRAMMING FOR DATA SCIENCE

Emmanuel IRADUKUNDA,20251MBI047

2026-05-24