Assignment 1: Importing data from different sources

Import data from csv file and do some functions on it.

options(repos = c(CRAN = “https://cloud.r-project.org”)) install.packages(“readxl”)

library(readxl)

student_data <- read.csv("C:\\Moses\\StudentsPerformance.csv")

str(student_data)

## 'data.frame':    1000 obs. of  8 variables:
##  $ gender                     : chr  "female" "female" "female" "male" ...
##  $ ethnicity                  : chr  "group B" "group C" "group B" "group A" ...
##  $ parental.level.of.education: chr  "bachelor's degree" "some college" "master's degree" "associate's degree" ...
##  $ lunch                      : chr  "standard" "standard" "standard" "free/reduced" ...
##  $ test.preparation.course    : chr  "none" "completed" "none" "none" ...
##  $ math.score                 : int  72 69 90 47 76 71 88 40 64 38 ...
##  $ reading.score              : int  72 90 95 57 78 83 95 43 64 60 ...
##  $ writing.score              : int  74 88 93 44 75 78 92 39 67 50 ...

head(student_data)

##   gender ethnicity parental.level.of.education        lunch
## 1 female   group B           bachelor's degree     standard
## 2 female   group C                some college     standard
## 3 female   group B             master's degree     standard
## 4   male   group A          associate's degree free/reduced
## 5   male   group C                some college     standard
## 6 female   group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78

tail(student_data)

##      gender ethnicity parental.level.of.education        lunch
## 995    male   group A                 high school     standard
## 996  female   group E             master's degree     standard
## 997    male   group C                 high school free/reduced
## 998  female   group C                 high school free/reduced
## 999  female   group D                some college     standard
## 1000 female   group D                some college free/reduced
##      test.preparation.course math.score reading.score writing.score
## 995                     none         63            63            62
## 996                completed         88            99            95
## 997                     none         62            55            55
## 998                completed         59            71            65
## 999                completed         68            78            77
## 1000                    none         77            86            86

View(student_data)

data <- student_data[2:3,]
print(data)

##   gender ethnicity parental.level.of.education    lunch test.preparation.course
## 2 female   group C                some college standard               completed
## 3 female   group B             master's degree standard                    none
##   math.score reading.score writing.score
## 2         69            90            88
## 3         90            95            93

summary(student_data$writing.score)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.00   57.75   69.00   68.05   79.00  100.00

hist(student_data$reading.score, col = "blue")

Importing data from MySql server

LOAD LIBRARY

library(RMySQL)

## Loading required package: DBI

CONNECT TO MYSQL DATABASE

con <- dbConnect(
  MySQL(),
  user = "root",
  password = "Motta@123",
  host = "localhost",
  port = 3306,
  dbname = "sakila"
)

SHOW AVAILABLE TABLES

tables <- dbListTables(con)
print(tables)

##  [1] "actor"                      "actor_info"                
##  [3] "address"                    "category"                  
##  [5] "city"                       "country"                   
##  [7] "customer"                   "customer_list"             
##  [9] "film"                       "film_actor"                
## [11] "film_category"              "film_list"                 
## [13] "film_text"                  "inventory"                 
## [15] "language"                   "nicer_but_slower_film_list"
## [17] "payment"                    "rental"                    
## [19] "sales_by_film_category"     "sales_by_store"            
## [21] "staff"                      "staff_list"                
## [23] "store"

Assignment 2: Merging datasets with 2 to 3 variables

###Example 1: Merge Using Two Variables

df1 <- data.frame(
  id = c(1, 2, 3, 4),
  subject = c("Python", "C programming", "Python", "R programming"),
  name = c("Moses", "Kevine", "Paul", "Jackson")
)

df2 <- data.frame(
  id = c(1, 2, 3, 4),
  subject = c("Python", "C programming", "Python", "R programming"),
  marks = c(80, 90, 75, 85)
)

merged_data <- merge(df1, df2, by = c("id", "subject"))

merged_data

##   id       subject    name marks
## 1  1        Python   Moses    80
## 2  2 C programming  Kevine    90
## 3  3        Python    Paul    75
## 4  4 R programming Jackson    85

###Example 2: Merge with Different Column Names (Left Join Style)

df1 <- data.frame(
  student_id = c(1, 2, 3, 4),
  subject = c("Python", "C programming", "Python", "R programming"),
  name = c("Moses", "Kevine", "Paul", "Jackson")
)

df2 <- data.frame(
  id = c(1, 2, 3, 4),
  subject = c("Python", "C programming", "Python", "R programming"),
  marks = c(80, 90, 75, 85)
)

merged_data <- merge(df1, df2,
                     by.x = "student_id",
                     by.y = "id",
                     all.x = TRUE)

merged_data

##   student_id     subject.x    name     subject.y marks
## 1          1        Python   Moses        Python    80
## 2          2 C programming  Kevine C programming    90
## 3          3        Python    Paul        Python    75
## 4          4 R programming Jackson R programming    85

Assignment 3: How to use these functions: select(),filter(), arrange(), rename(), mutate(), group_by, %>%

###select()

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

student <- student_data %>% select (gender, ethnicity, lunch, math.score)
head(student)

##   gender ethnicity        lunch math.score
## 1 female   group B     standard         72
## 2 female   group C     standard         69
## 3 female   group B     standard         90
## 4   male   group A free/reduced         47
## 5   male   group C     standard         76
## 6 female   group B     standard         71

###filter()

library(dplyr)
student<- student_data %>% filter(gender == "female" & math.score > 95)
head(student)

##   gender ethnicity parental.level.of.education    lunch test.preparation.course
## 1 female   group E           bachelor's degree standard               completed
## 2 female   group C           bachelor's degree standard               completed
## 3 female   group D            some high school standard               completed
## 4 female   group E                 high school standard                    none
## 5 female   group E                some college standard                    none
## 6 female   group E           bachelor's degree standard                    none
##   math.score reading.score writing.score
## 1         99           100           100
## 2         96           100           100
## 3         97           100           100
## 4         99            93            90
## 5        100            92            97
## 6        100           100           100

###arrange()

library(dplyr)
student <- student_data %>% arrange(gender, desc(math.score))
head(student)

##   gender ethnicity parental.level.of.education    lunch test.preparation.course
## 1 female   group E                some college standard                    none
## 2 female   group E           bachelor's degree standard                    none
## 3 female   group E          associate's degree standard                    none
## 4 female   group E           bachelor's degree standard               completed
## 5 female   group E                 high school standard                    none
## 6 female   group D                some college standard                    none
##   math.score reading.score writing.score
## 1        100            92            97
## 2        100           100           100
## 3        100           100           100
## 4         99           100           100
## 5         99            93            90
## 6         98           100            99

###rename()

student <- student_data %>% rename(student_gender = gender, math_marks = math.score)
head(student)

##   student_gender ethnicity parental.level.of.education        lunch
## 1         female   group B           bachelor's degree     standard
## 2         female   group C                some college     standard
## 3         female   group B             master's degree     standard
## 4           male   group A          associate's degree free/reduced
## 5           male   group C                some college     standard
## 6         female   group B          associate's degree     standard
##   test.preparation.course math_marks reading.score writing.score
## 1                    none         72            72            74
## 2               completed         69            90            88
## 3                    none         90            95            93
## 4                    none         47            57            44
## 5                    none         76            78            75
## 6                    none         71            83            78

###mutate()

student <- student_data %>% mutate(total_marks = math.score + reading.score + writing.score) ## adding new column of total score
head(student)

##   gender ethnicity parental.level.of.education        lunch
## 1 female   group B           bachelor's degree     standard
## 2 female   group C                some college     standard
## 3 female   group B             master's degree     standard
## 4   male   group A          associate's degree free/reduced
## 5   male   group C                some college     standard
## 6 female   group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score total_marks
## 1                    none         72            72            74         218
## 2               completed         69            90            88         247
## 3                    none         90            95            93         278
## 4                    none         47            57            44         148
## 5                    none         76            78            75         229
## 6                    none         71            83            78         232

###mutate()

student <- student_data %>% mutate(total_marks = math.score + reading.score + writing.score, avg = (math.score + reading.score + writing.score)/3)
head(student)

##   gender ethnicity parental.level.of.education        lunch
## 1 female   group B           bachelor's degree     standard
## 2 female   group C                some college     standard
## 3 female   group B             master's degree     standard
## 4   male   group A          associate's degree free/reduced
## 5   male   group C                some college     standard
## 6 female   group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score total_marks
## 1                    none         72            72            74         218
## 2               completed         69            90            88         247
## 3                    none         90            95            93         278
## 4                    none         47            57            44         148
## 5                    none         76            78            75         229
## 6                    none         71            83            78         232
##        avg
## 1 72.66667
## 2 82.33333
## 3 92.66667
## 4 49.33333
## 5 76.33333
## 6 77.33333

###mutate()

student <- student_data %>% mutate(total_marks = math.score + reading.score + writing.score, avg = (math.score + reading.score + writing.score)/3, result = ifelse(((math.score + reading.score + writing.score)/3) >= 50, "Pass", "Fail"))
head(student)

##   gender ethnicity parental.level.of.education        lunch
## 1 female   group B           bachelor's degree     standard
## 2 female   group C                some college     standard
## 3 female   group B             master's degree     standard
## 4   male   group A          associate's degree free/reduced
## 5   male   group C                some college     standard
## 6 female   group B          associate's degree     standard
##   test.preparation.course math.score reading.score writing.score total_marks
## 1                    none         72            72            74         218
## 2               completed         69            90            88         247
## 3                    none         90            95            93         278
## 4                    none         47            57            44         148
## 5                    none         76            78            75         229
## 6                    none         71            83            78         232
##        avg result
## 1 72.66667   Pass
## 2 82.33333   Pass
## 3 92.66667   Pass
## 4 49.33333   Fail
## 5 76.33333   Pass
## 6 77.33333   Pass

GROUP_BY

student <- student_data %>% group_by(gender) %>% summarise(avg = mean(math.score))
head(student)

## # A tibble: 2 × 2
##   gender   avg
##   <chr>  <dbl>
## 1 female  63.6
## 2 male    68.7

Assignment 4: How to use trace(), recover() functions

using trace() function to find the average marks of mathematics subject and this is used to modify or inspect a function during execution.

math_marks <- student_data$math.score
myfun <- function(math_marks) {
  avg <- mean(math_marks)
  print(avg)
  
}
trace(myfun, quote(cat("Function is running\n")), print = FALSE)

## [1] "myfun"

myfun(math_marks)

## Function is running
## [1] 66.089

recover() function is used in debugging error environments, When an error occurs, it lets you inspect function calls step-by-step

enable recover function

options(error = recover)

###Example 1: Division by 0

f <- function(x) {
  y <- x / 0
  y
}
f(10)

## [1] Inf

Manual summary function to give statistics

marks <- student_data$math.score

my_summary <- function(math_marks) {

  # Sort the vector
  x <- sort(marks)

  n <- length(marks)

  # Minimum
  minimum <- marks[1]

  # Maximum
  maximum <- marks[n]

  # Mean
  total <- 0

  for(i in marks) {
    total <- total + i
  }

  mean_value <- total / n

  # Median
  if(n %% 2 == 1) {

    median_value <- marks[(n + 1) / 2]

  } else {

    median_value <- (x[n/2] + x[(n/2) + 1]) / 2
  }

  # First Quartile (Q1)
  q1_position <- (n + 1) / 4
  q1 <- x[round(q1_position)]

  # Third Quartile (Q3)
  q3_position <- 3 * (n + 1) / 4
  q3 <- x[round(q3_position)]

  # Display results
  cat("Minimum :", minimum, "\n")
  cat("1st Quartile :", q1, "\n")
  cat("Median :", median_value, "\n")
  cat("Mean :", mean_value, "\n")
  cat("3rd Quartile :", q3, "\n")
  cat("Maximum :", maximum, "\n")
}

my_summary(marks)

## Minimum : 72 
## 1st Quartile : 57 
## Median : 66 
## Mean : 66.089 
## 3rd Quartile : 77 
## Maximum : 77

Assignment 5: Manual Two-Sample t-Test Function

ttest_function <- function(x, y) {

  # Find lengths manually

  n1 <- 0

  for(i in x) {
    n1 <- n1 + 1
  }

  n2 <- 0

  for(i in y) {
    n2 <- n2 + 1
  }

  # Compute means manually

  sum1 <- 0

  for(i in x) {
    sum1 <- sum1 + i
  }

  mean1 <- sum1 / n1

  sum2 <- 0

  for(i in y) {
    sum2 <- sum2 + i
  }

  mean2 <- sum2 / n2

  # Compute variances manually

  ss1 <- 0

  for(i in x) {
    ss1 <- ss1 + (i - mean1)^2
  }

  var1 <- ss1 / (n1 - 1)

  ss2 <- 0

  for(i in y) {
    ss2 <- ss2 + (i - mean2)^2
  }

  var2 <- ss2 / (n2 - 1)

  # Pooled variance

  pooled_var <- (
    ((n1 - 1) * var1) +
    ((n2 - 1) * var2)
  ) / (n1 + n2 - 2)

  # Standard error

  se <- sqrt(
    pooled_var * ((1 / n1) + (1 / n2))
  )

  # t statistic

  t_value <- (mean1 - mean2) / se

  # Print results

  cat("Sample 1 Mean :", mean1, "\n")
  cat("Sample 2 Mean :", mean2, "\n")
  cat("Variance 1 :", var1, "\n")
  cat("Variance 2 :", var2, "\n")
  cat("t statistic :", t_value, "\n")
}

x <- student_data$math.score
y <- student_data$reading.score
ttest_function(x,y)

## Sample 1 Mean : 66.089 
## Sample 2 Mean : 69.169 
## Variance 1 : 229.919 
## Variance 2 : 213.1656 
## t statistic : -4.627084

Assignment 6: sapply() and vapply(), variants of lapply() that produces vectors,matrices, and arrays as Output, instead of lists map(), and mapply which iterate over multiple input data structures in parallel

###lapply(): Applies a function to each element and keeps output in a list

math <- student_data$math.score
reading <- student_data$reading.score
numbers <- list(math,reading)

lapply(numbers, sum)

## [[1]]
## [1] 66089
## 
## [[2]]
## [1] 69169

###sapply() : Like lapply(), but tries to simplify output.

math <- student_data$math.score
reading <- student_data$reading.score
numbers <- list(math,reading)
sapply(numbers, sum)

## [1] 66089 69169

###vapply(): You MUST specify expected output type

math <- student_data$math.score
reading <- student_data$reading.score
numbers <- list(math,reading)

vapply(numbers, sum, numeric(1))

## [1] 66089 69169

###mapply(): Applies function across multiple vectors at the same time.

math <- student_data$math.score
reading <- student_data$reading.score
mapply(sum, math)

##    [1]  72  69  90  47  76  71  88  40  64  38  58  40  65  78  50  69  88  18
##   [19]  46  54  66  65  44  69  74  73  69  67  70  62  69  63  56  40  97  81
##   [37]  74  50  75  57  55  58  53  59  50  65  55  66  57  82  53  77  53  88
##   [55]  71  33  82  52  58   0  79  39  62  69  59  67  45  60  61  39  58  63
##   [73]  41  61  49  44  30  80  61  62  47  49  50  72  42  73  76  71  58  73
##   [91]  65  27  71  43  79  78  65  63  58  65  79  68  85  60  98  58  87  66
##  [109]  52  70  77  62  54  51  99  84  75  78  51  55  79  91  88  63  83  87
##  [127]  72  65  82  51  89  53  87  75  74  58  51  70  59  71  76  59  42  57
##  [145]  88  22  88  73  68 100  62  77  59  54  62  70  66  60  61  66  82  75
##  [163]  49  52  81  96  53  58  68  67  72  94  79  63  43  81  46  71  52  97
##  [181]  62  46  50  65  45  65  80  62  48  77  66  76  62  77  69  61  59  55
##  [199]  45  78  67  65  69  57  59  74  82  81  74  58  80  35  42  60  87  84
##  [217]  83  34  66  61  56  87  55  86  52  45  72  57  68  88  76  46  67  92
##  [235]  83  80  63  64  54  84  73  80  56  59  75  85  89  58  65  68  47  71
##  [253]  60  80  54  62  64  78  70  65  64  79  44  99  76  59  63  69  88  71
##  [271]  69  58  47  65  88  83  85  59  65  73  53  45  73  70  37  81  97  67
##  [289]  88  77  76  86  63  65  78  67  46  71  40  90  81  56  67  80  74  69
##  [307]  99  51  53  49  73  66  67  68  59  71  77  83  63  56  67  75  71  43
##  [325]  41  82  61  28  82  41  71  47  62  90  83  61  76  49  24  35  58  61
##  [343]  69  67  79  72  62  77  75  87  52  66  63  46  59  61  63  42  59  80
##  [361]  58  85  52  27  59  49  69  61  44  73  84  45  74  82  59  46  80  85
##  [379]  71  66  80  87  79  38  38  67  64  57  62  73  73  77  76  57  65  48
##  [397]  50  85  74  60  59  53  49  88  54  63  65  82  52  87  70  84  71  63
##  [415]  51  84  71  74  68  57  82  57  47  59  41  62  86  69  65  68  64  61
##  [433]  61  47  73  50  75  75  70  89  67  78  59  73  79  67  69  86  47  81
##  [451]  64 100  65  65  53  37  79  53 100  72  53  54  71  77  75  84  26  72
##  [469]  77  91  83  63  68  59  90  71  76  80  55  76  73  52  68  59  49  70
##  [487]  61  60  64  79  65  64  83  81  54  68  54  59  66  76  74  94  63  95
##  [505]  40  82  68  55  79  86  76  64  62  54  77  76  74  66  66  67  71  91
##  [523]  69  54  53  68  56  36  29  62  68  47  62  79  73  66  51  51  85  97
##  [541]  75  79  81  82  64  78  92  72  62  79  79  87  40  77  53  32  55  61
##  [559]  53  73  74  63  96  63  48  48  92  61  63  68  71  91  53  50  74  40
##  [577]  61  81  48  53  81  77  63  73  69  65  55  44  54  48  58  71  68  74
##  [595]  92  56  30  53  69  65  54  29  76  60  84  75  85  40  61  58  69  58
##  [613]  94  65  82  60  37  88  95  65  35  62  58 100  61 100  69  61  49  44
##  [631]  67  79  66  75  84  71  67  80  86  76  41  74  72  74  70  65  59  64
##  [649]  50  69  51  68  85  65  73  62  77  69  43  90  74  73  55  65  80  50
##  [667]  63  77  73  81  66  52  69  65  69  50  73  70  81  63  67  60  62  29
##  [685]  62  94  85  77  53  93  49  73  66  77  49  79  75  59  57  66  79  57
##  [703]  87  63  59  62  46  66  89  42  93  80  98  81  60  76  73  96  76  91
##  [721]  62  55  74  50  47  81  65  68  73  53  68  55  87  55  53  67  92  53
##  [739]  81  61  80  37  81  59  55  72  69  69  50  87  71  68  79  77  58  84
##  [757]  55  70  52  69  53  48  78  62  60  74  58  76  68  58  52  75  52  62
##  [775]  66  49  66  35  72  94  46  77  76  52  91  32  72  19  68  52  48  60
##  [793]  66  89  42  57  70  70  69  52  67  76  87  82  73  75  64  41  90  59
##  [811]  51  45  54  87  72  94  45  61  60  77  85  78  49  71  48  62  56  65
##  [829]  69  68  61  74  64  77  58  60  73  75  58  66  39  64  23  74  40  90
##  [847]  91  64  59  80  71  61  87  82  62  97  75  65  52  87  53  81  39  71
##  [865]  97  82  59  61  78  49  59  70  82  90  43  80  81  57  59  64  63  71
##  [883]  64  55  51  62  93  54  69  44  86  85  50  88  59  32  36  63  67  65
##  [901]  85  73  34  93  67  88  57  79  67  70  50  69  52  47  46  68 100  44
##  [919]  57  91  69  35  72  54  74  74  64  65  46  48  67  62  61  70  98  70
##  [937]  67  57  85  77  72  78  81  61  58  54  82  49  49  57  94  75  74  58
##  [955]  62  72  84  92  45  75  56  48 100  65  72  62  66  63  68  75  89  78
##  [973]  53  49  54  64  60  62  55  91   8  81  79  78  74  57  40  81  44  67
##  [991]  86  65  55  62  63  88  62  59  68  77

###map():

library(purrr)
math <- student_data$math.score
reading <- student_data$reading.score

numbers <- list(math, reading)

map(numbers, sum)

## [[1]]
## [1] 66089
## 
## [[2]]
## [1] 69169

map_dbl(numbers, mean)

## [1] 66.089 69.169

Class Assignments

Moses

2026-05-24

Assignment 1: Importing data from different sources

Import data from csv file and do some functions on it.

Importing data from MySql server

LOAD LIBRARY

CONNECT TO MYSQL DATABASE

SHOW AVAILABLE TABLES

Assignment 2: Merging datasets with 2 to 3 variables

Assignment 3: How to use these functions: select(),filter(), arrange(), rename(), mutate(), group_by, %>%

GROUP_BY

Assignment 4: How to use trace(), recover() functions

using trace() function to find the average marks of mathematics subject and this is used to modify or inspect a function during execution.

recover() function is used in debugging error environments, When an error occurs, it lets you inspect function calls step-by-step

Manual summary function to give statistics

Assignment 5: Manual Two-Sample t-Test Function

Assignment 6: sapply() and vapply(), variants of lapply() that produces vectors,matrices, and arrays as Output, instead of lists map(), and mapply which iterate over multiple input data structures in parallel