#Assignment1

##Import data from both statistical package and database management system

###generate and save data in statistical package :

library(haven)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Build a small student table (one row per student) and save it to disk in
# SPSS .sav format.
st_data <- data.frame(
  st_id    = c(1, 2, 3),
  f_name   = c("Solaange", "Didi", "Luc"),
  l_name   = c("Uwi", "Inga", "Ira"),
  semester = c(1, 1, 1)
)
haven::write_sav(data = st_data, path = "st_data.sav")

##import data from SP:

# Show the working directory: the relative path used below is resolved
# against it.
getwd()
## [1] "C:/Users/Soluc/OneDrive/Documents/r-programming-msc/Assignments"
# Read the file back through the same relative path that write_sav() wrote to,
# instead of a machine-specific absolute path, so the script also runs on
# other computers.
d_from_sp <- read_sav("st_data.sav")
print(d_from_sp)
## # A tibble: 3 × 4
##   st_id f_name   l_name semester
##   <dbl> <chr>    <chr>     <dbl>
## 1     1 Solaange Uwi           1
## 2     2 Didi     Inga          1
## 3     3 Luc      Ira           1

#data from DBMS

library(RSQLite)
library(DBI)
library(RMySQL)
## 
## Attaching package: 'RMySQL'
## The following object is masked from 'package:RSQLite':
## 
##     isIdCurrent
# Open (or create) the file-based SQLite database and store the student table.
# overwrite = TRUE lets the script be re-run when the table already exists.
con <- dbConnect(RSQLite::SQLite(), "st_database.db")
dbWriteTable(con, "st_table", st_data, overwrite = TRUE)

# importing data from DBMS:

d_from_db <- dbReadTable(con, "st_table")
# The data is now in memory; close the connection so the database handle is
# not left open for the rest of the session.
dbDisconnect(con)
print(d_from_db)
##   st_id   f_name l_name semester
## 1     1 Solaange    Uwi        1
## 2     2     Didi   Inga        1
## 3     3      Luc    Ira        1
#View(d_from_db)

#Assignment 2

##How to merge datasets using 2 to 3 variables

# Student-side table for the merge examples: one row per student, with the
# course name, semester and academic year used as join keys.
students_set <- data.frame(
  st_id = as.numeric(1:3),
  f_name = c("Solange", "Didi", "Luc"),
  l_name = c("Uwi", "Inga", "Ira"),
  semester = rep(1, times = 3),
  c_name = c("Algo", "Ethics", "R"),
  academic_year = rep(2026, times = 3)
)

# Course-side table for the merge examples: one row per course with its
# teacher, semester and academic year. Course "R" is listed under 2025,
# the other two under 2026.
courses_set <- data.frame(
  c_id = as.numeric(1:3),
  c_name = c("Algo", "Ethics", "R"),
  teacher = c("Andrew", "Lydie", "Kevin"),
  semester = rep(1, times = 3),
  academic_year = c(2026, 2026, 2025)
)
# Join on two keys (semester and course name). academic_year exists in both
# tables but is not a key here, so merge() keeps both copies and tells them
# apart with the .x / .y suffixes.
merged_by_2v <- merge(
  x = students_set,
  y = courses_set,
  by = c("semester", "c_name")
)
print(merged_by_2v)
##   semester c_name st_id  f_name l_name academic_year.x c_id teacher
## 1        1   Algo     1 Solange    Uwi            2026    1  Andrew
## 2        1 Ethics     2    Didi   Inga            2026    2   Lydie
## 3        1      R     3     Luc    Ira            2026    3   Kevin
##   academic_year.y
## 1            2026
## 2            2026
## 3            2025
# Join on three keys. With academic_year added as a key, only rows whose year
# agrees in both tables are kept.
merged_by_3v <- merge(
  x = students_set,
  y = courses_set,
  by = c("semester", "c_name", "academic_year")
)
print(merged_by_3v)
##   semester c_name academic_year st_id  f_name l_name c_id teacher
## 1        1   Algo          2026     1 Solange    Uwi    1  Andrew
## 2        1 Ethics          2026     2    Didi   Inga    2   Lydie

#Assignment 3 ##how to use group_by function with %>%

# Group the students by course, then keep only the row of student 1.
# No ungroup() is applied, so the printed result is still grouped by c_name.
students_set %>%
  dplyr::group_by(c_name) %>%
  dplyr::filter(st_id == 1)
## # A tibble: 1 × 6
## # Groups:   c_name [1]
##   st_id f_name  l_name semester c_name academic_year
##   <dbl> <chr>   <chr>     <dbl> <chr>          <dbl>
## 1     1 Solange Uwi           1 Algo            2026

#Assignment 4 ##Create a function to calculate the mean

# Arithmetic mean of a vector, computed as sum(x) / length(x).
#
# Arguments:
#   x      A numeric (or logical) vector.
#   na.rm  If TRUE, missing values are dropped before the mean is computed.
#          The default FALSE keeps the original behaviour: any NA in x makes
#          the result NA.
#
# Returns a single number. Empty input gives NaN (0 / 0).
f11 <- function(x, na.rm = FALSE) {
  if (isTRUE(na.rm)) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
f11(c(10, 20, 30, 40))
## [1] 25

###how to use trace() and recover()

# Marks table: one row per student, one numeric column per subject.
marks <- data.frame(
  st_id = as.numeric(1:3),
  st_name = c("Solange", "Didi", "Luc"),
  math = c(85, 90, 78),
  science = c(88, 76, 92),
  r_prog = c(95, 88, 70)
)
print(marks)
##   st_id st_name math science r_prog
## 1     1 Solange   85      88     95
## 2     2    Didi   90      76     88
## 3     3     Luc   78      92     70
# trace() without a tracer reports every call to the traced function.
trace(mean)

mean(marks$math)
## trace: mean(marks$math)
## [1] 84.33333
untrace(mean)
# Install recover() as the error handler for this demonstration only, keeping
# the previous setting so it can be restored afterwards.
old_error_opt <- options(error = recover)
# NOTE: mean() on a character column only warns and returns NA (see the output
# below). recover() is entered on errors, so this call does not trigger it.
mean(marks$st_name)
## Warning in mean.default(marks$st_name): argument is not numeric or logical:
## returning NA
## [1] NA
# Put the previous error handler back so later errors in the session do not
# drop into the recover() browser.
options(old_error_opt)

#Assignment 5 ## use ggplot2

# Example class roster for the plotting assignment: 20 students with gender,
# class, two test scores, study hours, absences and a letter grade.
students <- data.frame(
  name = c(
    "Alice", "Bob", "Carol", "David", "Eva",
    "Frank", "Grace", "Henry", "Iris", "James",
    "Karen", "Leo", "Mia", "Noah", "Olivia",
    "Paul", "Quinn", "Rose", "Sam", "Tina"
  ),
  gender = c(
    "F", "M", "F", "M", "F",
    "M", "F", "M", "F", "M",
    "F", "M", "F", "M", "F",
    "M", "F", "F", "M", "F"
  ),
  class = c(
    "A", "A", "A", "A", "B",
    "B", "B", "B", "C", "C",
    "C", "C", "A", "B", "C",
    "A", "B", "C", "A", "B"
  ),
  math_score = c(
    85, 72, 90, 65, 78,
    88, 55, 92, 70, 83,
    60, 95, 74, 81, 68,
    77, 89, 62, 91, 73
  ),
  eng_score = c(
    78, 65, 88, 70, 82,
    75, 60, 85, 72, 79,
    65, 80, 90, 77, 71,
    69, 84, 66, 88, 76
  ),
  hours_study = c(
    5, 3, 6, 2, 4,
    5, 2, 7, 3, 5,
    2, 8, 4, 5, 3,
    4, 6, 3, 7, 4
  ),
  absent_days = c(
    2, 5, 1, 8, 3,
    2, 9, 1, 6, 3,
    7, 0, 2, 4, 5,
    3, 1, 6, 2, 4
  ),
  grade = c(
    "B", "C", "A", "D", "B",
    "B", "F", "A", "C", "B",
    "D", "A", "B", "B", "C",
    "B", "A", "D", "A", "C"
  )
)
# Box plot of English scores per class, with each class filled in its own
# colour.
students %>%
  ggplot(aes(x = class, y = eng_score, fill = class)) +
  geom_boxplot() +
  labs(
    title = "Compare classes by eng score",
    subtitle = "here is the graph"
  )

#Assignment 6

##use sapply(), vapply (), lapply(),map() and mapply()

# Names of the per-subject mark columns; every call below averages these.
score_cols <- c("math", "science", "r_prog")

# lapply(): returns a list with one element per column.
lapply(X = marks[score_cols], FUN = mean)
## $math
## [1] 84.33333
## 
## $science
## [1] 85.33333
## 
## $r_prog
## [1] 84.33333
# sapply(): same computation, simplified to a named numeric vector.
sapply(X = marks[score_cols], FUN = mean)
##     math  science   r_prog 
## 84.33333 85.33333 84.33333
# vapply(): like sapply(), but each result must be a single numeric value.
vapply(X = marks[score_cols], FUN = mean, FUN.VALUE = numeric(1))
##     math  science   r_prog 
## 84.33333 85.33333 84.33333
# mapply(): the function comes first, the data to iterate over afterwards.
mapply(FUN = mean, marks[score_cols])
##     math  science   r_prog 
## 84.33333 85.33333 84.33333
# map(): returns a list, like lapply().
map(.x = marks[score_cols], .f = mean)
## $math
## [1] 84.33333
## 
## $science
## [1] 85.33333
## 
## $r_prog
## [1] 84.33333