#Assignment1
##Import data from both statistical package and database management system
###generate and save data in statistical package :
library(haven)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Build a small student roster and save it in SPSS (.sav) format with haven.
st_data <- data.frame(
  st_id = c(1, 2, 3),
  f_name = c("Solaange", "Didi", "Luc"),
  l_name = c("Uwi", "Inga", "Ira"),
  semester = c(1, 1, 1)
)
write_sav(st_data, "st_data.sav")
## Import data from the statistical package (SP):
getwd()
## [1] "C:/Users/Soluc/OneDrive/Documents/r-programming-msc/Assignments"
# Read the file back through the same relative path it was written to, so the
# script does not depend on one machine's absolute directory layout.
d_from_sp <- read_sav("st_data.sav")
print(d_from_sp)
## # A tibble: 3 × 4
## st_id f_name l_name semester
## <dbl> <chr> <chr> <dbl>
## 1 1 Solaange Uwi 1
## 2 2 Didi Inga 1
## 3 3 Luc Ira 1
#data from DBMS
library(RSQLite)
library(DBI)
library(RMySQL)
##
## Attaching package: 'RMySQL'
## The following object is masked from 'package:RSQLite':
##
## isIdCurrent
# Open (or create) the SQLite database file in the working directory.
con <- dbConnect(RSQLite::SQLite(), "st_database.db")
# overwrite = TRUE replaces any existing table, so the script can be re-run.
dbWriteTable(con, "st_table", st_data, overwrite = TRUE)
# Importing data from the DBMS:
d_from_db <- dbReadTable(con, "st_table")
# Release the connection now that the table has been read back.
dbDisconnect(con)
print(d_from_db)
## st_id f_name l_name semester
## 1 1 Solaange Uwi 1
## 2 2 Didi Inga 1
## 3 3 Luc Ira 1
#View(d_from_db)
#Assignment 2
##How to merge datasets using 2 to 3 variables
# One row per student, including the course they take and the academic year.
students_set <- data.frame(
  st_id         = c(1, 2, 3),
  f_name        = c("Solange", "Didi", "Luc"),
  l_name        = c("Uwi", "Inga", "Ira"),
  semester      = rep(1, 3),
  c_name        = c("Algo", "Ethics", "R"),
  academic_year = rep(2026, 3)
)
# One row per course, with its teacher and academic year.
courses_set <- data.frame(
  c_id          = c(1, 2, 3),
  c_name        = c("Algo", "Ethics", "R"),
  teacher       = c("Andrew", "Lydie", "Kevin"),
  semester      = rep(1, 3),
  academic_year = c(2026, 2026, 2025)
)
# Two-key merge: academic_year exists in both inputs but is not a key, so
# merge() keeps both copies and disambiguates them with .x / .y suffixes.
merged_by_2v <- merge(
  x = students_set,
  y = courses_set,
  by = c("semester", "c_name")
)
merged_by_2v
## semester c_name st_id f_name l_name academic_year.x c_id teacher
## 1 1 Algo 1 Solange Uwi 2026 1 Andrew
## 2 1 Ethics 2 Didi Inga 2026 2 Lydie
## 3 1 R 3 Luc Ira 2026 3 Kevin
## academic_year.y
## 1 2026
## 2 2026
## 3 2025
# Three-key merge: academic_year is now a key as well, so the "R" row drops
# out because its year differs between the two inputs (2026 vs 2025).
merged_by_3v <- merge(
  x = students_set,
  y = courses_set,
  by = c("semester", "c_name", "academic_year")
)
merged_by_3v
## semester c_name academic_year st_id f_name l_name c_id teacher
## 1 1 Algo 2026 1 Solange Uwi 1 Andrew
## 2 1 Ethics 2026 2 Didi Inga 2 Lydie
#Assignment 3 ##how to use group_by function with %>%
# Group the roster by course, then keep only the row whose st_id is 1.
# The result is printed, not stored, so it stays a grouped tibble.
students_set %>%
  group_by(c_name) %>%
  filter(st_id == 1)
## # A tibble: 1 × 6
## # Groups: c_name [1]
## st_id f_name l_name semester c_name academic_year
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 Solange Uwi 1 Algo 2026
#Assignment 4 ##Create a function to calculate the mean
#' Arithmetic mean of a vector
#'
#' @param x A numeric (or logical) vector.
#' @param na.rm If `TRUE`, drop `NA` values before averaging. Defaults to
#'   `FALSE`, in which case any `NA` in `x` makes the result `NA`.
#' @return A length-one numeric, `sum(x) / length(x)`; `NaN` when there are
#'   no values left to average.
f11 <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
f11(c(10, 20, 30, 40))
## [1] 25
###how to use trace() and recover()
# Sample marks table; the apply-family section later in this script reuses it.
marks <- data.frame(
  st_id = c(1, 2, 3),
  st_name = c("Solange", "Didi", "Luc"),
  math = c(85, 90, 78),
  science = c(88, 76, 92),
  r_prog = c(95, 88, 70)
)
marks
## st_id st_name math science r_prog
## 1 1 Solange 85 88 95
## 2 2 Didi 90 76 88
## 3 3 Luc 78 92 70
# trace() makes R report each call to mean() until untrace() is called.
trace(mean)
mean(marks$math)
## trace: mean(marks$math)
## [1] 84.33333
untrace(mean)
# options() returns the previous values of the options it changes; keep them
# so the error handler can be put back afterwards.
old_opts <- options(error = recover)
# NOTE(review): on a character column mean() only warns and returns NA (see
# the output below), so no error is raised and recover() is not entered here.
mean(marks$st_name)
## Warning in mean.default(marks$st_name): argument is not numeric or logical:
## returning NA
## [1] NA
# Restore the previous error handler so later errors in this script do not
# drop into the recover() browser.
options(old_opts)
#Assignment 5 ## use ggplot2
# Sample data for the plot: 20 students with class, two scores, study hours,
# absent days and a grade.
students <- data.frame(
  name = c(
    "Alice", "Bob", "Carol", "David", "Eva", "Frank", "Grace", "Henry",
    "Iris", "James", "Karen", "Leo", "Mia", "Noah", "Olivia", "Paul",
    "Quinn", "Rose", "Sam", "Tina"
  ),
  gender = c(
    "F", "M", "F", "M", "F", "M", "F", "M", "F", "M",
    "F", "M", "F", "M", "F", "M", "F", "F", "M", "F"
  ),
  class = c(
    "A", "A", "A", "A", "B", "B", "B", "B", "C", "C",
    "C", "C", "A", "B", "C", "A", "B", "C", "A", "B"
  ),
  math_score = c(
    85, 72, 90, 65, 78, 88, 55, 92, 70, 83,
    60, 95, 74, 81, 68, 77, 89, 62, 91, 73
  ),
  eng_score = c(
    78, 65, 88, 70, 82, 75, 60, 85, 72, 79,
    65, 80, 90, 77, 71, 69, 84, 66, 88, 76
  ),
  hours_study = c(
    5, 3, 6, 2, 4, 5, 2, 7, 3, 5,
    2, 8, 4, 5, 3, 4, 6, 3, 7, 4
  ),
  absent_days = c(
    2, 5, 1, 8, 3, 2, 9, 1, 6, 3,
    7, 0, 2, 4, 5, 3, 1, 6, 2, 4
  ),
  grade = c(
    "B", "C", "A", "D", "B", "B", "F", "A", "C", "B",
    "D", "A", "B", "B", "C", "B", "A", "D", "A", "C"
  )
)
# Box plot of English scores per class, with one fill colour per class.
students %>%
  ggplot(aes(x = class, y = eng_score, fill = class)) +
  geom_boxplot() +
  # No trailing comma after the last label: it would pass an empty argument.
  labs(
    title = "Compare classes by eng score",
    subtitle = "here is the graph"
  )
#Assignment 6
##use sapply(), vapply (), lapply(),map() and mapply()
# The three subject columns that every call below averages.
subject_scores <- marks[c("math", "science", "r_prog")]
# lapply(): returns a list with one element per column.
lapply(subject_scores, mean)
## $math
## [1] 84.33333
##
## $science
## [1] 85.33333
##
## $r_prog
## [1] 84.33333
# sapply(): here the result is simplified to a named numeric vector.
sapply(subject_scores, mean)
## math science r_prog
## 84.33333 85.33333 84.33333
# vapply(): the expected per-column result, numeric(1), is declared up front.
vapply(subject_scores, mean, numeric(1))
## math science r_prog
## 84.33333 85.33333 84.33333
# mapply(): the function comes first, the data to iterate over second.
mapply(mean, subject_scores)
## math science r_prog
## 84.33333 85.33333 84.33333
# map(): returns a list, as lapply() does above.
map(subject_scores, mean)
## $math
## [1] 84.33333
##
## $science
## [1] 85.33333
##
## $r_prog
## [1] 84.33333