Assignment 1. Importing Data from Different Sources.

#install.packages("haven")
library(haven)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrr)

# Creating the data to export to a statistical-package (SPSS .sav) file.

# Build the student records used in Assignment 1 (one row per student),
# then display them.
studentsdata <- data.frame(
  studentsname = c("Joanah", "Neza", "Divine"),
  StudentsID = c(1, 2, 3),
  Marks = c(7, 8, 10),
  Gender = c("F", "M", "F")
)
print(studentsdata)
##   studentsname StudentsID Marks Gender
## 1       Joanah          1     7      F
## 2         Neza          2     8      M
## 3       Divine          3    10      F
# Round-trip the data frame through an SPSS .sav file with haven.
# The file is written to and read back from the same relative path, so the
# script does not depend on one machine's absolute directory layout.
sav_path <- "studentsdata.sav"
write_sav(studentsdata, sav_path)
getwd()
## [1] "/Users/macbook/Desktop/AUCA"
data_from_sp <- read_sav(sav_path)
print(data_from_sp)
## # A tibble: 3 × 4
##   studentsname StudentsID Marks Gender
##   <chr>             <dbl> <dbl> <chr> 
## 1 Joanah                1     7 F     
## 2 Neza                  2     8 M     
## 3 Divine                3    10 F
#View(data_from_sp)
#install.packages("RMySQL")
#install.packages("RSQLite")
library(RSQLite)
library(RMySQL)
## Loading required package: DBI
## 
## Attaching package: 'RMySQL'
## The following object is masked from 'package:RSQLite':
## 
##     isIdCurrent
library(DBI)

database

getwd()
## [1] "/Users/macbook/Desktop/AUCA"
# Open (or create) the SQLite database file in the working directory; a
# relative path keeps the script runnable outside the author's machine.
con <- dbConnect(RSQLite::SQLite(), "student.db")

insertdata

# Store the data frame as a table (replacing any earlier copy) and read it
# back into R.
dbWriteTable(con, "studentstable", studentsdata, overwrite = TRUE)
data_from_db <- dbReadTable(con, "studentstable")
# The connection is not used again below, so release it here instead of
# leaving the database handle open for the rest of the session.
dbDisconnect(con)
print(data_from_db)
##   studentsname StudentsID Marks Gender
## 1       Joanah          1     7      F
## 2         Neza          2     8      M
## 3       Divine          3    10      F

Assignment 2: Merging Datasets by 2 to 3 Variables.

Let’s first create a dataset called “Supervisors” to merge with a second dataset, “studentsdata2”, which also has to be created.

# Supervisor records: name, department, numeric ID and gender.
Supervisors <- data.frame(
  Supnames = c("Fred", "Dominique", "Belinda"),
  Department = c("IT", "Mathematics", "Statistics"),
  SupID = c(11, 12, 13),
  Gender = c("M", "M", "F")
)
print(Supervisors)
##    Supnames  Department SupID Gender
## 1      Fred          IT    11      M
## 2 Dominique Mathematics    12      M
## 3   Belinda  Statistics    13      F
# Student records extended with each student's department and supervisor name,
# so they share key columns with the Supervisors data frame.
studentsdata2 <- data.frame(
  studentsname = c("Joanah", "Neza", "Divine"),
  Department = c("IT", "Mathematics", "Statistics"),
  StudentsID = c(1, 2, 3),
  Marks = c(7, 8, 10),
  Gender = c("F", "M", "F"),
  Supnames = c("Fred", "Dominique", "Belinda")
)
print(studentsdata2)
##   studentsname  Department StudentsID Marks Gender  Supnames
## 1       Joanah          IT          1     7      F      Fred
## 2         Neza Mathematics          2     8      M Dominique
## 3       Divine  Statistics          3    10      F   Belinda

Let’s merge the two datasets “studentsdata2” and “Supervisors” by 2 variables and call the result “mergeddata”.

# Inner join on the supervisor name and the shared Gender column.
# NOTE(review): Gender is the student's gender in studentsdata2 but the
# supervisor's gender in Supervisors, so pairs whose genders differ
# (Joanah / Fred here) drop out of the result -- confirm this is intended.
mergeddata <- merge(
  x = studentsdata2,
  y = Supervisors,
  by = c("Supnames", "Gender")
)
print(mergeddata)
##    Supnames Gender studentsname Department.x StudentsID Marks Department.y
## 1   Belinda      F       Divine   Statistics          3    10   Statistics
## 2 Dominique      M         Neza  Mathematics          2     8  Mathematics
##   SupID
## 1    13
## 2    12

Let’s merge the two datasets “studentsdata2” and “Supervisors” by 3 variables and call the result “mergeddata”.

# Inner join on three shared columns: supervisor name, gender and department.
# NOTE(review): as Gender describes different people in the two tables, rows
# where the student's and supervisor's genders differ are dropped -- confirm
# this is intended.
mergeddata <- merge(
  x = studentsdata2,
  y = Supervisors,
  by = c("Supnames", "Gender", "Department")
)
print(mergeddata)
##    Supnames Gender  Department studentsname StudentsID Marks SupID
## 1   Belinda      F  Statistics       Divine          3    10    13
## 2 Dominique      M Mathematics         Neza          2     8    12

Assignment 3: group_by() with the pipe (%>%)

Let’s view the department that supervisor Dominique belongs to.

# Group the supervisors by department, then keep only Dominique's row.
# The result is still grouped by Department when it is printed.
Supervisors %>%
  group_by(Department) %>%
  filter(Supnames == "Dominique")
## # A tibble: 1 × 4
## # Groups:   Department [1]
##   Supnames  Department  SupID Gender
##   <chr>     <chr>       <dbl> <chr> 
## 1 Dominique Mathematics    12 M

Assignment 4 :

4.1. Formulate a function to calculate the mean.

# Arithmetic mean of a vector: the sum of its values divided by their count.
#
# Arguments:
#   x      A numeric (or logical) vector.
#   na.rm  Drop missing values before averaging? Defaults to FALSE, in which
#          case any NA in `x` makes the result NA.
#
# Returns a single number. An empty input gives NaN (0 / 0).
meantot <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Remove NAs up front so they affect neither the sum nor the count.
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
# Average mark across all students.
meantot(studentsdata2[["Marks"]])
## [1] 8.333333

4.2. How to use trace() and recover()

#trace(mean)

mean(marks$math)
untrace(mean)
#options(error = recover)
mean(marks$st_name)

Assignment 5 :

ggplot,geom_point

# Scatter plot of each student's marks, coloured by department.
# NOTE(review): x is a character column and every Department has a single row,
# so geom_smooth() probably has nothing to fit here -- confirm it is needed.
ggplot(
  data = studentsdata2,
  mapping = aes(x = studentsname, y = Marks, colour = Department)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Assignment 6

Let’s use sapply(). Let us calculate the number of characters in each student name.

# Count the characters in every student name; sapply() simplifies the result
# to a vector named after the input strings.
sapply(X = studentsdata2$studentsname, FUN = nchar)
## Joanah   Neza Divine 
##      6      4      6

vapply(): this lets us specify what the result should look like. Let us uppercase each student name; the output must be a character vector.

# Upper-case every student name; FUN.VALUE requires each result to be a single
# character string.
vapply(
  X = studentsdata2$studentsname,
  FUN = toupper,
  FUN.VALUE = character(1)
)
##   Joanah     Neza   Divine 
## "JOANAH"   "NEZA" "DIVINE"

Let’s use map()

Get the length of each student name

Get marks multiplied by 2 for each student

# Length of each student name, returned as a list with one element per name.
map(.x = studentsdata2$studentsname, .f = nchar)
## [[1]]
## [1] 6
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 6
# Double each student's mark, returned as a list with one element per student.
map(.x = studentsdata2$Marks, .f = function(mark) mark * 2)
## [[1]]
## [1] 14
## 
## [[2]]
## [1] 16
## 
## [[3]]
## [1] 20

mapply(): this applies a function to multiple arguments simultaneously.

# Walk the name and department columns in parallel and build one sentence per
# student; the result is named after the first vector (the student names).
mapply(
  FUN = function(student, department) paste(student, "studies", department),
  studentsdata2$studentsname,
  studentsdata2$Department
)
##                      Joanah                        Neza 
##         "Joanah studies IT"  "Neza studies Mathematics" 
##                      Divine 
## "Divine studies Statistics"

Lapply

Get the class/type of each column in the dataset.

# Report the class of every column; a data frame is a list of columns, so
# lapply() returns one entry per column.
lapply(X = studentsdata2, FUN = class)
## $studentsname
## [1] "character"
## 
## $Department
## [1] "character"
## 
## $StudentsID
## [1] "numeric"
## 
## $Marks
## [1] "numeric"
## 
## $Gender
## [1] "character"
## 
## $Supnames
## [1] "character"