Lab class session

## Warning: package 'kableExtra' was built under R version 4.0.2

## Warning: package 'dplyr' was built under R version 4.0.2

Questions

Question 1. Create a chunk and calculate the following in one chunk

sqrt (16)

## [1] 4

16^0.5

## [1] 4

4^3

## [1] 64

Question 2. In the R Markdown file, create a second chunk that contains the following syntax

pi

## [1] 3.141593

round(pi)

## [1] 3

round(pi, digits=4)

## [1] 3.1416

trunc(pi)

## [1] 3

Question 3. In the R Markdown file

# Create a vector x that contains.
x <- c(3, 6, 8)
print (x)

## [1] 3 6 8

x/2

## [1] 1.5 3.0 4.0

x^2

## [1]  9 36 64

sqrt(x)

## [1] 1.732051 2.449490 2.828427

# Find the second element of x 
x[2]

## [1] 6

# Find the first and third element of vector x
x[1]

## [1] 3

x[3]

## [1] 8

# Generate a vector y that contains values of (2, 5, 1)
Y <-c(2, 5, 1)
print (Y)

## [1] 2 5 1

# Calculate x-y
x-Y

## [1] 1 1 7

# Calculate x*y
x*Y

## [1]  6 30  8

Question 4. Assume that we have registered the height and weight for four people: Heights in cm are 180, 165, 160, 193; weights in kg are 87, 58, 65, 100. Make two vectors, height and weight, with the data. The body mass index (BMI) is defined as $\mathrm{BMI} = \text{weight in kg} / (\text{height in m})^2$. Make a vector with the BMI values for the four people. Finally, make a vector with the weights for those people who have a BMI larger than 25. Include your answers in one chunk.

library(units)

## udunits system database from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/units/share/udunits

ud_units [1:3]

## $m
## 1 [m]
## 
## $kg
## 1 [kg]
## 
## $s
## 1 [s]

# Convert lengths in centimetres to metres. Vectorised: accepts a single
# number or a whole numeric vector and returns values of the same length.
cm_to_m <- function(x) x / 100

# Heights in cm (180, 165, 160, 193), converted to metres.
Height <- cm_to_m(c(180, 165, 160, 193))

# Weights in kg (87, 58, 65, 100), in the same person order as Height.
Weight <- c(87, 58, 65, 100)

# BMI = weight in kg / (height in m)^2, computed element-wise per person.
BMI <- Weight / Height^2

# BMI values that are greater than 25.
BMI_25 <- BMI[BMI > 25]

# Weights of the people whose BMI is greater than 25. This is the vector
# Question 4 asks for; BMI_25 above holds the BMI values, not the weights.
Weight_BMI_25 <- Weight[BMI > 25]

Question 5. Make a vector called score, which contains the following statistics 77, 93, 92, 68,75,100

# create the Vector scores
score <- c(77, 93, 92, 68,75,100)

# Calculate the sum of all scores
sum(score)

## [1] 505

# calculate the average score.
mean(score)

## [1] 84.16667

# sorting the Vector scores.
sort(score)

## [1]  68  75  77  92  93 100

# Calculates the median value of Vector scores
median(score)

## [1] 84.5

# Calculate the standard deviation value of Vector scores
sd(score)

## [1] 12.54459

# Calculate the variance value of Vector scores
var(score)

## [1] 157.3667

# Calculate the minimum value of vector scores
min(score)

## [1] 68

# Calculate the Maximum value of vector scores
max(score)

## [1] 100

Question 6. Assume that you are interested in cone-shaped structures, and have measured the height and radius of 6 cones. Make vectors with these values as follows:

# Radii (R) and heights (H) of the six measured cones; element i of each
# vector belongs to the same cone.
R <- c(2.27, 1.98, 1.69, 1.88, 1.64, 2.14) 
H <- c(8.28, 8.04, 9.06, 8.70, 7.58, 8.34) 

# Volume of a cone: V = (1/3) * pi * R^2 * H. The arithmetic is element-wise,
# so Cone_vol[i] is the volume for radius R[i] and height H[i].
Cone_vol <- (1/3*pi*R^2*H)

# Display the cone volumes. View() opens them in a data viewer rather than
# printing them, which is why no output is recorded below this call.
View(Cone_vol)

Question 7. Compute the mean, median and standard deviation of the cone volumes. Compute also the mean of volume for the cones with a height less than 8.5

# Mean of the cone volumes.
mean(Cone_vol)

## [1] 33.05525

# Median of the cone volumes.
median(Cone_vol)

## [1] 32.60413

# Standard deviation of the cone volumes.
sd(Cone_vol)

## [1] 8.44469

# Keep the volumes of the cones whose height H is less than 8.5. The filter
# is on the height vector H, not on the volume itself.
Cone_vol_8.5 <- subset(Cone_vol, H<8.5)

# Mean volume of the cones with a height less than 8.5.
mean(Cone_vol_8.5 )

## [1] 34.75834

Question 8. Load a library called MASS, and load the data within the MASS library called cats

library(MASS)

## Warning: package 'MASS' was built under R version 4.0.2

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

# Load the cats data set
data(cats)

# What’s the dimension of the data “cats”?
dim(cats)

## [1] 144   3

#How many variables are there? What are they?
str(cats)

## 'data.frame':    144 obs. of  3 variables:
##  $ Sex: Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bwt: num  2 2 2 2.1 2.1 2.1 2.1 2.1 2.1 2.1 ...
##  $ Hwt: num  7 7.4 9.5 7.2 7.3 7.6 8.1 8.2 8.3 8.5 ...

#How many observations are there?
# 144 Observations

#Is sex a factor variable? 
class(cats$Sex)

## [1] "factor"

Question 9. Load a dataset called “surveyS18.csv”

library(haven)

#set working directory.
setwd("~/Documents/R_programming")
samo <- read.csv("surveyS18.csv")

#What’s the dimension of the data?
dim(samo)

## [1] 177  53

#How many variables are there? & How many observations are there?
str(samo)

## 'data.frame':    177 obs. of  53 variables:
##  $ pets                     : int  0 0 1 0 2 13 4 1 3 1 ...
##  $ us_region                : chr  "California" "Not from the US" "New England" "Southeast" ...
##  $ class_year               : chr  "Junior" "First-year" "Sophomore" "Senior" ...
##  $ hair_color               : chr  "Black" "Black" "Black" "Black" ...
##  $ campus                   : chr  "West" "East" "East" "Off campus" ...
##  $ watch_sports             : chr  "Other" "Other" "Basketball" "Soccer" ...
##  $ beyonce_love             : chr  "hell yes" "yes" "yes" "hell yes" ...
##  $ fav_artist               : chr  "none of these" "Taylor swift" "Taylor swift" "Taylor swift" ...
##  $ social_network           : chr  "Instagram" "Snapchat" "Yikyak" "Instagram" ...
##  $ relationship_status      : chr  "no" "no" "no" "no" ...
##  $ num_siblings             : int  1 1 1 1 2 2 2 3 0 2 ...
##  $ num_languages            : int  3 2 1 NA 1 2 3 1 1 2 ...
##  $ nights_drinking          : num  1 1 1 2 1 0 1 0 1 0 ...
##  $ pbj_or_n                 : chr  "All three" "Nutella" "Nutella" "Nutella" ...
##  $ tenting                  : chr  "unsure" "none" "none" "none" ...
##  $ countries_visited        : int  12 15 14 3 13 5 15 1 8 6 ...
##  $ first_kiss               : int  15 12 5 7 15 14 16 17 16 18 ...
##  $ hours_watching_television: num  6 5 10 8 3 2 10 5 4 1 ...
##  $ perimeter                : chr  "What even is the Perimeter " "What even is the Perimeter " "Outside" "All of the Above" ...
##  $ fav_campus_eatery        : chr  "None of the above" "Other" "Blue Express" "Divinity Refectory" ...
##  $ fb_visits_per_day        : int  30 12 1 2 12 4 10 10 3 5 ...
##  $ ac                       : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ condiment                : chr  "Soy Sauce" "Maple Syrup" "Sriracha" "Sriracha" ...
##  $ vegetarian               : chr  "no" "no" "no" "no" ...
##  $ voted_for_president      : chr  "yes" "no, I was not eligible due to age or citizenship status" "yes" "yes" ...
##  $ social_club              : chr  "Greek life" "Athletic Team" "Greek life" "Independent" ...
##  $ space_time               : chr  "Time" "Time" "Time" "Time" ...
##  $ university_applications  : int  13 1 1 2 3 9 2 7 1 11 ...
##  $ pizza_consumption        : num  3 1 2 2 2 4 2 3 12 0 ...
##  $ sick                     : int  4 5 1 0 3 3 2 0 1 0 ...
##  $ games_attended           : int  1 0 0 0 2 3 3 5 7 2 ...
##  $ pepsi_or_coke            : chr  "Coke" "No preference" "Coke" "Coke" ...
##  $ fav_friend_character     : chr  "Monica" "Rachel" "Monica" "Phoebe" ...
##  $ continents_visited       : int  5 5 5 3 3 2 4 2 4 3 ...
##  $ game_of_thrones          : chr  "House Targaryen" NA "House Targaryen" NA ...
##  $ netflix_binge_show       : chr  "Friends" "Friends" "House of Cards" "Gilmore Girls" ...
##  $ chipotle_order           : chr  "Burrito" "Burrito Bowl" "Burrito Bowl" "Burrito Bowl" ...
##  $ fav_pokemon              : chr  "Charizard" "Charizard" "Charizard" "Charizard" ...
##  $ first_tooth              : int  3 10 5 6 NA 5 4 6 9 NA ...
##  $ fav_cheese               : chr  "Brie" "None of the above" "Feta" "Brie" ...
##  $ cat_or_dog               : chr  "Dog" "Dog" "Dog" "Dog" ...
##  $ fav_late_night_food      : chr  "Cookout" "Pizza" "Jimmy John's" "Cookout" ...
##  $ fav_dessert_flavor       : chr  "vanilla" "chocolate" "chocolate" "chocolate" ...
##  $ fav_off_campus_restaurant: chr  "Satisfactions" "Cosmic" "Cosmic" "Chipotle" ...
##  $ vending_machine          : chr  "Sometimes" "Sometimes" "No" "Shhhhh!" ...
##  $ multicultural            : chr  "Yes" "No" "No" "Yes" ...
##  $ smell                    : chr  "Bakery" "Clean Laundry" "Pine" "Pizza" ...
##  $ parents_age              : num  52 48 50 60 50 45 40 49 54 54 ...
##  $ marketplace_worst_meal   : chr  "chocolate covered crickets" "watery pasta" "watery pasta" "watery pasta" ...
##  $ best_book_turned_show    : chr  "Harry Potter" "Harry Potter" "Harry Potter" "Harry Potter" ...
##  $ fav_fantasy_universe     : chr  "Harry Potter" "Harry Potter" "Lord of the Rings" "Harry Potter" ...
##  $ hogwarts_house           : chr  "Hufflepuff (because I'm good at something)" "Slytherin (because I'll cut you)" "Gryffindor (because I'm a badass)" "Ravenclaw (because I'm better than you)" ...
##  $ shooters                 : num  3 1 1 2 0.4 0 2 1 4 0 ...

Question 10. Load a dataset called “lnwage.dta”

# a.    Check if the data set has been imported correctly. (HINT: use head or View)
setwd("~/Documents/R_programming")
In_wage <- read_dta ("lnwage.dta")
View(In_wage)

# b.    What’s the dimension of the data?
dim(In_wage)

## [1] 1434    6

# c.    How many variables are there?  
str(In_wage)

## tibble [1,434 × 6] (S3: tbl_df/tbl/data.frame)
##  $ lnwage: num [1:1434] 3.73 3.6 3.16 3.39 3.81 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ educ  : num [1:1434] 9 9 10.5 12 12 10.5 10.5 17.5 17.5 10.5 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ exper : num [1:1434] 9.04 7.25 2.5 26.5 13.92 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ tenure: num [1:1434] 3.4167 20.6667 0.0833 6.1667 1.3333 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ female: num [1:1434] 1 0 1 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ wt    : num [1:1434] 0.53 1.06 1.06 1.06 1.06 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"

# Answer: 6 variables (lnwage, educ, exper, tenure, female, wt)

# d.    How many observations are there?
#  Answer: 1434 observations

# e.    Check whether female is a factor variable. If not, make it as a factor variable. Check the structure of the variable female again. 
class(In_wage$female)

## [1] "numeric"

typeof(In_wage$female)

## [1] "double"

# Convert the numeric 0/1 variable female to a factor.
# In_wage is a tibble, so In_wage[, "female"] is still a one-column tibble;
# calling factor() on that collapses the column into the single level
# "c(1, 0)" with every value NA. Extract the column as a plain vector with
# [[ ]] so that each observation keeps its own 0/1 value.
In_wage[["female"]] <- factor(In_wage[["female"]])

# Check that the variable is now a factor with two levels.
typeof(In_wage$female)

## [1] "integer"

class(In_wage$female)

## [1] "factor"

str(In_wage$female)

##  Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 1 1 1 ...

# f.    What’s the average education (in years) among all respondents? (educ is the variable that captures educational attainment in years)  
mean(In_wage$educ)

## [1] 11.53696

Question 11. a. Enter the following into a vector with the name color. Remember to surround each piece of text with quotes.

color <- c("purple" , "red" , "yellow", "brown")
## Question 11. b. Display the 2nd element in the vector (red) in the console. 
# Answer: 
color[2]

## [1] "red"

## Question 11. c. Enter the following into a vector with the name weight
weight <- c(23,21,18, 26 )

## Question 11. d. Join the 2 vectors together using the data.frame function to make a data frame named info with 2 columns and 4 rows. Call the first column ‘color’ and the second one ‘weight’.
info <- data.frame(color,weight)

Lab class session

Samson Olowolaju

Sept. 10, 2020

Questions

Question 1. Create a chunk and calculate the following in one chunk

Question 2. In the R Markdown file, create a second chunk that contains the following syntax

Question 3. In the R Markdown file

Question 5. Make a vector called score, which contains the following statistics 77, 93, 92, 68,75,100

Question 6. Assume that you are interested in cone-shaped structures, and have measured the height and radius of 6 cones. Make vectors with these values as follows:

Question 7. Compute the mean, median and standard deviation of the cone volumes. Compute also the mean of volume for the cones with a height less than 8.5

Question 8. Load a library called MASS, and load the data within the MASS library called cats

Question 9. Load a dataset called “surveyS18.csv”

Question 10. Load a dataset called “lnwage.dta”

Question 11. a. Enter the following into a vector with the name color. Remember to surround each piece of text with quotes.