R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# 1) Load the Affairs data and summarise every column

# The CSV is read directly from GitHub; the URL includes a commit hash.
csv_file <- "https://raw.githubusercontent.com/ArcticNick/Rdataset/dd55db0eaf9da8cbe2c39bcd3b12e74885cb980d/Affairs.csv"
data <- read.csv(file = csv_file)

# Alternative source (Rdatasets site), kept for reference:
# my_url <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv"
# data <- read.csv(file = my_url)

summary(data)
##        X           affairs          gender               age       
##  Min.   :   4   Min.   : 0.000   Length:601         Min.   :17.50  
##  1st Qu.: 528   1st Qu.: 0.000   Class :character   1st Qu.:27.00  
##  Median :1009   Median : 0.000   Mode  :character   Median :32.00  
##  Mean   :1060   Mean   : 1.456                      Mean   :32.49  
##  3rd Qu.:1453   3rd Qu.: 0.000                      3rd Qu.:37.00  
##  Max.   :9029   Max.   :12.000                      Max.   :57.00  
##   yearsmarried      children         religiousness     education    
##  Min.   : 0.125   Length:601         Min.   :1.000   Min.   : 9.00  
##  1st Qu.: 4.000   Class :character   1st Qu.:2.000   1st Qu.:14.00  
##  Median : 7.000   Mode  :character   Median :3.000   Median :16.00  
##  Mean   : 8.178                      Mean   :3.116   Mean   :16.17  
##  3rd Qu.:15.000                      3rd Qu.:4.000   3rd Qu.:18.00  
##  Max.   :15.000                      Max.   :5.000   Max.   :20.00  
##    occupation        rating     
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000  
##  Median :5.000   Median :4.000  
##  Mean   :4.195   Mean   :3.932  
##  3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :7.000   Max.   :5.000
# Mean and median of age and yearsmarried for the full data set.
mean_age <- mean(data$age)
mean_yearsmarried <- mean(data$yearsmarried)

median_age <- median(data$age)
median_yearsmarried <- median(data$yearsmarried)

# Each label already ends in ": ", so the pieces are joined with paste0().
# paste() would insert its default " " separator as well and print two spaces
# between the label and the value.
print(paste0("Mean of age: ", sprintf("%.2f", mean_age)))
## [1] "Mean of age: 32.49"
print(paste0("Median of age: ", median_age))
## [1] "Median of age: 32"
print(paste0("Mean of Years Married: ", sprintf("%.2f", mean_yearsmarried)))
## [1] "Mean of Years Married: 8.18"
print(paste0("Median of yearsmarried: ", median_yearsmarried))
## [1] "Median of yearsmarried: 7"
# 2) and 3)

# Keep only age, yearsmarried and gender for the first 200 rows of data, then
# add an is_male column: "yes" where gender is "male", "nope" otherwise.
subset_data <- data %>%
  select(age, yearsmarried, gender) %>%
  slice(1:200) %>%
  mutate(is_male = ifelse(gender == "male", "yes", "nope"))

head(subset_data, n = 10)
##    age yearsmarried gender is_male
## 1   37        10.00   male     yes
## 2   27         4.00 female    nope
## 3   32        15.00 female    nope
## 4   57        15.00   male     yes
## 5   22         0.75   male     yes
## 6   32         1.50 female    nope
## 7   22         0.75 female    nope
## 8   57        15.00   male     yes
## 9   32        15.00 female    nope
## 10  22         1.50   male     yes
# 4)
# Print the summary of the subset data, then its means and medians
summary(subset_data)
##       age         yearsmarried       gender            is_male         
##  Min.   :17.50   Min.   : 0.125   Length:200         Length:200        
##  1st Qu.:27.00   1st Qu.: 1.500   Class :character   Class :character  
##  Median :32.00   Median : 7.000   Mode  :character   Mode  :character  
##  Mean   :32.45   Mean   : 7.861                                        
##  3rd Qu.:37.00   3rd Qu.:15.000                                        
##  Max.   :57.00   Max.   :15.000
# Mean and median of age and yearsmarried for the 200-row subset.
mean_age2 <- mean(subset_data$age)
mean_yearsmarried2 <- mean(subset_data$yearsmarried)

median_age2 <- median(subset_data$age)
median_yearsmarried2 <- median(subset_data$yearsmarried)

# Print the mean and median for the same two attributes.
# Each label already ends in ": ", so the pieces are joined with paste0().
# paste() would insert its default " " separator as well and print two spaces
# between the label and the value.
print("Means and Medians from the subset data")
## [1] "Means and Medians from the subset data"
print(paste0("Mean of subset age: ", sprintf("%.2f", mean_age2)))
## [1] "Mean of subset age: 32.45"
print(paste0("Mean of subset Years Married: ", sprintf("%.2f", mean_yearsmarried2)))
## [1] "Mean of subset Years Married: 7.86"
print(paste0("Median of age from subset data: ", median_age2))
## [1] "Median of age from subset data: 32"
print(paste0("Median of yearsmarried from the subset data: ", median_yearsmarried2))
## [1] "Median of yearsmarried from the subset data: 7"
# Compare the mean and the median age of the full data with the subset.

# Build the comparison sentence for one statistic.
#   stat_name    - name of the statistic used in the sentence ("mean", "median")
#   full_value   - value of the statistic computed on data
#   subset_value - value of the statistic computed on subset_data
# Equal values are reported as "equal to" rather than falling through to
# "less than", and the same statistic name is used on both sides.
describe_age_comparison <- function(stat_name, full_value, subset_value) {
  relation <- if (full_value > subset_value) {
    "greater than"
  } else if (full_value < subset_value) {
    "less than"
  } else {
    "equal to"
  }
  paste0(
    "The ", stat_name, " of the age from data is ", relation,
    " the ", stat_name, " of the subset data."
  )
}

print(describe_age_comparison("mean", mean_age, mean_age2))
## [1] "The mean of the age from data is greater than the mean of the subset data."
print(describe_age_comparison("median", median_age, median_age2))
## [1] "The median of the age from data is equal to the median of the subset data."
# 5) Changing 3 values in a column: changing the values 1.5, 4, and 15

# Replace the three values with text labels in a single pass. Every condition
# is evaluated against the column as it stands before this call, rather than
# against a column that an earlier replacement has already coerced to
# character. Values that match none of the conditions are converted with
# as.character() explicitly, so the column is character whether or not
# anything matched.
subset_data <- mutate(
  subset_data,
  yearsmarried = case_when(
    yearsmarried == 15 ~ "number changed",
    yearsmarried == 1.5 ~ "one point five",
    yearsmarried == 4 ~ "four",
    TRUE ~ as.character(yearsmarried)
  )
)
head(subset_data, 20)
##    age   yearsmarried gender is_male
## 1   37             10   male     yes
## 2   27           four female    nope
## 3   32 number changed female    nope
## 4   57 number changed   male     yes
## 5   22           0.75   male     yes
## 6   32 one point five female    nope
## 7   22           0.75 female    nope
## 8   57 number changed   male     yes
## 9   32 number changed female    nope
## 10  22 one point five   male     yes
## 11  37 number changed   male     yes
## 12  27           four   male     yes
## 13  47 number changed   male     yes
## 14  22 one point five female    nope
## 15  27           four female    nope
## 16  37 number changed female    nope
## 17  37 number changed female    nope
## 18  22           0.75 female    nope
## 19  22 one point five female    nope
## 20  27             10 female    nope
# 6) Display enough rows to see examples of all steps 1-5 above

head(data, n = 15)
##     X affairs gender age yearsmarried children religiousness education
## 1   4       0   male  37        10.00       no             3        18
## 2   5       0 female  27         4.00       no             4        14
## 3  11       0 female  32        15.00      yes             1        12
## 4  16       0   male  57        15.00      yes             5        18
## 5  23       0   male  22         0.75       no             2        17
## 6  29       0 female  32         1.50       no             2        17
## 7  44       0 female  22         0.75       no             2        12
## 8  45       0   male  57        15.00      yes             2        14
## 9  47       0 female  32        15.00      yes             4        16
## 10 49       0   male  22         1.50       no             4        14
## 11 50       0   male  37        15.00      yes             2        20
## 12 55       0   male  27         4.00      yes             4        18
## 13 64       0   male  47        15.00      yes             5        17
## 14 80       0 female  22         1.50       no             2        17
## 15 86       0 female  27         4.00       no             4        14
##    occupation rating
## 1           7      4
## 2           6      4
## 3           1      4
## 4           6      5
## 5           6      3
## 6           5      5
## 7           1      3
## 8           4      4
## 9           1      2
## 10          4      5
## 11          7      2
## 12          6      4
## 13          6      4
## 14          5      4
## 15          5      4
head(subset_data, n = 15)
##    age   yearsmarried gender is_male
## 1   37             10   male     yes
## 2   27           four female    nope
## 3   32 number changed female    nope
## 4   57 number changed   male     yes
## 5   22           0.75   male     yes
## 6   32 one point five female    nope
## 7   22           0.75 female    nope
## 8   57 number changed   male     yes
## 9   32 number changed female    nope
## 10  22 one point five   male     yes
## 11  37 number changed   male     yes
## 12  27           four   male     yes
## 13  47 number changed   male     yes
## 14  22 one point five female    nope
## 15  27           four female    nope