library(plyr)
# Load the Affairs survey data from the local CSV export.
# read.csv() already defaults to header = TRUE and sep = ",", so neither
# argument needs to be spelled out.
filename <- "/Users/Audiorunner13/CUNY MSDS Course Work/R Programming Bridge/Data/Affairs.csv"
affairs <- read.csv(filename)

# Preview the first ten observations.
head(affairs, n = 10)

# Inspect how the object is stored (typeof) versus how it is classed (class).
typeof(affairs)
## [1] "list"
class(affairs)
## [1] "data.frame"

# Per-column summary statistics for the complete dataset.
summary(affairs)
## X affairs gender age
## Min. : 4 Min. : 0.000 Length:601 Min. :17.50
## 1st Qu.: 528 1st Qu.: 0.000 Class :character 1st Qu.:27.00
## Median :1009 Median : 0.000 Mode :character Median :32.00
## Mean :1060 Mean : 1.456 Mean :32.49
## 3rd Qu.:1453 3rd Qu.: 0.000 3rd Qu.:37.00
## Max. :9029 Max. :12.000 Max. :57.00
## yearsmarried children religiousness education
## Min. : 0.125 Length:601 Min. :1.000 Min. : 9.00
## 1st Qu.: 4.000 Class :character 1st Qu.:2.000 1st Qu.:14.00
## Median : 7.000 Mode :character Median :3.000 Median :16.00
## Mean : 8.178 Mean :3.116 Mean :16.17
## 3rd Qu.:15.000 3rd Qu.:4.000 3rd Qu.:18.00
## Max. :15.000 Max. :5.000 Max. :20.00
## occupation rating
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:3.000
## Median :5.000 Median :4.000
## Mean :4.195 Mean :3.932
## 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :7.000 Max. :5.000
# Report the mean and median of age and of affair count over every observation.
sprintf(
  "The mean age of the observations in the dataset: %s",
  mean(affairs[["age"]])
)
## [1] "The mean age of the observations in the dataset: 32.4875207986689"
sprintf(
  "The median age of the observations in the dataset: %s",
  median(affairs[["age"]])
)
## [1] "The median age of the observations in the dataset: 32"
sprintf(
  "The mean number of affairs of the observations in the dataset: %s",
  mean(affairs[["affairs"]])
)
## [1] "The mean number of affairs of the observations in the dataset: 1.45590682196339"
sprintf(
  "The median number of affairs of the observations in the dataset: %s",
  median(affairs[["affairs"]])
)
## [1] "The median number of affairs of the observations in the dataset: 0"
# Build the male analysis subset: rows whose gender is "male", restricted to
# the five columns used in the rest of the report.
affairs.men <- affairs[
  affairs[["gender"]] == "male",
  c("X", "affairs", "age", "yearsmarried", "education")
]
head(affairs.men, n = 10)
# Remove males with 0 affairs from subset
# Keep only the men whose affair count is above zero.
affairs.men <- affairs.men[affairs.men[["affairs"]] > 0, ]
head(affairs.men, n = 10)

# Swap the raw column names for presentation-friendly ones (old = new pairs).
affairs.men <- rename(
  affairs.men,
  replace = c(
    X = "ID",
    affairs = "Number of Affairs",
    age = "Age",
    yearsmarried = "Years Married",
    education = "Education"
  )
)
head(affairs.men, n = 10)

# Summary statistics for the renamed male subset.
summary(affairs.men)
## ID Number of Affairs Age Years Married
## Min. : 6 Min. : 1.000 Min. :22.00 Min. : 0.125
## 1st Qu.: 522 1st Qu.: 2.000 1st Qu.:27.00 1st Qu.: 4.000
## Median : 970 Median : 3.000 Median :32.00 Median :10.000
## Mean :1017 Mean : 5.487 Mean :34.24 Mean : 9.257
## 3rd Qu.:1560 3rd Qu.: 7.000 3rd Qu.:37.00 3rd Qu.:15.000
## Max. :1959 Max. :12.000 Max. :57.00 Max. :15.000
## Education
## Min. :12.00
## 1st Qu.:16.00
## Median :18.00
## Mean :17.17
## 3rd Qu.:18.00
## Max. :20.00
# Report the mean and median age within the male subset.
sprintf(
  "The mean age of a male that has had an affair: %s",
  mean(affairs.men[["Age"]])
)
## [1] "The mean age of a male that has had an affair: 34.2435897435897"
sprintf(
  "The median age of a male that has had an affair: %s",
  median(affairs.men[["Age"]])
)
## [1] "The median age of a male that has had an affair: 32"
# Translate the numeric years-of-schooling code into a descriptive label that
# is stored in a new lower-case `education` column; the numeric `Education`
# column is left untouched.
#
# One lookup table replaces the per-value masked assignments. It also covers
# codes 9 and 12, which previously had no label for men and were therefore
# left as NA, even though the summary of this subset shows a minimum
# Education of 12.
education_labels <- c(
  "9" = "Grade School",
  "12" = "High School Graduate",
  "14" = "Some College",
  "16" = "College Degree",
  "17" = "Some Graduate Work",
  "18" = "Master's Degree",
  "20" = "Advanced Degree"
)

# Index the table by the code's character form; a code without an entry
# yields NA. unname() drops the lookup keys so the column is a plain
# character vector.
affairs.men$education <- unname(
  education_labels[as.character(affairs.men$Education)]
)
head(affairs.men, 10)
# Build the female analysis subset under its own name: rows whose gender is
# "female", restricted to the five columns used in the rest of the report.
affairs.women <- affairs[
  affairs[["gender"]] == "female",
  c("X", "affairs", "age", "yearsmarried", "education")
]
head(affairs.women, n = 10)

# Keep only the women whose affair count is above zero.
affairs.women <- affairs.women[affairs.women[["affairs"]] > 0, ]
head(affairs.women, n = 10)

# Swap the raw column names for presentation-friendly ones (old = new pairs).
affairs.women <- rename(
  affairs.women,
  replace = c(
    X = "ID",
    affairs = "Number of Affairs",
    age = "Age",
    yearsmarried = "Years Married",
    education = "Education"
  )
)
head(affairs.women, n = 10)
# Translate the numeric years-of-schooling code into a descriptive label that
# is stored in a new lower-case `education` column; the numeric `Education`
# column is left untouched.
#
# One lookup table replaces the seven masked assignments and corrects the
# misspelled "Grade SChool" label so the stored value reads "Grade School".
education_labels <- c(
  "9" = "Grade School",
  "12" = "High School Graduate",
  "14" = "Some College",
  "16" = "College Degree",
  "17" = "Some Graduate Work",
  "18" = "Master's Degree",
  "20" = "Advanced Degree"
)

# Index the table by the code's character form; a code without an entry
# yields NA. unname() drops the lookup keys so the column is a plain
# character vector.
affairs.women$education <- unname(
  education_labels[as.character(affairs.women$Education)]
)
head(affairs.women, 10)

# Use the summary function to create an overview of the female data frame.
summary(affairs.women)
## ID Number of Affairs Age Years Married
## Min. : 12.0 Min. : 1.000 Min. :17.50 Min. : 0.75
## 1st Qu.: 346.8 1st Qu.: 2.000 1st Qu.:27.00 1st Qu.: 4.00
## Median : 979.0 Median : 7.000 Median :32.00 Median :10.00
## Mean :1038.4 Mean : 6.208 Mean :32.51 Mean : 9.83
## 3rd Qu.:1564.0 3rd Qu.:12.000 3rd Qu.:38.25 3rd Qu.:15.00
## Max. :9010.0 Max. :12.000 Max. :57.00 Max. :15.00
## Education education
## Min. : 9.00 Length:72
## 1st Qu.:14.00 Class :character
## Median :16.00 Mode :character
## Mean :15.25
## 3rd Qu.:17.00
## Max. :20.00
# It appears that the average age of females who have had an affair is almost equal to the average age of the entire dataset, while males who have had an affair are slightly older on average. The mean ages of a male, a female and the entire dataset are 34.24, 32.51 and 32.49 respectively.
# The median age for men, women and the entire dataset is equal to 32.
# However, one interesting statistic is that, among those who have had an affair, women tend to have more affairs than men.
# Compare the mean affair count between the male and female subsets.
sprintf(
  "The mean number of a male affairs is: %s",
  mean(affairs.men[["Number of Affairs"]])
)
## [1] "The mean number of a male affairs is: 5.48717948717949"
sprintf(
  "The mean number of a female affairs is: %s",
  mean(affairs.women[["Number of Affairs"]])
)
## [1] "The mean number of a female affairs is: 6.20833333333333"
# BONUS - place the original .csv in a GitHub repository and have R read it from the link. This will be a very useful skill as you progress in your data science education and career.
library(RCurl)
# Fetch the raw CSV from GitHub as a single string, then parse that string in
# memory instead of reading from disk.
# NOTE(review): `filename` is reused here and now holds the downloaded CSV
# text rather than a path.
filename <- getURL(
  url = "https://raw.githubusercontent.com/audiorunner13/Masters-Coursework/main/Affairs.csv"
)
affairs_bonus <- read.csv(text = filename, header = TRUE, sep = ",")
head(affairs_bonus, n = 10)