# Location of the affairs CSV in the GitHub repo. The path contains literal
# spaces ("Winter Bridge - R"), which are not valid inside a URL, so it is
# percent-encoded before being handed to read.csv().
theData <- utils::URLencode(
  "https://raw.github.com/geeman1209/MSDATA2020/master/Winter Bridge - R/HW2/affairs.csv"
)
# Download the file and parse it into a data frame
Affairs <- read.csv(theData)
# Preview the first ten rows; head() returns only the rows that exist instead
# of padding with NA rows when the data has fewer than ten.
head(Affairs, 10)
## X naffairs kids vryunhap unhap avgmarr hapavg vryhap antirel notrel
## 1 1 0 0 0 0 0 1 0 0 0
## 2 2 0 0 0 0 0 1 0 0 0
## 3 3 3 0 0 0 0 1 0 0 0
## 4 4 0 1 0 0 0 1 0 1 0
## 5 5 3 1 0 0 0 0 1 0 0
## 6 6 0 1 0 0 0 0 1 0 0
## 7 7 0 0 0 0 1 0 0 0 1
## 8 8 0 0 0 0 0 0 1 0 1
## 9 9 7 1 0 1 0 0 0 0 0
## 10 10 0 0 0 0 1 0 0 0 1
## slghtrel smerel vryrel yrsmarr1 yrsmarr2 yrsmarr3 yrsmarr4 yrsmarr5 yrsmarr6
## 1 1 0 0 0 0 0 0 1 0
## 2 0 1 0 0 0 1 0 0 0
## 3 1 0 0 0 1 0 0 0 0
## 4 0 0 0 0 0 0 0 0 1
## 5 1 0 0 0 0 1 0 0 0
## 6 0 0 1 0 0 0 0 0 1
## 7 0 0 0 1 0 0 0 0 0
## 8 0 0 0 0 1 0 0 0 0
## 9 0 0 1 0 0 0 0 0 1
## 10 0 0 0 1 0 0 0 0 0
# Per-column summary (min, quartiles, median, mean, max) of the full data set
summary(Affairs)
## X naffairs kids vryunhap
## Min. : 1 Min. : 0.000 Min. :0.0000 Min. :0.00000
## 1st Qu.:151 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :301 Median : 0.000 Median :1.0000 Median :0.00000
## Mean :301 Mean : 1.456 Mean :0.7155 Mean :0.02662
## 3rd Qu.:451 3rd Qu.: 0.000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :601 Max. :12.000 Max. :1.0000 Max. :1.00000
## unhap avgmarr hapavg vryhap
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1098 Mean :0.1547 Mean :0.3228 Mean :0.386
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## antirel notrel slghtrel smerel
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.07987 Mean :0.2729 Mean :0.2146 Mean :0.3161
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## vryrel yrsmarr1 yrsmarr2 yrsmarr3
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.1165 Mean :0.08652 Mean :0.1464 Mean :0.1747
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## yrsmarr4 yrsmarr5 yrsmarr6
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1364 Mean :0.1165 Mean :0.3394
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
# Mean and median of two attributes over every respondent: the number of
# affairs (naffairs) and the has-children indicator (kids).
AvgAffairs <- mean(Affairs[["naffairs"]])
Median.Affairs <- median(Affairs[["naffairs"]])
Kids <- mean(Affairs[["kids"]])
median.kids <- median(Affairs[["kids"]])
# Row labels (q) and the matching statistics (a), kept in the same order so
# they line up when placed side by side in a data frame.
q <- c(
  "Average Affairs",
  "Median",
  "Average Kids",
  "Median Kids"
)
a <- c(
  AvgAffairs,
  Median.Affairs,
  Kids,
  median.kids
)
# data.frame() converts the "Column Names" header to the syntactic name
# Column.Names.
originalData <- data.frame(
  `Column Names` = q,
  Original = a
)
# Subset of respondents who reported at least one affair.
# Base R only: filter(df, cond) and setnames(old =, new =) come from add-on
# packages, and no library() call is visible in this script, so those calls
# fail (or silently hit stats::filter) in a fresh session.
# which() drops rows where the comparison is NA instead of producing NA rows.
CommitAffair <- Affairs[which(Affairs$naffairs != 0), ]
# Renumber the rows 1..n so the subset does not carry the original row labels
rownames(CommitAffair) <- NULL

# Old column name -> descriptive column name. Columns not listed here
# (kids, avgmarr) keep their original names.
column_labels <- c(
  X = "Row_Number",
  naffairs = "Total_affairs",
  vryunhap = "Very_Unhappy",
  unhap = "Unhappy",
  hapavg = "Happy",
  vryhap = "Very_Happy",
  antirel = "Anti_Religious",
  notrel = "Not_Religious",
  slghtrel = "Slightly_Religious",
  smerel = "Somewhat_Religious",
  vryrel = "Very_Religious",
  yrsmarr1 = "Married_Under_1yr",
  yrsmarr2 = "Married_more_1yr",
  yrsmarr3 = "Married_more_4yrs",
  yrsmarr4 = "Married_more_7yrs",
  yrsmarr5 = "Married_more_10yrs",
  yrsmarr6 = "Married_more_15yrs"
)
# Fail loudly if the CSV layout changed and an expected column is missing
stopifnot(all(names(column_labels) %in% names(CommitAffair)))
renamed <- names(CommitAffair) %in% names(column_labels)
names(CommitAffair)[renamed] <-
  unname(column_labels[names(CommitAffair)[renamed]])

# Same statistics as for the full data set, restricted to this subset
AvgAffairs2 <- mean(CommitAffair$Total_affairs)
Median.Affairs2 <- median(CommitAffair$Total_affairs)
Kids2 <- mean(CommitAffair$kids)
median.kids2 <- median(CommitAffair$kids)
summary(CommitAffair)
## Row_Number Total_affairs kids Very_Unhappy
## Min. : 3.0 Min. : 1.000 Min. :0.00 Min. :0.00000
## 1st Qu.:123.2 1st Qu.: 2.000 1st Qu.:1.00 1st Qu.:0.00000
## Median :291.5 Median : 7.000 Median :1.00 Median :0.00000
## Mean :294.6 Mean : 5.833 Mean :0.82 Mean :0.05333
## 3rd Qu.:476.8 3rd Qu.:10.750 3rd Qu.:1.00 3rd Qu.:0.00000
## Max. :598.0 Max. :12.000 Max. :1.00 Max. :1.00000
## Unhappy avgmarr Happy Very_Happy Anti_Religious
## Min. :0.00 Min. :0.00 Min. :0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00 Median :0.00 Median :0.00 Median :0.0000 Median :0.0000
## Mean :0.22 Mean :0.18 Mean :0.32 Mean :0.2267 Mean :0.1333
## 3rd Qu.:0.00 3rd Qu.:0.00 3rd Qu.:1.00 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00 Max. :1.00 Max. :1.00 Max. :1.0000 Max. :1.0000
## Not_Religious Slightly_Religious Somewhat_Religious Very_Religious
## Min. :0.0000 Min. :0.0000 Min. :0.00 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00 Median :0.00000
## Mean :0.2733 Mean :0.2867 Mean :0.22 Mean :0.08667
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00 Max. :1.00000
## Married_Under_1yr Married_more_1yr Married_more_4yrs Married_more_7yrs
## Min. :0.00000 Min. :0.00 Min. :0.00 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.0000
## Median :0.00000 Median :0.00 Median :0.00 Median :0.0000
## Mean :0.03333 Mean :0.08 Mean :0.18 Mean :0.1533
## 3rd Qu.:0.00000 3rd Qu.:0.00 3rd Qu.:0.00 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00 Max. :1.00 Max. :1.0000
## Married_more_10yrs Married_more_15yrs
## Min. :0.00 Min. :0.0000
## 1st Qu.:0.00 1st Qu.:0.0000
## Median :0.00 Median :0.0000
## Mean :0.14 Mean :0.4133
## 3rd Qu.:0.00 3rd Qu.:1.0000
## Max. :1.00 Max. :1.0000
# Side-by-side comparison of the four statistics: all respondents versus only
# those who reported an affair.
y <- c(
  AvgAffairs2,
  Median.Affairs2,
  Kids2,
  median.kids2
)
# data.frame() makes the headers syntactic, so they print as Column.Names and
# Only.Adulterer.
comparison <- data.frame(
  `Column Names` = q,
  Original = a,
  `Only Adulterer` = y
)
comparison
## Column.Names Original Only.Adulterer
## 1 Average Affairs 1.4559068 5.833333
## 2 Median 0.0000000 7.000000
## 3 Average Kids 0.7154742 0.820000
## 4 Median Kids 1.0000000 1.000000
# Recode the 0/1 indicator columns kids and Very_Unhappy as "no"/"yes".
# Each column becomes character; any value other than 0 or 1 is kept as its
# text form and NA stays NA.
for (flag_col in c("kids", "Very_Unhappy")) {
  flags <- CommitAffair[[flag_col]]
  recoded <- as.character(flags)
  recoded[which(flags == 0)] <- "no"
  recoded[which(flags == 1)] <- "yes"
  CommitAffair[[flag_col]] <- recoded
}
# Show the first ten rows of the recoded subset
CommitAffair[seq_len(10), ]
## Row_Number Total_affairs kids Very_Unhappy Unhappy avgmarr Happy Very_Happy
## 1 3 3 no no 0 0 1 0
## 2 5 3 yes no 0 0 0 1
## 3 9 7 yes no 1 0 0 0
## 4 15 12 yes no 1 0 0 0
## 5 18 1 no no 0 0 0 1
## 6 19 1 yes no 0 0 0 1
## 7 27 12 yes no 1 0 0 0
## 8 29 7 no no 0 0 1 0
## 9 32 2 yes no 0 0 1 0
## 10 35 3 yes no 1 0 0 0
## Anti_Religious Not_Religious Slightly_Religious Somewhat_Religious
## 1 0 0 1 0
## 2 0 0 1 0
## 3 0 0 0 0
## 4 0 0 1 0
## 5 0 0 0 1
## 6 0 1 0 0
## 7 0 0 0 1
## 8 0 1 0 0
## 9 0 1 0 0
## 10 0 0 0 1
## Very_Religious Married_Under_1yr Married_more_1yr Married_more_4yrs
## 1 0 0 1 0
## 2 0 0 0 1
## 3 1 0 0 0
## 4 0 0 0 0
## 5 0 1 0 0
## 6 0 0 1 0
## 7 0 0 0 0
## 8 0 0 1 0
## 9 0 0 0 0
## 10 0 0 0 0
## Married_more_7yrs Married_more_10yrs Married_more_15yrs
## 1 0 0 0
## 2 0 0 0
## 3 0 0 1
## 4 0 1 0
## 5 0 0 0
## 6 0 0 0
## 7 0 0 1
## 8 0 0 0
## 9 0 0 1
## 10 0 0 1