Question for analysis: What are the patterns of extramarital affairs for Men and Women?
Here are some basic stats on the full sample
library(RCurl)
## Loading required package: bitops
# Download the Fair extramarital-affairs data. Note that `path` holds the raw
# CSV text returned by getURL(), not a file path.
path <- getURL("https://raw.githubusercontent.com/jreznyc/R_HW3/master/Fair.csv")
# stringsAsFactors = TRUE is set explicitly: since R 4.0 the default is FALSE,
# and text columns would otherwise load as character, so summary() would not
# print per-level counts for them.
df <- read.csv(text = path, header = TRUE, stringsAsFactors = TRUE)
# The first CSV column is only a row index: use it as row names, then drop it
# from the data columns.
rownames(df) <- df[[1]]
df[[1]] <- NULL
# These columns are stored as numeric codes; treat them as categorical so
# summary() tabulates them instead of reporting quantiles.
coded_cols <- c("religious", "occupation", "rate")
df[coded_cols] <- lapply(df[coded_cols], as.factor)
summary(df)
## sex age ym child religious
## female:315 Min. :17.50 Min. : 0.125 no :171 1: 48
## male :286 1st Qu.:27.00 1st Qu.: 4.000 yes:430 2:164
## Median :32.00 Median : 7.000 3:129
## Mean :32.49 Mean : 8.178 4:190
## 3rd Qu.:37.00 3rd Qu.:15.000 5: 70
## Max. :57.00 Max. :15.000
##
## education occupation rate nbaffairs
## Min. : 9.00 1:113 1: 16 Min. : 0.000
## 1st Qu.:14.00 2: 13 2: 66 1st Qu.: 0.000
## Median :16.00 3: 47 3: 93 Median : 0.000
## Mean :16.17 4: 68 4:194 Mean : 1.456
## 3rd Qu.:18.00 5:204 5:232 3rd Qu.: 0.000
## Max. :20.00 6:143 Max. :12.000
## 7: 13
Here are the comparable details for only those with at least one affair
# Second data frame: only respondents reporting at least one affair.
# subset() keeps the rows where the condition is TRUE (NA rows are dropped).
df2 <- subset(df, nbaffairs >= 1)
summary(df2)
## sex age ym child religious
## female:72 Min. :17.50 Min. : 0.125 no : 27 1:20
## male :78 1st Qu.:27.00 1st Qu.: 4.000 yes:123 2:41
## Median :32.00 Median :10.000 3:43
## Mean :33.41 Mean : 9.532 4:33
## 3rd Qu.:37.00 3rd Qu.:15.000 5:13
## Max. :57.00 Max. :15.000
##
## education occupation rate nbaffairs
## Min. : 9.00 1:23 1: 8 Min. : 1.000
## 1st Qu.:14.00 2: 3 2:33 1st Qu.: 2.000
## Median :17.00 3:15 3:27 Median : 7.000
## Mean :16.25 4:21 4:48 Mean : 5.833
## 3rd Qu.:18.00 5:44 5:34 3rd Qu.:10.750
## Max. :20.00 6:39 Max. :12.000
## 7: 5
Here are the comparable details for only those with no affairs
# Third data frame: only respondents reporting no affairs.
df3 <- subset(df, nbaffairs == 0)
summary(df3)
## sex age ym child religious
## female:243 Min. :17.50 Min. : 0.125 no :144 1: 28
## male :208 1st Qu.:27.00 1st Qu.: 1.500 yes:307 2:123
## Median :32.00 Median : 7.000 3: 86
## Mean :32.18 Mean : 7.727 4:157
## 3rd Qu.:37.00 3rd Qu.:15.000 5: 57
## Max. :57.00 Max. :15.000
##
## education occupation rate nbaffairs
## Min. : 9.00 1: 90 1: 8 Min. :0
## 1st Qu.:14.00 2: 10 2: 33 1st Qu.:0
## Median :16.00 3: 32 3: 66 Median :0
## Mean :16.14 4: 47 4:146 Mean :0
## 3rd Qu.:18.00 5:160 5:198 3rd Qu.:0
## Max. :20.00 6:104 Max. :0
## 7: 8
Percentage of men and women who have had at least one affair
# Two pies side by side: share of men (left) and of women (right) who report
# at least one affair.
par(mfrow = c(1, 2))
had_affair <- df$nbaffairs >= 1
is_male <- df$sex == "male"
is_female <- df$sex == "female"

# Men: proportion with at least one affair among all men.
malep <- sum(is_male & had_affair, na.rm = TRUE) / sum(is_male, na.rm = TRUE)
slices <- c(malep, 1 - malep)
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(c("Yes", "No"), " ", pct, "%") # e.g. "Yes 27%"
pie(slices, labels = lbls, radius = 1, xlab = "Men")
# Title is drawn in the top margin of the first (left) panel.
mtext(side = 3, text = "Extramarital Affairs")

# Women: proportion with at least one affair among all women.
femalep <- sum(is_female & had_affair, na.rm = TRUE) /
  sum(is_female, na.rm = TRUE)
slices <- c(femalep, 1 - femalep)
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(c("Yes", "No"), " ", pct, "%")
pie(slices, labels = lbls, radius = 1, xlab = "Women")
Do children play a role?
# Two pies side by side: among respondents WITH children, the share of men
# (left) and of women (right) who report at least one affair.
par(mfrow = c(1, 2))
has_child <- df$child == "yes"
had_affair <- df$nbaffairs >= 1

# Both the numerator and the denominator are restricted to parents. Dividing
# by all men (or all women) would count childless respondents in the "No"
# slice and understate the affair rate among parents.
men_with_children <- df$sex == "male" & has_child
malep <- sum(men_with_children & had_affair, na.rm = TRUE) /
  sum(men_with_children, na.rm = TRUE)
slices <- c(malep, 1 - malep)
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(c("Yes", "No"), " ", pct, "%") # e.g. "Yes 30%"
pie(slices, labels = lbls, radius = 1, xlab = "Men")
# Title is drawn in the top margin of the first (left) panel.
mtext(side = 3, text = "Extramarital Affairs in Couples with Children")

women_with_children <- df$sex == "female" & has_child
femalep <- sum(women_with_children & had_affair, na.rm = TRUE) /
  sum(women_with_children, na.rm = TRUE)
slices <- c(femalep, 1 - femalep)
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(c("Yes", "No"), " ", pct, "%")
pie(slices, labels = lbls, radius = 1, xlab = "Women")
Average number of years of marriage for men and women who have had at least one affair, compared with those who have had none.
# Mean years married (ym) for men and women, shown separately for respondents
# with at least one affair (left panel) and with none (right panel).
par(mfrow = c(1, 2))
sex_labels <- c("Men", "Women")

# Left panel: at least one affair.
mavg <- with(df, mean(ym[which(sex == "male" & nbaffairs >= 1)]))
favg <- with(df, mean(ym[which(sex == "female" & nbaffairs >= 1)]))
barplot(c(mavg, favg), names.arg = sex_labels, ylim = c(0, 10))
mtext(side = 1, text = ">=1 Affair")
mtext(side = 3, text = "Average Years of Marriage")

# Right panel: no affairs. Same y-axis range so the two panels are comparable.
mavg <- with(df, mean(ym[which(sex == "male" & nbaffairs == 0)]))
favg <- with(df, mean(ym[which(sex == "female" & nbaffairs == 0)]))
barplot(c(mavg, favg), names.arg = sex_labels, ylim = c(0, 10))
mtext(side = 1, text = "No Affairs")
Conclusions: The data show that, while men are slightly more likely to stray, both sexes have extramarital affairs at a similar rate. Additionally, both sexes appear less likely to have an extramarital affair if the individual has children; this point deserves a second look, however, because the pooled summary tables above show a higher share of affairs among respondents with children (123 of 430, about 29%) than among those without (27 of 171, about 16%). Finally, the data show that, for both sexes, those who have affairs tend to have been married slightly longer. The data are inconclusive as to whether this finding means that those who have affairs stay married longer, or that there is a threshold length of marriage beyond which an individual is more likely to consider having an affair.