Question for analysis: What are the patterns of extramarital affairs for Men and Women?

Here are some basic stats on the full sample

library(RCurl)
## Loading required package: bitops
# Fetch Fair.csv as text; `path` holds the CSV contents returned by getURL(),
# which read.csv() then parses from memory via its `text` argument.
path <- getURL("https://raw.githubusercontent.com/jreznyc/R_HW3/master/Fair.csv")
# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0.0; without it the text columns (sex, child) load as character and
# summary() prints Length/Class/Mode instead of per-level counts.
df <- read.csv(text = path, header = TRUE, stringsAsFactors = TRUE)
rownames(df) <- df[, 1] # first column carries the row numbering
df[, 1] <- NULL         # drop it now that it is stored as the row names
# These columns are numeric codes; convert them so summary() tabulates levels.
df$religious <- as.factor(df$religious)
df$occupation <- as.factor(df$occupation)
df$rate <- as.factor(df$rate)
summary(df)
##      sex           age              ym         child     religious
##  female:315   Min.   :17.50   Min.   : 0.125   no :171   1: 48    
##  male  :286   1st Qu.:27.00   1st Qu.: 4.000   yes:430   2:164    
##               Median :32.00   Median : 7.000             3:129    
##               Mean   :32.49   Mean   : 8.178             4:190    
##               3rd Qu.:37.00   3rd Qu.:15.000             5: 70    
##               Max.   :57.00   Max.   :15.000                      
##                                                                   
##    education     occupation rate      nbaffairs     
##  Min.   : 9.00   1:113      1: 16   Min.   : 0.000  
##  1st Qu.:14.00   2: 13      2: 66   1st Qu.: 0.000  
##  Median :16.00   3: 47      3: 93   Median : 0.000  
##  Mean   :16.17   4: 68      4:194   Mean   : 1.456  
##  3rd Qu.:18.00   5:204      5:232   3rd Qu.: 0.000  
##  Max.   :20.00   6:143              Max.   :12.000  
##                  7: 13

Here are the comparable details for only those with at least one affair

# Respondents reporting one or more affairs; subset() keeps only rows where
# the condition is TRUE, so rows with a missing nbaffairs are left out.
df2 <- subset(df, nbaffairs >= 1)
summary(df2)
##      sex          age              ym         child     religious
##  female:72   Min.   :17.50   Min.   : 0.125   no : 27   1:20     
##  male  :78   1st Qu.:27.00   1st Qu.: 4.000   yes:123   2:41     
##              Median :32.00   Median :10.000             3:43     
##              Mean   :33.41   Mean   : 9.532             4:33     
##              3rd Qu.:37.00   3rd Qu.:15.000             5:13     
##              Max.   :57.00   Max.   :15.000                      
##                                                                  
##    education     occupation rate     nbaffairs     
##  Min.   : 9.00   1:23       1: 8   Min.   : 1.000  
##  1st Qu.:14.00   2: 3       2:33   1st Qu.: 2.000  
##  Median :17.00   3:15       3:27   Median : 7.000  
##  Mean   :16.25   4:21       4:48   Mean   : 5.833  
##  3rd Qu.:18.00   5:44       5:34   3rd Qu.:10.750  
##  Max.   :20.00   6:39              Max.   :12.000  
##                  7: 5

Here are the comparable details for only those with no affairs

# Third data frame: only respondents with no affairs (nbaffairs == 0).
# which() keeps just the TRUE positions, so rows with a missing nbaffairs
# are excluded rather than returned as all-NA rows.
df3 <- df[which(df$nbaffairs == 0), ]
summary(df3)
##      sex           age              ym         child     religious
##  female:243   Min.   :17.50   Min.   : 0.125   no :144   1: 28    
##  male  :208   1st Qu.:27.00   1st Qu.: 1.500   yes:307   2:123    
##               Median :32.00   Median : 7.000             3: 86    
##               Mean   :32.18   Mean   : 7.727             4:157    
##               3rd Qu.:37.00   3rd Qu.:15.000             5: 57    
##               Max.   :57.00   Max.   :15.000                      
##                                                                   
##    education     occupation rate      nbaffairs
##  Min.   : 9.00   1: 90      1:  8   Min.   :0  
##  1st Qu.:14.00   2: 10      2: 33   1st Qu.:0  
##  Median :16.00   3: 32      3: 66   Median :0  
##  Mean   :16.14   4: 47      4:146   Mean   :0  
##  3rd Qu.:18.00   5:160      5:198   3rd Qu.:0  
##  Max.   :20.00   6:104              Max.   :0  
##                  7:  8

Percentage of men and women who’ve had at least one affair

# Two pies side by side: share of each sex reporting one or more affairs.
par(mfrow = c(1, 2))

# Turns a pair of shares into the slice labels "Yes NN%" and "No NN%".
yes_no_labels <- function(shares) {
  percents <- round(shares / sum(shares) * 100)
  paste0(c("Yes", "No"), " ", percents, "%")
}

strayed <- df$nbaffairs >= 1

# Left panel: men. which() counts only TRUE comparisons (NA is ignored).
men <- df$sex == "male"
malep <- length(which(men & strayed)) / length(which(men))
male_shares <- c(malep, 1 - malep)
pie(male_shares, labels = yes_no_labels(male_shares), radius = 1, xlab = "Men")
# The heading is drawn on the first panel, right after it is plotted.
mtext(side = 3, text = "Extramarital Affairs")

# Right panel: women.
women <- df$sex == "female"
femalep <- length(which(women & strayed)) / length(which(women))
female_shares <- c(femalep, 1 - femalep)
pie(female_shares, labels = yes_no_labels(female_shares), radius = 1, xlab = "Women")

Do children play a role?

# Among respondents WITH children, show the share of each sex reporting at
# least one affair (two pies side by side).
par(mfrow = c(1, 2))
has_child <- df$child == "yes"
had_affair <- df$nbaffairs >= 1

# The denominator is restricted to parents of the given sex. Dividing by all
# men / all women instead yields the share who both have children and had an
# affair, which understates the affair rate among parents.
is_male <- df$sex == "male"
malep <- sum(is_male & has_child & had_affair, na.rm = TRUE) /
  sum(is_male & has_child, na.rm = TRUE)
slices <- c(malep, 1 - malep)
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(c("Yes", "No"), " ", pct, "%") # e.g. "Yes 30%"
pie(slices, labels = lbls, radius = 1, xlab = "Men")
mtext(side = 3, text = "Extramarital Affairs in Couples with Children")

is_female <- df$sex == "female"
femalep <- sum(is_female & has_child & had_affair, na.rm = TRUE) /
  sum(is_female & has_child, na.rm = TRUE)
slices <- c(femalep, 1 - femalep)
pct <- round(slices / sum(slices) * 100)
lbls <- paste0(c("Yes", "No"), " ", pct, "%")
pie(slices, labels = lbls, radius = 1, xlab = "Women")

Average number of years of marriage for men and women who have had at least one affair, compared with those who have had none.

# Mean years married (ym) for men and women, drawn as two bar charts:
# respondents with one or more affairs (left) and with none (right).
par(mfrow = c(1, 2))

# Mean of ym over the rows where `keep` is TRUE; which() skips NA comparisons.
mean_years_married <- function(keep) {
  mean(df$ym[which(keep)])
}

is_man <- df$sex == "male"
is_woman <- df$sex == "female"
any_affair <- df$nbaffairs >= 1
no_affair <- df$nbaffairs == 0

# Left panel: at least one affair.
mavg <- mean_years_married(is_man & any_affair)
favg <- mean_years_married(is_woman & any_affair)
barplot(c(mavg, favg), names.arg = c("Men", "Women"), ylim = c(0, 10))
mtext(side = 1, text = ">=1 Affair")
mtext(side = 3, text = "Average Years of Marriage")

# Right panel: no affairs.
mavg <- mean_years_married(is_man & no_affair)
favg <- mean_years_married(is_woman & no_affair)
barplot(c(mavg, favg), names.arg = c("Men", "Women"), ylim = c(0, 10))
mtext(side = 1, text = "No Affairs")

Conclusions: The data show that, while men are slightly more likely to stray (78 of 286 men, about 27%, versus 72 of 315 women, about 23%), both sexes have extramarital affairs at a similar rate. Additionally, the summary tables above indicate that respondents with children are more likely, not less likely, to report an extramarital affair: 123 of the 430 respondents with children (about 29%) had at least one affair, compared with 27 of the 171 respondents without children (about 16%). Finally, the data show that, for both sexes, those who have affairs tend to have been married slightly longer. The data are inconclusive as to whether this finding means that those who have affairs stay married longer or whether there is a threshold length of marriage beyond which an individual is more likely to consider having an affair.