# Task: Using R, build a multiple regression model for data that interests you. Include in this model at least one quadratic term, one dichotomous term, and one dichotomous vs. quantitative interaction term. Interpret all coefficients. Conduct residual analysis. Was the linear model appropriate? Why or why not?

library(RCurl)
## Loading required package: bitops
library(ggplot2)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Pull the raw CSV text from GitHub and parse it into a data frame, keeping
# character columns as plain strings rather than factors.
medicalappointment <- getURL(
  "https://raw.githubusercontent.com/jgarcia71/Data-605-Assignments/master/Medical%20Appointment.csv"
)
medicalappointment_data <- read.csv(
  text = medicalappointment,
  stringsAsFactors = FALSE
)

# Encode Gender as a factor with "M" listed first, which makes "M" the
# baseline level ("F" is the level that gets its own model coefficient).
medicalappointment_data[["Gender"]] <- factor(
  medicalappointment_data[["Gender"]],
  levels = c("M", "F")
)

# Preview the first rows of the parsed data.
head(medicalappointment_data)
##   PatientId AppointmentID Gender         ScheduledDay       AppointmentDay
## 1  9.60e+13       5595387      F 2016-04-18T12:36:04Z 2016-05-05T00:00:00Z
## 2  5.27e+13       5647604      F 2016-05-02T14:01:33Z 2016-05-09T00:00:00Z
## 3  5.27e+13       5681949      F 2016-05-10T15:27:23Z 2016-05-12T00:00:00Z
## 4  4.67e+12       5668760      M 2016-05-06T09:41:40Z 2016-05-06T00:00:00Z
## 5  7.24e+12       5427853      F 2016-03-03T15:17:12Z 2016-05-17T00:00:00Z
## 6  9.82e+12       5768785      F 2016-06-03T08:01:28Z 2016-06-03T00:00:00Z
##   Age Neighbourhood Scholarship Hipertension Diabetes Alcoholism Handcap
## 1  37         QUITO           0            0        0          0       0
## 2  58         QUITO           0            0        0          0       0
## 3  58         QUITO           0            0        0          0       0
## 4  51         QUITO           0            0        0          0       0
## 5  64         QUITO           0            1        0          0       0
## 6  36         QUITO           0            0        0          0       0
##   SMS_received Medicalappt.show
## 1            0              Yes
## 2            0               No
## 3            0               No
## 4            0               No
## 5            0               No
## 6            0               No
# Count how many appointments carry each Medicalappt.show label, then print
# the counts.
status_table <- table(medicalappointment_data[["Medicalappt.show"]])
status_table
## 
##    No   Yes 
## 88208 22319
# Bar chart of the Medicalappt.show counts: one black-outlined bar per label,
# filled with two manually chosen colours.
ggplot(
  data = medicalappointment_data,
  mapping = aes(x = Medicalappt.show, fill = Medicalappt.show)
) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = c("#009E73", "#E69F00"))

# Keep only the candidate predictors and the outcome column; identifiers,
# timestamps and the neighbourhood are dropped.
medicalappointment_revision <- medicalappointment_data %>%
  select(
    Gender, Age, Scholarship, Hipertension, Diabetes, Alcoholism, Handcap,
    SMS_received, Medicalappt.show
  )

# Recode the outcome from "No"/"Yes" to numeric 0/1 in a single vectorised
# lookup. This replaces the earlier row-subset assignments, which error when
# a label is absent or the column contains NA, and the element-wise
# sapply(as.numeric) pass over every row. Any value other than "No"/"Yes"
# (including NA) becomes NA.
outcome_codes <- c(No = 0, Yes = 1)
medicalappointment_revision$Medicalappt.show <- unname(
  outcome_codes[as.character(medicalappointment_revision$Medicalappt.show)]
)
rm(outcome_codes)  # the lookup table is only needed for the recode above

# Preview the recoded modelling data.
head(medicalappointment_revision)
##   Gender Age Scholarship Hipertension Diabetes Alcoholism Handcap
## 1      F  37           0            0        0          0       0
## 2      F  58           0            0        0          0       0
## 3      F  58           0            0        0          0       0
## 4      M  51           0            0        0          0       0
## 5      F  64           0            1        0          0       0
## 6      F  36           0            0        0          0       0
##   SMS_received Medicalappt.show
## 1            0                1
## 2            0                0
## 3            0                0
## 4            0                0
## 5            0                0
## 6            0                0
# Per-column summary statistics for the modelling data frame.
# NOTE(review): the printed output below reports a minimum Age of -1 and a
# maximum Handcap of 4 -- confirm these values are intended before modelling.
medicalappointment_revision %>% summary()
##  Gender         Age          Scholarship       Hipertension   
##  M:38687   Min.   : -1.00   Min.   :0.00000   Min.   :0.0000  
##  F:71840   1st Qu.: 18.00   1st Qu.:0.00000   1st Qu.:0.0000  
##            Median : 37.00   Median :0.00000   Median :0.0000  
##            Mean   : 37.09   Mean   :0.09827   Mean   :0.1972  
##            3rd Qu.: 55.00   3rd Qu.:0.00000   3rd Qu.:0.0000  
##            Max.   :115.00   Max.   :1.00000   Max.   :1.0000  
##     Diabetes         Alcoholism        Handcap         SMS_received  
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.000  
##  Mean   :0.07186   Mean   :0.0304   Mean   :0.02225   Mean   :0.321  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :4.00000   Max.   :1.000  
##  Medicalappt.show
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2019  
##  3rd Qu.:0.0000  
##  Max.   :1.0000
# Fit an ordinary least-squares model of the 0/1 Medicalappt.show outcome on
# all retained predictors.
#
# The data frame is passed through `data =` instead of attach(): attach()
# puts a copy of the columns on the search path, where they can be masked by
# global variables and do not follow later changes to the data frame. The
# printed Call now shows the `data` argument; the coefficients are unchanged.
#
# NOTE(review): the assignment brief asks for a quadratic term and a
# dichotomous-by-quantitative interaction; this formula has neither (for
# example I(Age^2) and Gender:Age would satisfy it).
# NOTE(review): the response is binary, so a logistic model via
# glm(..., family = binomial) may be a better fit than lm() -- confirm.
medicalappointment.lm <- lm(
  Medicalappt.show ~ Gender + Age + Scholarship + Hipertension + Diabetes +
    Alcoholism + Handcap + SMS_received,
  data = medicalappointment_revision
)
medicalappointment.lm
## 
## Call:
## lm(formula = Medicalappt.show ~ Gender + Age + Scholarship + 
##     Hipertension + Diabetes + Alcoholism + Handcap + SMS_received)
## 
## Coefficients:
##  (Intercept)       GenderF           Age   Scholarship  Hipertension  
##     0.200122      0.002627     -0.001021      0.030899     -0.009529  
##     Diabetes    Alcoholism       Handcap  SMS_received  
##     0.012640      0.020963      0.005224      0.109500
# Coefficient table with standard errors, t-tests, R-squared and the overall
# F-test for the fitted model.
medicalappointment.lm %>% summary()
## 
## Call:
## lm(formula = Medicalappt.show ~ Gender + Age + Scholarship + 
##     Hipertension + Diabetes + Alcoholism + Handcap + SMS_received)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3432 -0.2132 -0.1700 -0.1272  0.9094 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.001e-01  2.848e-03  70.267  < 2e-16 ***
## GenderF       2.627e-03  2.563e-03   1.025  0.30538    
## Age          -1.021e-03  6.102e-05 -16.733  < 2e-16 ***
## Scholarship   3.090e-02  4.073e-03   7.586 3.33e-14 ***
## Hipertension -9.529e-03  3.717e-03  -2.564  0.01036 *  
## Diabetes      1.264e-02  5.161e-03   2.449  0.01432 *  
## Alcoholism    2.096e-02  7.066e-03   2.967  0.00301 ** 
## Handcap       5.224e-03  7.437e-03   0.702  0.48241    
## SMS_received  1.095e-01  2.565e-03  42.698  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3973 on 110518 degrees of freedom
## Multiple R-squared:  0.02053,    Adjusted R-squared:  0.02045 
## F-statistic: 289.5 on 8 and 110518 DF,  p-value: < 2.2e-16
# Residual diagnostics for the fitted model.

# Residuals against fitted values.
plot(
  x = fitted(medicalappointment.lm),
  y = resid(medicalappointment.lm)
)

# Distribution of the residuals.
hist(
  x = resid(medicalappointment.lm),
  col = "lightblue",
  border = "pink"
)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(y = resid(medicalappointment.lm))
qqline(y = resid(medicalappointment.lm))