HW 4

Use the dataset to replicate the plot below:

Load the data and check its structure.

# Read the SAT/GPA table (first line of the file holds the column names),
# then show a compact summary of the resulting data frame.
dta <- read.table(
  file = "C:/Users/user/Desktop/sat_gpa.txt",
  header = TRUE
)
str(dta)
'data.frame':   6 obs. of  5 variables:
 $ College: Factor w/ 6 levels "Barnard","Bowdoin",..: 1 6 2 4 3 5
 $ SAT_No : int  1210 1243 1200 1220 1237 1233
 $ GPA_No : num  3.08 3.1 2.85 2.9 2.7 2.62
 $ SAT_Yes: int  1317 1333 1312 1280 1308 1287
 $ GPA_Yes: num  3.3 3.24 3.12 3.04 2.94 2.8
# Show the first six rows (the default row count for head(), spelled out).
head(dta, n = 6L)
          College SAT_No GPA_No SAT_Yes GPA_Yes
1         Barnard   1210   3.08    1317    3.30
2    Northwestern   1243   3.10    1333    3.24
3         Bowdoin   1200   2.85    1312    3.12
4           Colby   1220   2.90    1280    3.04
5 Carnegie Mellon   1237   2.70    1308    2.94
6    Georgia Tech   1233   2.62    1287    2.80

Create two separate data frames, one for submit == No and one for submit == Yes, rename the columns, then combine them by row.

# Reshape the wide table (one row per college, separate *_No / *_Yes columns)
# into a long table with one row per college and submission status.
#
# The `submit` flag is sized with nrow() -- the number of rows -- rather than
# length(), which on a data frame is the number of columns and only produced a
# full column through silent recycling.
# Columns are picked with base subsetting, so no pipe operator or extra
# package has to be attached.
# NOTE(review): the column name "collge" is misspelt but kept as-is so that
# any existing reference to it keeps working.
long_names <- c("collge", "SAT", "GPA", "submit")

# Colleges' figures for students who did NOT submit SAT scores.
dta.no <- dta[, c("College", "SAT_No", "GPA_No")]
dta.no$submit <- rep("No", nrow(dta.no))
names(dta.no) <- long_names

# Colleges' figures for students who DID submit SAT scores.
dta.yes <- dta[, c("College", "SAT_Yes", "GPA_Yes")]
dta.yes$submit <- rep("Yes", nrow(dta.yes))
names(dta.yes) <- long_names

# Stack the two halves: "Yes" rows first, then "No" rows.
dta2 <- rbind(dta.yes, dta.no)

Draw the dot plot.

# Draw the figure: for each college, a filled dot (did not submit SAT) and an
# open dot (submitted SAT) joined by a line segment, plus a legend, college
# labels, axis titles and a caption in the outer margin.

# NOTE(review): plot() below starts its own frame, so this call looks
# redundant; it is kept so the output is unchanged.
plot.new()

# Set the inner margin (mar) and outer margin (oma); the large bottom outer
# margin leaves room for the caption. The previous values are saved so they
# can be restored once the figure is complete.
old_par <- par(mar = c(3, 1, 1, 1), oma = c(8, 3, 3, 3))

# Empty canvas. xaxs/yaxs = "i" makes the axes span exactly xlim/ylim instead
# of being extended by the default padding.
with(dta2, plot(GPA ~ SAT, type = "n", xaxs = "i", yaxs = "i",
                xlim = c(1150, 1400), ylim = c(2.6, 3.4)))

# Dots: filled for non-submitters, open for submitters.
with(dta2[dta2$submit == "No", ],
     points(SAT, GPA, cex = 3.5, pch = 19, col = "black"))
submitted <- dta2[dta2$submit == "Yes", ]
with(submitted, points(SAT, GPA, cex = 3.5, pch = 1))

# Segment joining the two dots of each college.
with(dta, segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes,
                   lty = 1, lwd = 1, col = "black"))

# Legend in the top-left corner of the plotting region.
legend(1150, 3.4, pch = c(1, 19), cex = 1.5,
       legend = c("Submitted SAT Scores", "Did NOT Submit SAT Score"))

# College names next to the "submitted" dots. The labels come from the same
# rows that supply the coordinates, so they cannot drift out of alignment.
with(submitted,
     text(SAT, GPA, labels = collge, adj = c(-0.2, -1), cex = 0.9, font = 2))

# Caption text. Stored under its own name so that the graphics function
# text() is not shadowed by a character variable.
fig_caption <- "Figure 1.4 The mean SAT coupled with the mean first-year GPA for the class \n of 1999 at six schools shown for those who submitted SAT scores for admis- \n sion and those who did not"

# Caption in the outer bottom margin, then the x- and y-axis titles.
mtext(fig_caption, side = 1, line = 6, cex = 1.2, outer = TRUE)
mtext(text = "SAT (V+M)", side = 1, line = 3)
mtext(text = "First Year GPA", side = 2, line = 1, outer = TRUE)

# Restore the margin settings that were in effect before this figure.
par(old_par)