# Download the SAT data set and attach the eight column names in one step.
# read.table() is called with its defaults, exactly as before.
dta <- setNames(
  read.table("http://www.amstat.org/publications/jse/datasets/sat.dat.txt"),
  c("State", "Expend", "Ratio", "Salary", "Frac", "Verbal", "Math", "Sat")
)

# Inspect the dimensions and column types of the loaded data frame
str(dta)
## 'data.frame':    50 obs. of  8 variables:
##  $ State : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Expend: num  4.41 8.96 4.78 4.46 4.99 ...
##  $ Ratio : num  17.2 17.6 19.3 17.1 24 18.4 14.4 16.6 19.1 16.3 ...
##  $ Salary: num  31.1 48 32.2 28.9 41.1 ...
##  $ Frac  : int  8 47 27 6 45 29 81 68 48 65 ...
##  $ Verbal: int  491 445 448 482 417 462 431 429 420 406 ...
##  $ Math  : int  538 489 496 523 485 518 477 468 469 448 ...
##  $ Sat   : int  1029 934 944 1005 902 980 908 897 889 854 ...
# Preview the first six rows (the row count is spelled out explicitly)
head(dta, n = 6L)
##        State Expend Ratio Salary Frac Verbal Math  Sat
## 1    Alabama  4.405  17.2 31.144    8    491  538 1029
## 2     Alaska  8.963  17.6 47.951   47    445  489  934
## 3    Arizona  4.778  19.3 32.175   27    448  496  944
## 4   Arkansas  4.459  17.1 28.934    6    482  523 1005
## 5 California  4.992  24.0 41.078   45    417  485  902
## 6   Colorado  5.443  18.4 34.571   29    462  518  980
# load data management and plotting package
library(tidyverse)
## -- Attaching packages ------------------------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.3.0     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts --------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Bin Frac into three bands (Low / Medium / High) and store them as the
# factor column Fracf, which the plot uses for colouring and grouping.
# The interior cut points (22 and 49) are unchanged. The outer breaks are
# open-ended so that a Frac of 0, or one above the previous upper break of
# 81, is still classified instead of silently becoming NA.
# Intervals are right-closed: Low = (-Inf, 22], Medium = (22, 49],
# High = (49, Inf).
dta <- mutate(
  dta,
  Fracf = cut(
    Frac,
    breaks = c(-Inf, 22, 49, Inf),
    labels = c("Low", "Medium", "High")
  )
)

# Plot SAT score against salary, drawing each state as a text label
# coloured by its Fracf band. Two sets of linear fits are overlaid:
#   * one solid purple line fitted to all states together, and
#   * one dashed gray line per Fracf band.
# Layer sizes are plain numbers: rel() is a helper for theme elements and
# has no relative meaning inside a geom. `size` is kept for the line width
# because the session shown uses ggplot2 3.3.0; ggplot2 >= 3.4.0 prefers
# `linewidth` for lines.
ggplot(data = dta, aes(x = Salary, y = Sat, label = State)) +
  # Overall linear fit across all states
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "purple",
    linetype = 1,
    size = 0.5
  ) +
  # Separate linear fit within each Fracf band
  geom_smooth(
    aes(group = Fracf),
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "gray",
    linetype = 2,
    size = 0.5
  ) +
  # State names as the data marks; overlapping labels are dropped and the
  # colour legend is suppressed
  geom_text(
    aes(color = Fracf),
    check_overlap = TRUE,
    show.legend = FALSE,
    size = 2
  ) +
  labs(
    x = "Salary ($1000)",
    y = "SAT Score"
  ) +
  theme_bw()

# end