# Install the packages this script depends on, but only when they are not
# already available, so re-running the script does not reinstall them.
for (pkg in c("Sleuth3", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

Load the Sleuth3 package from the library.

# Attach Sleuth3 so its objects (such as case0102) can be used by name.
library("Sleuth3")
## Warning: package 'Sleuth3' was built under R version 4.4.2
# Show the package-level help (information about the Sleuth3 package).
help(package = "Sleuth3")

Print the case0102 data frame.

# Auto-print the whole case0102 data frame (columns Salary and Sex).
case0102
##    Salary    Sex
## 1    3900 Female
## 2    4020 Female
## 3    4290 Female
## 4    4380 Female
## 5    4380 Female
## 6    4380 Female
## 7    4380 Female
## 8    4380 Female
## 9    4440 Female
## 10   4500 Female
## 11   4500 Female
## 12   4620 Female
## 13   4800 Female
## 14   4800 Female
## 15   4800 Female
## 16   4800 Female
## 17   4800 Female
## 18   4800 Female
## 19   4800 Female
## 20   4800 Female
## 21   4800 Female
## 22   4800 Female
## 23   4980 Female
## 24   5100 Female
## 25   5100 Female
## 26   5100 Female
## 27   5100 Female
## 28   5100 Female
## 29   5100 Female
## 30   5160 Female
## 31   5220 Female
## 32   5220 Female
## 33   5280 Female
## 34   5280 Female
## 35   5280 Female
## 36   5400 Female
## 37   5400 Female
## 38   5400 Female
## 39   5400 Female
## 40   5400 Female
## 41   5400 Female
## 42   5400 Female
## 43   5400 Female
## 44   5400 Female
## 45   5400 Female
## 46   5400 Female
## 47   5400 Female
## 48   5520 Female
## 49   5520 Female
## 50   5580 Female
## 51   5640 Female
## 52   5700 Female
## 53   5700 Female
## 54   5700 Female
## 55   5700 Female
## 56   5700 Female
## 57   6000 Female
## 58   6000 Female
## 59   6120 Female
## 60   6300 Female
## 61   6300 Female
## 62   4620   Male
## 63   5040   Male
## 64   5100   Male
## 65   5100   Male
## 66   5220   Male
## 67   5400   Male
## 68   5400   Male
## 69   5400   Male
## 70   5400   Male
## 71   5400   Male
## 72   5700   Male
## 73   6000   Male
## 74   6000   Male
## 75   6000   Male
## 76   6000   Male
## 77   6000   Male
## 78   6000   Male
## 79   6000   Male
## 80   6000   Male
## 81   6000   Male
## 82   6000   Male
## 83   6000   Male
## 84   6000   Male
## 85   6000   Male
## 86   6300   Male
## 87   6600   Male
## 88   6600   Male
## 89   6600   Male
## 90   6840   Male
## 91   6900   Male
## 92   6900   Male
## 93   8100   Male
# View() invokes a spreadsheet-style data viewer, which only makes sense in an
# interactive session; skip it when the script is knitted or run in batch.
if (interactive()) {
  View(case0102)
}

Load the plotting package and create a histogram of Salary from the

case0102 data frame.

# Attach ggplot2, then draw a histogram of the Salary column, referring to the
# column explicitly through the data frame.
library("ggplot2")
qplot(x = case0102$Salary, geom = "histogram")
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# with() provides a shortcut for referring to a column within a data frame:
# inside the call, Salary can be written without the case0102$ prefix.
with(case0102, qplot(x = Salary, geom = "histogram"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same plot as the with() version, this time handing the data frame to qplot()
# through its `data` argument.
qplot(x = Salary, geom = "histogram", data = case0102)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Other ways of creating visuals (maybe better than the above):

# Histogram of Salary built layer by layer with ggplot(): the x-axis label is
# set to "Hello" and the black-and-white theme is applied.
ggplot(case0102, aes(Salary)) +
  geom_histogram() +
  labs(x = "Hello") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

We have a second variable, "Sex"; let's subset to look at the salary

distribution within each sex.

# Histogram of Salary restricted to the rows where Sex is "Male".
with(
  case0102,
  qplot(Salary[Sex == "Male"], geom = "histogram")
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of Salary restricted to the rows where Sex is "Female".
with(
  case0102,
  qplot(Salary[Sex == "Female"], geom = "histogram")
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Same histogram as the female one directly above, but changing

how wide the bins are.

# Female salary histogram with explicit bin edges: 3800 to 6600 in steps of
# 400 instead of the default 30 bins.
with(
  case0102,
  qplot(
    Salary[Sex == "Female"],
    geom = "histogram",
    breaks = seq(3800, 6600, by = 400)
  )
)

In case you need help with the qplot() function:

# Open the documentation page for qplot().
help("qplot")
## starting httpd help server ... done

Now plot the data using boxplots instead of histograms.

The arguments can be given in different orders.

# Boxplots of Salary for each Sex.
qplot(x = Sex, y = Salary, data = case0102, geom = "boxplot")

# Same call with the named arguments `geom` and `data` swapped.
qplot(x = Sex, y = Salary, geom = "boxplot", data = case0102)

# Salary is now mapped to x and Sex to y.
qplot(x = Salary, y = Sex, data = case0102, geom = "boxplot")

ggplot() can do the same thing:

# Plain boxplots of Salary by Sex.
ggplot(case0102, aes(Sex, Salary)) +
  geom_boxplot()

# Same boxplots, with each box filled according to Sex.
ggplot(case0102, aes(Sex, Salary, fill = Sex)) +
  geom_boxplot()

# Filled boxplots again, now with a plot title.
ggplot(case0102, aes(Sex, Salary, fill = Sex)) +
  geom_boxplot() +
  labs(title = "Starting Salaries")

Base R commands; no plotting packages needed.

# Base-graphics boxplots of Salary split by Sex.
boxplot(Salary ~ Sex, data = case0102)

# Stem-and-leaf display of the male salaries.
stem(case0102$Salary[case0102$Sex == "Male"])
## 
##   The decimal point is 3 digit(s) to the right of the |
## 
##   4 | 6
##   5 | 011244444
##   5 | 7
##   6 | 00000000000003
##   6 | 666899
##   7 | 
##   7 | 
##   8 | 1
# Min, quartiles, median, mean and max of the female salaries.
summary(case0102$Salary[case0102$Sex == "Female"])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3900    4800    5220    5139    5400    6300
# Check whether the Sex column is stored as a factor.
is.factor(case0102$Sex)
## [1] TRUE
# Number of observations in each level of Sex.
summary(case0102$Sex)
## Female   Male 
##     61     32
# Standard deviation of the female salaries.
sd(case0102$Salary[case0102$Sex == "Female"])
## [1] 539.8707
# One-sided two-sample t-test of Salary by Sex, assuming equal variances
# (var.equal = TRUE). The alternative is that the difference in means,
# Female minus Male, is less than 0.
t.test(
  Salary ~ Sex,
  data = case0102,
  alternative = "less",
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  Salary by Sex
## t = -6.2926, df = 91, p-value = 5.378e-09
## alternative hypothesis: true difference in means between group Female and group Male is less than 0
## 95 percent confidence interval:
##       -Inf -601.9965
## sample estimates:
## mean in group Female   mean in group Male 
##             5138.852             5956.875

# Install TinyTeX only when the LaTeX distribution in use is not already
# TinyTeX, so re-running the script does not attempt a second installation.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex()
}