# Install the packages this walkthrough depends on, but only when they are
# missing, so that re-running the script does not download them again.
for (pkg in c("Sleuth3", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
Load the Sleuth3 package into the current R session.
# Attach Sleuth3; the case0102 data used below is referenced by bare name.
library("Sleuth3")
## Warning: package 'Sleuth3' was built under R version 4.4.2
# Open the package-level help index for Sleuth3.
help(package = "Sleuth3")
Display the case0102 data frame.
# Auto-print the whole case0102 data frame; the listing that follows shows its
# two columns, Salary and Sex.
case0102
## Salary Sex
## 1 3900 Female
## 2 4020 Female
## 3 4290 Female
## 4 4380 Female
## 5 4380 Female
## 6 4380 Female
## 7 4380 Female
## 8 4380 Female
## 9 4440 Female
## 10 4500 Female
## 11 4500 Female
## 12 4620 Female
## 13 4800 Female
## 14 4800 Female
## 15 4800 Female
## 16 4800 Female
## 17 4800 Female
## 18 4800 Female
## 19 4800 Female
## 20 4800 Female
## 21 4800 Female
## 22 4800 Female
## 23 4980 Female
## 24 5100 Female
## 25 5100 Female
## 26 5100 Female
## 27 5100 Female
## 28 5100 Female
## 29 5100 Female
## 30 5160 Female
## 31 5220 Female
## 32 5220 Female
## 33 5280 Female
## 34 5280 Female
## 35 5280 Female
## 36 5400 Female
## 37 5400 Female
## 38 5400 Female
## 39 5400 Female
## 40 5400 Female
## 41 5400 Female
## 42 5400 Female
## 43 5400 Female
## 44 5400 Female
## 45 5400 Female
## 46 5400 Female
## 47 5400 Female
## 48 5520 Female
## 49 5520 Female
## 50 5580 Female
## 51 5640 Female
## 52 5700 Female
## 53 5700 Female
## 54 5700 Female
## 55 5700 Female
## 56 5700 Female
## 57 6000 Female
## 58 6000 Female
## 59 6120 Female
## 60 6300 Female
## 61 6300 Female
## 62 4620 Male
## 63 5040 Male
## 64 5100 Male
## 65 5100 Male
## 66 5220 Male
## 67 5400 Male
## 68 5400 Male
## 69 5400 Male
## 70 5400 Male
## 71 5400 Male
## 72 5700 Male
## 73 6000 Male
## 74 6000 Male
## 75 6000 Male
## 76 6000 Male
## 77 6000 Male
## 78 6000 Male
## 79 6000 Male
## 80 6000 Male
## 81 6000 Male
## 82 6000 Male
## 83 6000 Male
## 84 6000 Male
## 85 6000 Male
## 86 6300 Male
## 87 6600 Male
## 88 6600 Male
## 89 6600 Male
## 90 6840 Male
## 91 6900 Male
## 92 6900 Male
## 93 8100 Male
# View() opens a spreadsheet-style data viewer, which only makes sense in an
# interactive session; skip it when the script is run or rendered in batch
# mode, where no viewer may be available.
if (interactive()) {
  View(case0102)
}
Load the plotting package and create a histogram of Salary from the
case0102 data frame.
# Attach ggplot2, then draw a histogram of the Salary column, selected with `$`.
# NOTE: qplot() is deprecated as of ggplot2 3.4.0 (see the warning printed
# below); it is kept because this walkthrough compares it with ggplot().
library("ggplot2")
qplot(x = case0102$Salary, geom = "histogram")
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# with() evaluates its second argument inside the data frame, so a column can
# be named directly (Salary) rather than as case0102$Salary.
with(
  case0102,
  qplot(x = Salary, geom = "histogram")
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same plot as the with() version: here the data frame is handed to qplot()
# through its `data` argument instead.
qplot(x = Salary, data = case0102, geom = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Other ways of creating visuals (arguably better than the above):
# Layered ggplot() version of the Salary histogram, with a custom x-axis label
# and the black-and-white theme.
ggplot(case0102, aes(Salary)) +
  geom_histogram() +
  labs(x = "Hello") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

We have a second variable, Sex, so let's subset the data to look at the
salary distribution within each sex.
# Histogram of the salaries in rows where Sex is "Male".
with(
  case0102,
  qplot(x = Salary[Sex == "Male"], geom = "histogram")
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of the salaries in rows where Sex is "Female".
with(
  case0102,
  qplot(x = Salary[Sex == "Female"], geom = "histogram")
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The same histogram as the female one directly above, but changing
how wide the bins are.
# Female salaries again, this time with explicit bin edges every 400 from
# 3800 to 6600 instead of the default binning.
with(
  case0102,
  qplot(
    x = Salary[Sex == "Female"],
    geom = "histogram",
    breaks = c(3800, 4200, 4600, 5000, 5400, 5800, 6200, 6600)
  )
)

In case you need help with the qplot() function:
# Open the documentation page for qplot().
help("qplot")
## starting httpd help server ... done
Now plot the data using box plots instead of histograms.
The arguments can be rearranged, as the calls below show.
# Box plots of Salary for each Sex: Sex is mapped to x and Salary to y.
qplot(x = Sex, y = Salary, data = case0102, geom = "boxplot")

# Same mapping; only the order of the named arguments differs.
qplot(x = Sex, y = Salary, geom = "boxplot", data = case0102)

# Here the two variables are swapped: Salary is mapped to x and Sex to y.
qplot(x = Salary, y = Sex, data = case0102, geom = "boxplot")

ggplot() can do the same thing:
# Plain box plots of Salary by Sex.
ggplot(case0102, aes(Sex, Salary)) +
  geom_boxplot()

# Same plot with each box filled according to Sex.
ggplot(case0102, aes(Sex, Salary)) +
  geom_boxplot(aes(fill = Sex))

# Filled boxes plus a plot title.
ggplot(case0102, aes(Sex, Salary)) +
  geom_boxplot(aes(fill = Sex)) +
  labs(title = "Starting Salaries")

Base R commands that need no extra packages:
# Base-graphics box plots of Salary split by Sex, using the formula interface.
boxplot(Salary ~ Sex, data = case0102)

# Stem-and-leaf display of the salaries in rows where Sex is "Male".
stem(case0102$Salary[case0102$Sex == "Male"])
##
## The decimal point is 3 digit(s) to the right of the |
##
## 4 | 6
## 5 | 011244444
## 5 | 7
## 6 | 00000000000003
## 6 | 666899
## 7 |
## 7 |
## 8 | 1
# Summary of the salaries in rows where Sex is "Female".
summary(case0102$Salary[case0102$Sex == "Female"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3900 4800 5220 5139 5400 6300
# Check whether Sex is stored as a factor.
is.factor(case0102$Sex)
## [1] TRUE
# For a factor, summary() reports the number of rows at each level.
summary(case0102$Sex)
## Female Male
## 61 32
# Standard deviation of the salaries in rows where Sex is "Female".
sd(case0102$Salary[case0102$Sex == "Female"])
## [1] 539.8707
# One-sided, equal-variance two-sample t-test of Salary by Sex; with
# alternative = "less" the alternative hypothesis is that the Female mean
# minus the Male mean is below zero.
t.test(
  Salary ~ Sex,
  data = case0102,
  alternative = "less",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: Salary by Sex
## t = -6.2926, df = 91, p-value = 5.378e-09
## alternative hypothesis: true difference in means between group Female and group Male is less than 0
## 95 percent confidence interval:
## -Inf -601.9965
## sample estimates:
## mean in group Female mean in group Male
## 5138.852 5956.875
# Install the TinyTeX LaTeX distribution only when it is not already the one
# in use, so that re-running the script does not attempt another installation.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex()
}