| Title: “Homework 1” |
| Author: “Bryan Solomon” |
| Date: “01 September, 2021” |
The histogram of the Sam Houston University staff displays the salaries of 2,225 Sam Houston University staff members. The histogram is unimodal, with a single peak, and is positively skewed, with the majority of occurrences on the left side of the graph. It has a minimum salary of $6,012 and a maximum salary of $456,216 for the Dean of Osteopathic Medicine. The average salary of a Sam Houston University employee is $63,821.58 and the median is $55,485. The mean is influenced by the outliers, which consist of salaries greater than $300,000.00. A problem with the presentation of the histogram is that it does not show specific salaries for positions, but instead plots them according to salary amount in dollars.
library(readr, quietly = TRUE)
# Read the staff salary table from the local CSV file.
# NOTE(review): the path is absolute and machine-specific -- confirm it
# before re-running on another computer.
salaries <- read_csv(
  file = "C:\\Users\\USER\\Desktop\\Sam Houston Salaries.csv"
)

# Lower-case every column name; the code below refers to `annual_salary`
# in lower case.
names(salaries) <- tolower(names(salaries))

# Base-graphics histogram of the annual salary column.
hist(salaries$annual_salary)
Greatest histogram
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
# Histogram of annual salaries: fixed-width bins, a plot title, an x-axis
# label, and dollar-formatted x-axis tick labels.
ggplot(salaries, aes(x = annual_salary)) +
  geom_histogram(binwidth = 9379.2) +
  ggtitle("Distribution of Salaries") +
  xlab("Annual Salaries") +
  scale_x_continuous(labels = label_dollar())
Final histogram
# Min / quartiles / median / mean / max of the `dist` column of `cars`.
# NOTE(review): `cars` is not the salary data -- confirm this is intended.
summary(cars[["dist"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 26.00 36.00 42.98 56.00 120.00
# Min / quartiles / median / mean / max of the `speed` column of `cars`.
# NOTE(review): `cars` is not the salary data -- confirm this is intended.
summary(cars[["speed"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.0 12.0 15.0 15.4 19.0 25.0
library(ggplot2)
library(scales)
# Salary histogram with a bin width of 9379.2, titled and labelled, with
# the x-axis ticks rendered as dollar amounts.
ggplot(salaries, aes(x = annual_salary)) +
  geom_histogram(binwidth = 9379.2) +
  ggtitle("Distribution of Salaries") +
  xlab("Annual Salaries") +
  scale_x_continuous(labels = label_dollar())
change bins
library(ggplot2)
library(scales)
# Salary histogram redrawn with the bin width set to 9379.25.
ggplot(salaries, aes(x = annual_salary)) +
  geom_histogram(binwidth = 9379.25) +
  ggtitle("Distribution of Salaries") +
  xlab("Annual Salaries") +
  scale_x_continuous(labels = label_dollar())

# Per-column summary of the whole salary table.
summary(object = salaries)
## position_title home_organization_desc annual_salary
## Length:2225 Length:2225 Min. : 6012
## Class :character Class :character 1st Qu.: 40950
## Mode :character Mode :character Median : 55485
## Mean : 63822
## 3rd Qu.: 76770
## Max. :456216
# Number of data points: 2,225.
# Number of bins: square root of the count, rounded up, gives 48.
# 9379.25 equals (456216 - 6012) / 48 -- presumably the salary range split
# into those 48 bins.
# Evaluating the bare layer (not added to a plot) prints its geom, stat and
# position settings.
geom_histogram(binwidth = 9379.25)
## geom_bar: na.rm = FALSE, orientation = NA
## stat_bin: binwidth = 9379.25, bins = NULL, na.rm = FALSE, orientation = NA, pad = FALSE
## position_stack
# Kernel density estimate of annual salaries, titled and labelled, with the
# x-axis ticks rendered as dollar amounts.
# The former `stat = ..density..` entry in aes() was removed: `stat` is not
# an aesthetic, geom_density() already maps the computed density to y, and
# the `..name..` notation is deprecated in ggplot2 in favour of after_stat().
ggplot(salaries, aes(x = annual_salary)) +
  geom_density() +
  ggtitle("Distribution of Salaries") +
  xlab("Annual Salaries") +
  scale_x_continuous(labels = label_dollar())

# Per-column summary of the whole salary table.
summary(salaries)
## position_title home_organization_desc annual_salary
## Length:2225 Length:2225 Min. : 6012
## Class :character Class :character 1st Qu.: 40950
## Mode :character Mode :character Median : 55485
## Mean : 63822
## 3rd Qu.: 76770
## Max. :456216