#TRM Practical Data Analysis - Basic Level
###Task 1. Import the csv data set “Professorial Salaries” into R
# Read the "Professorial Salaries" CSV into the data frame `salary3`, which
# every plot below uses as its data source.
# NOTE(review): the path is absolute and specific to one machine/user account;
# confirm or adjust it before running the script elsewhere.
salary3 <- read.csv(
  file = "C:/Users/24544355/OneDrive - UTS/Desktop/Assignment/32931/Module10/Professorial Salaries.csv"
)
###Task 2. Distribution of numeric data
####2a. Create a graph to describe the distribution of professors’ salaries. Are professors’ salaries normally distributed?
library(ggplot2)
library(gridExtra)
# Task 2a (first view): histogram of the Salary column.
# `p` holds the shared data/aesthetic mapping; `p1` adds the histogram layer
# (blue bars with white outlines, default binning).
p <- ggplot(data = salary3, aes(x = Salary))
p1 <- p +
  geom_histogram(color = "white", fill = "blue")
# Auto-print the histogram.
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(ggplot2)
library(grid)
library(gridExtra)
# Task 2a (combined figure): a count histogram of Salary stacked above a
# density-scaled histogram overlaid with a density curve.
p <- ggplot(data = salary3, aes(x = Salary))

# Top panel: raw counts per bin.
p1 <- p + geom_histogram(color = "white", fill = "blue")

# Bottom panel: the same histogram rescaled to density so that the red density
# curve shares its y-axis. `after_stat(density)` replaces the `..density..`
# notation, which was deprecated in ggplot2 3.4.0 and triggers a lifecycle
# warning; the resulting plot is unchanged.
p2 <- p +
  geom_histogram(aes(y = after_stat(density)), color = "white", fill = "blue") +
  geom_density(col = "red")

# Arrange both panels in one column under a common title.
grid.arrange(
  p1, p2,
  nrow = 2,
  top = textGrob(
    "Distribution of Professors' Salaries",
    gp = gpar(fontsize = 20, font = 1)
  )
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
####2b. Create a graph to describe the differences in salaries between male and female professors. Were the professors’ salaries different between male and female professors?
library(ggplot2)
library(grid)
library(gridExtra)
# Task 2b (first view): histogram of Salary with bars filled by Sex.
# position = "dodge" places the bars for each sex side by side within a bin
# instead of stacking them.
p <- ggplot(data = salary3, aes(x = Salary, fill = Sex))
p1 <- p +
  geom_histogram(position = "dodge")
# Auto-print the histogram.
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
library(ggplot2)
library(grid)
library(gridExtra)
# Task 2b (combined figure): dodged histogram of Salary by Sex stacked above
# overlapping density curves of Salary by Sex.
p <- ggplot(data = salary3, aes(x = Salary, fill = Sex))

# Top panel: side-by-side histogram bars per sex.
p1 <- p + geom_histogram(position = "dodge")

# Bottom panel: one density curve per sex; the low alpha keeps the overlapping
# filled areas see-through.
p2 <- ggplot(data = salary3, aes(x = Salary, fill = Sex, color = Sex)) +
  geom_density(alpha = 0.1)

# Arrange both panels in one column under a common title.
grid.arrange(
  p1, p2,
  nrow = 2,
  top = textGrob(
    "Distribution of Professors' Salaries by sex",
    gp = gpar(fontsize = 20, font = 1)
  )
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
###Task 3. Distribution of categorical data
####3a. Create a graph to describe the distribution of professors’ rank. Write a sentence to interpret the graph.
library(ggplot2)
library(grid)
library(gridExtra)
# Task 3a: bar charts counting professors in each rank.

# First view: bars for the Rank column exactly as it was read from the CSV.
p <- ggplot(data = salary3, aes(x = Rank, fill = Rank, col = Rank))
p1 <- p + geom_bar(position = "dodge")
# Auto-print the first bar chart.
p1

# Add Prof.Rank: the same ranks as a factor whose levels are given explicitly
# in the order AsstProf, AssocProf, Prof. Later plots use this column for the
# rank axis.
salary3[["Prof.Rank"]] <- factor(
  salary3[["Rank"]],
  levels = c("AsstProf", "AssocProf", "Prof")
)

# Second view: the same bar chart drawn from the re-levelled factor.
p <- ggplot(
  data = salary3,
  aes(x = Prof.Rank, fill = Prof.Rank, col = Prof.Rank)
)
p2 <- p + geom_bar(position = "dodge")

# Show both versions in one column under a common title.
grid.arrange(
  p1, p2,
  nrow = 2,
  top = textGrob(
    "Distribution of professors' rank",
    gp = gpar(fontsize = 20, font = 1)
  )
)
####3b. Create a graph to describe whether professors’ ranks differed between male and female professors. Write a sentence to interpret the graph.
library(ggplot2)
library(grid)
library(gridExtra)
# Task 3b: bar chart of rank counts split by sex.
# Prof.Rank (the re-levelled rank factor) is on the x-axis; Sex drives both
# the fill and the outline colour, and "dodge" puts the bars for each sex
# next to each other within a rank.
p <- ggplot(
  data = salary3,
  aes(x = Prof.Rank, fill = Sex, col = Sex)
)
p1 <- p +
  geom_bar(position = "dodge")
# Auto-print the chart with its title attached.
p1 +
  ggtitle("Distribution of professors' rank by sex")
###Task 4. Comparison of numeric data
####4a. Create a graph to describe the differences in salaries between male and female professors. What are the differences between the graphs 2b and 4a?
library(ggplot2)
library(grid)
library(gridExtra)
# Task 4a: box plots of Salary for each sex, with the individual observations
# drawn on top as faint jittered points.
p <- ggplot(
  data = salary3,
  aes(x = Sex, y = Salary, fill = Sex, col = Sex)
)
# Box outlines are forced to black so they stay visible against the fill;
# the jittered points keep the per-sex colour mapping.
p1 <- p +
  geom_boxplot(color = "black") +
  geom_jitter(alpha = 0.05)
# Auto-print with axis labels, title and the black-and-white theme.
p1 +
  labs(x = "Sex", y = "Salaries(USD)") +
  ggtitle("Professors' Salaries by sex") +
  theme_bw()
####4b. Create a graph to describe the differences in salaries by professors’ rank and sex. What do you think about the graph?
library(ggplot2)
library(grid)
library(gridExtra)
# Task 4b: box plots of Salary for every rank, split by sex.
# The task asks for salaries by rank AND sex, so Sex (rather than Prof.Rank)
# is mapped to fill/colour; this draws a male and a female box side by side
# within each rank. Prof.Rank is the re-levelled rank column of salary3.
p <- ggplot(
  data = salary3,
  aes(x = Prof.Rank, y = Salary, fill = Sex, col = Sex)
)
p1 <- p +
  # Black outlines keep the boxes readable against the fill colours.
  geom_boxplot(col = "black") +
  # Jitter the raw observations inside their own dodged box; a plain
  # geom_jitter() would scatter both sexes across the whole rank position.
  geom_point(position = position_jitterdodge(), alpha = 0.05)
# Auto-print with axis labels, a title that names both grouping variables,
# and the black-and-white theme.
p1 +
  labs(x = "Rank", y = "Salaries (USD)") +
  ggtitle("Professors' salaries by rank and sex") +
  theme_bw()
###Task 5. Correlation between numeric data
####5a. Create a graph to describe the correlation between professors’ salaries and their time in service. How were professors’ salaries correlated with their time in service?
library(ggplot2)
library(grid)
library(gridExtra)
# Task 5a: scatter plot of Salary against years of service with a smoothed
# trend line (geom_smooth() with its default method and formula).
p <- ggplot(data = salary3, aes(x = Yrs.service, y = Salary))
p1 <- p +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Times in service (years)",
    y = "Professors' Salaries(USD)"
  ) +
  ggtitle(" correlation between professors’ salaries and their time in service") +
  theme_bw()
# Auto-print the finished plot.
p1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
####5b. Create a graph to describe whether the correlation between professors’ salaries and their time in service differed between male and female professors. Write a sentence to interpret the graph.
library(ggplot2)
library(grid)
library(gridExtra)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggthemes)
# Task 5b: scatter plot of Salary against years of service, coloured by Sex,
# with a separate fitted curve per sex.
p <- ggplot(
  data = salary3,
  aes(x = Yrs.service, y = Salary, fill = Sex, col = Sex)
)
# The smoother is a linear model with linear, squared and cubed terms of x,
# i.e. a cubic polynomial fit within each sex group.
p2 <- p +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2) + I(x^3)) +
  labs(
    x = "Times in service (years)",
    y = "Professors' Salaries(USD)"
  ) +
  ggtitle(" correlation between professors’ salaries and their time in service") +
  theme_bw()
# Auto-print the finished plot.
p2