Lab 1 Practical Data Analyst

Read the file

# Load the professorial salaries data set from a local CSV file.
# NOTE(review): the path is absolute and machine-specific; consider a
# project-relative path so the script runs on other machines.
salary <- read.csv(
  file = "C:\\Users\\User\\Documents\\UTS\\AUTUMN 2024\\TRM\\Data Analyst R Basic\\Professorial Salaries.csv"
)

View the dimension of salary

# Report the number of rows and columns in the data set.
dim(salary)

## [1] 397   9

List the variable names

# Print the column (variable) names of the data set.
names(salary)

## [1] "ID"            "Rank"          "Discipline"    "Yrs.since.phd"
## [5] "Yrs.service"   "Sex"           "NPubs"         "Ncits"        
## [9] "Salary"

List the first six rows

# Preview the first six rows of the data set (head() shows rows, not names).
head(salary)

##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000

Create variable ‘salary level’

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Bucket each salary into "High" (>= 130000), "Medium" (100000 up to, but not
# including, 130000) or "Low" (< 100000). case_when() uses the first branch
# that matches, so the "Medium" branch only needs its lower bound.
salary <- salary %>%
  mutate(
    salary.level = case_when(
      Salary >= 130000 ~ "High",
      Salary >= 100000 ~ "Medium",
      Salary < 100000 ~ "Low"
    )
  )
head(salary)

##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000
##   salary.level
## 1         High
## 2         High
## 3          Low
## 4       Medium
## 5         High
## 6          Low

Create variable ‘high_salary’

# Flag high earners: `high_salary` is the string "1" when Salary is at least
# 130000 and "0" otherwise. The flag is stored as character, not numeric.
salary <- salary %>%
  mutate(
    high_salary = case_when(
      Salary >= 130000 ~ "1",
      Salary < 130000 ~ "0"
    )
  )
head(salary)

##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000
##   salary.level high_salary
## 1         High           1
## 2         High           1
## 3          Low           0
## 4       Medium           0
## 5         High           1
## 6          Low           0

Create ‘salary.aud’

# Add `salary.aud`: Salary multiplied by 1.53 (presumably a USD-to-AUD
# exchange rate — confirm the rate and the date it applies to).
salary <- salary %>%
  mutate(salary.aud = Salary * 1.53)
head(salary)

##   ID      Rank Discipline Yrs.since.phd Yrs.service  Sex NPubs Ncits Salary
## 1  1      Prof          B            19          18 Male    18    50 139750
## 2  2      Prof          B            20          16 Male     3    26 173200
## 3  3  AsstProf          B             4           3 Male     2    50  79750
## 4  4      Prof          B            45          39 Male    17    34 115000
## 5  5      Prof          B            40          41 Male    11    41 141500
## 6  6 AssocProf          B             6           6 Male     6    37  97000
##   salary.level high_salary salary.aud
## 1         High           1   213817.5
## 2         High           1   264996.0
## 3          Low           0   122017.5
## 4       Medium           0   175950.0
## 5         High           1   216495.0
## 6          Low           0   148410.0

Subset of participants: male participants in discipline A who had high salaries

# Keep only male participants in discipline "A" whose salary level is "High",
# then report the size of the resulting subset.
Men.A.High <- salary %>%
  filter(Sex == "Male" & Discipline == "A" & salary.level == "High")
dim(Men.A.High)

## [1] 40 12

New data set including 4 variables

# Build a reduced data set holding only the ID, Rank, Salary and Sex columns,
# then confirm which columns were kept.
var.select1 <- select(salary, ID, Rank, Salary, Sex)
names(var.select1)

## [1] "ID"     "Rank"   "Salary" "Sex"

Lab 1 Practical Data Analyst

2024-03-11

Read the file

View the dimension of salary

List the variable names

List the first six rows

Create variable ‘salary level’

Create variable ‘high_salary’

Create ‘salary.aud’

Subset of participants: male participants in discipline A who had high salaries

New data set including 4 variables