# Load the practice workbook into `data` and echo it for a quick visual check.
data <- read_excel(path = "practice_data.xlsx")
data
## # A tibble: 5 × 4
## ID Age Salary Department
## <dbl> <dbl> <dbl> <chr>
## 1 1 25 40000 HR
## 2 2 30 50000 IT
## 3 3 22 35000 IT
## 4 4 40 80000 Finance
## 5 5 35 60000 HR
# Natural-log transform of Salary.
data[["Salary_log"]] <- log(data[["Salary"]])
data
## # A tibble: 5 × 5
## ID Age Salary Department Salary_log
## <dbl> <dbl> <dbl> <chr> <dbl>
## 1 1 25 40000 HR 10.6
## 2 2 30 50000 IT 10.8
## 3 3 22 35000 IT 10.5
## 4 4 40 80000 Finance 11.3
## 5 5 35 60000 HR 11.0
# Back-transform the log column with exp(); the printed values match Salary,
# which confirms the round trip.
data[["Salary_exp_log"]] <- exp(data[["Salary_log"]])
data
## # A tibble: 5 × 6
## ID Age Salary Department Salary_log Salary_exp_log
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 25 40000 HR 10.6 40000.
## 2 2 30 50000 IT 10.8 50000
## 3 3 22 35000 IT 10.5 35000.
## 4 4 40 80000 Finance 11.3 80000.
## 5 5 35 60000 HR 11.0 60000
# Mean-center Salary: each value minus the column mean.
data[["Salary_centered"]] <- data[["Salary"]] - mean(data[["Salary"]])
data
## # A tibble: 5 × 7
## ID Age Salary Department Salary_log Salary_exp_log Salary_centered
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 25 40000 HR 10.6 40000. -13000
## 2 2 30 50000 IT 10.8 50000 -3000
## 3 3 22 35000 IT 10.5 35000. -18000
## 4 4 40 80000 Finance 11.3 80000. 27000
## 5 5 35 60000 HR 11.0 60000 7000
# Standardize Salary to z-scores. scale() returns a one-column matrix (with
# "scaled:center"/"scaled:scale" attributes); as.numeric() strips the dim and
# attributes so Salary_z is stored as a plain double vector rather than a
# matrix column (<dbl[,1]>) inside the tibble.
data$Salary_z <- as.numeric(scale(data$Salary))
data
## # A tibble: 5 × 8
## ID Age Salary Department Salary_log Salary_exp_log Salary_centered
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 25 40000 HR 10.6 40000. -13000
## 2 2 30 50000 IT 10.8 50000 -3000
## 3 3 22 35000 IT 10.5 35000. -18000
## 4 4 40 80000 Finance 11.3 80000. 27000
## 5 5 35 60000 HR 11.0 60000 7000
## # ℹ 1 more variable: Salary_z <dbl>
# One 0/1 indicator column per Department level seen in the data
# (HR, IT, Finance).
data$Department_HR <- ifelse(data$Department == "HR", 1, 0)
data$Department_IT <- ifelse(data$Department == "IT", 1, 0)
data$Department_Finance <- ifelse(data$Department == "Finance", 1, 0)
data
## # A tibble: 5 × 11
## ID Age Salary Department Salary_log Salary_exp_log Salary_centered
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 25 40000 HR 10.6 40000. -13000
## 2 2 30 50000 IT 10.8 50000 -3000
## 3 3 22 35000 IT 10.5 35000. -18000
## 4 4 40 80000 Finance 11.3 80000. 27000
## 5 5 35 60000 HR 11.0 60000 7000
## # ℹ 4 more variables: Salary_z <dbl>, Department_HR <dbl>,
## # Department_IT <dbl>, Department_Finance <dbl>
# Show the final table, then write it out as a new workbook.
print("Transformed Data:")
## [1] "Transformed Data:"
print(data)
## # A tibble: 5 × 11
## ID Age Salary Department Salary_log Salary_exp_log Salary_centered
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 25 40000 HR 10.6 40000. -13000
## 2 2 30 50000 IT 10.8 50000 -3000
## 3 3 22 35000 IT 10.5 35000. -18000
## 4 4 40 80000 Finance 11.3 80000. 27000
## 5 5 35 60000 HR 11.0 60000 7000
## # ℹ 4 more variables: Salary_z <dbl>, Department_HR <dbl>,
## # Department_IT <dbl>, Department_Finance <dbl>
write_xlsx(data, "practice_data_transformed.xlsx")