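(a) LINEAR TRANSFORMATION (note: this step only loads and displays the practice data; no linear transformation is actually applied here)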

# Load the practice workbook into `data` and display it.
# NOTE(review): read_excel() is presumably readxl's reader, attached outside
# this excerpt -- confirm.
data <- read_excel(path = "practice_data.xlsx")
print(data)
## # A tibble: 5 × 4
##      ID   Age Salary Department
##   <dbl> <dbl>  <dbl> <chr>     
## 1     1    25  40000 HR        
## 2     2    30  50000 IT        
## 3     3    22  35000 IT        
## 4     4    40  80000 Finance   
## 5     5    35  60000 HR

(b) LOGARITHMIC TRANSFORMATION

# Add the natural log of Salary as a new column, then display the data.
data[["Salary_log"]] <- log(data[["Salary"]])
print(data)
## # A tibble: 5 × 5
##      ID   Age Salary Department Salary_log
##   <dbl> <dbl>  <dbl> <chr>           <dbl>
## 1     1    25  40000 HR               10.6
## 2     2    30  50000 IT               10.8
## 3     3    22  35000 IT               10.5
## 4     4    40  80000 Finance          11.3
## 5     5    35  60000 HR               11.0

(c) EXPONENTIATION

# Exponentiate the logged salaries (the inverse of the log step above in this
# block's input column), storing the result as a new column, then display it.
data[["Salary_exp_log"]] <- exp(data[["Salary_log"]])
print(data)
## # A tibble: 5 × 6
##      ID   Age Salary Department Salary_log Salary_exp_log
##   <dbl> <dbl>  <dbl> <chr>           <dbl>          <dbl>
## 1     1    25  40000 HR               10.6         40000.
## 2     2    30  50000 IT               10.8         50000 
## 3     3    22  35000 IT               10.5         35000.
## 4     4    40  80000 Finance          11.3         80000.
## 5     5    35  60000 HR               11.0         60000

(d) CENTERING

# Center Salary by subtracting its mean from every value, then display the data.
data[["Salary_centered"]] <- with(data, Salary - mean(Salary))
print(data)
## # A tibble: 5 × 7
##      ID   Age Salary Department Salary_log Salary_exp_log Salary_centered
##   <dbl> <dbl>  <dbl> <chr>           <dbl>          <dbl>           <dbl>
## 1     1    25  40000 HR               10.6         40000.          -13000
## 2     2    30  50000 IT               10.8         50000            -3000
## 3     3    22  35000 IT               10.5         35000.          -18000
## 4     4    40  80000 Finance          11.3         80000.           27000
## 5     5    35  60000 HR               11.0         60000             7000

(e) STANDARDIZATION (Z-SCORE)

# Standardize Salary to z-scores: (value - mean) / standard deviation.
# scale() returns a one-column matrix, which a tibble stores as a matrix
# column (printed as <dbl[,1]>). as.numeric() drops the matrix dimensions and
# the "scaled:center" / "scaled:scale" attributes so Salary_z is a plain
# numeric vector (<dbl>) like the other derived columns.
data$Salary_z <- as.numeric(scale(data$Salary))
data
## # A tibble: 5 × 8
##      ID   Age Salary Department Salary_log Salary_exp_log Salary_centered
##   <dbl> <dbl>  <dbl> <chr>           <dbl>          <dbl>           <dbl>
## 1     1    25  40000 HR               10.6         40000.          -13000
## 2     2    30  50000 IT               10.8         50000            -3000
## 3     3    22  35000 IT               10.5         35000.          -18000
## 4     4    40  80000 Finance          11.3         80000.           27000
## 5     5    35  60000 HR               11.0         60000             7000
## # ℹ 1 more variable: Salary_z <dbl[,1]>

(f) DUMMY VARIABLES

# One 0/1 indicator column per department value (HR, IT, Finance): 1 when the
# row's Department equals that value, 0 otherwise. Then display the data.
data[["Department_HR"]] <- ifelse(
  data[["Department"]] == "HR",
  yes = 1,
  no = 0
)
data[["Department_IT"]] <- ifelse(
  data[["Department"]] == "IT",
  yes = 1,
  no = 0
)
data[["Department_Finance"]] <- ifelse(
  data[["Department"]] == "Finance",
  yes = 1,
  no = 0
)
print(data)
## # A tibble: 5 × 11
##      ID   Age Salary Department Salary_log Salary_exp_log Salary_centered
##   <dbl> <dbl>  <dbl> <chr>           <dbl>          <dbl>           <dbl>
## 1     1    25  40000 HR               10.6         40000.          -13000
## 2     2    30  50000 IT               10.8         50000            -3000
## 3     3    22  35000 IT               10.5         35000.          -18000
## 4     4    40  80000 Finance          11.3         80000.           27000
## 5     5    35  60000 HR               11.0         60000             7000
## # ℹ 4 more variables: Salary_z <dbl[,1]>, Department_HR <dbl>,
## #   Department_IT <dbl>, Department_Finance <dbl>

View transformed dataset

# Banner line announcing the final dataset (print() shows it as a quoted string).
print(x = "Transformed Data:")
## [1] "Transformed Data:"
# Display the fully transformed dataset (top-level auto-print).
data
## # A tibble: 5 × 11
##      ID   Age Salary Department Salary_log Salary_exp_log Salary_centered
##   <dbl> <dbl>  <dbl> <chr>           <dbl>          <dbl>           <dbl>
## 1     1    25  40000 HR               10.6         40000.          -13000
## 2     2    30  50000 IT               10.8         50000            -3000
## 3     3    22  35000 IT               10.5         35000.          -18000
## 4     4    40  80000 Finance          11.3         80000.           27000
## 5     5    35  60000 HR               11.0         60000             7000
## # ℹ 4 more variables: Salary_z <dbl[,1]>, Department_HR <dbl>,
## #   Department_IT <dbl>, Department_Finance <dbl>

Save the transformed dataset as an Excel (.xlsx) file

# Write the transformed dataset to an Excel workbook in the working directory.
# NOTE(review): write_xlsx() is presumably from the writexl package, attached
# outside this excerpt -- confirm.
write_xlsx(x = data, path = "practice_data_transformed.xlsx")