This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics (min, quartiles, mean, max) of `cars`.
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Data Loading ----
# The path is built explicitly instead of calling setwd(): changing the
# working directory is a global side effect, and knitr restores the previous
# directory at the end of each chunk, so later chunks cannot rely on it.
data_dir <- "c:/Users/this one/Downloads/R Studio"
df <- read.csv(file.path(data_dir, "employee_salary_dataset.csv"))
# Quick look at the loaded data: structure, column names, first 15 rows.
utils::str(df)
colnames(df)
utils::head(df, n = 15L)
#' Categorize years of experience into a seniority level.
#'
#' Vectorized: accepts a single number or a whole numeric vector.
#'
#' @param years Numeric vector of years of experience.
#' @return Character vector the same length as `years`: "Junior" for values
#'   below 5, "Mid-Level" for 5 to 10 inclusive, "Senior" above 10, and
#'   `NA` where `years` is missing.
categorize_experience <- function(years) {
  level <- rep(NA_character_, length(years))
  # which() drops NA comparisons, so missing inputs stay NA instead of
  # raising "missing value where TRUE/FALSE needed" as a scalar `if` would.
  level[which(years < 5)] <- "Junior"
  level[which(years >= 5 & years <= 10)] <- "Mid-Level"
  level[which(years > 10)] <- "Senior"
  level
}

# Preview the categories for the first six employees.
categorize_experience(head(df$Experience_Years))
# Keep IT employees with more than 10 years of experience.
# which() ignores rows where the condition is NA; plain logical indexing
# would return an all-NA row for each of them.
is_senior_it <- df$Experience_Years > 10 & df$Department == "IT"
filtered_df <- df[which(is_senior_it), ]
head(filtered_df)
# Split the data into two tables that both carry EmployeeID, then join them
# back together on that key.
df_salary <- df[, c("EmployeeID", "Monthly_Salary")]
df_details <- df[, c("EmployeeID", "Experience_Years", "Age")]
joined_df <- merge(df_details, df_salary, by = "EmployeeID")
head(joined_df)
# Total number of missing cells across the whole data frame.
sum(is.na(df))

# Drop every row that contains at least one NA.
df_clean <- na.omit(df)

# Drop rows that exactly duplicate an earlier row.
df_clean <- df_clean[!duplicated(df_clean), ]
# Sort by monthly salary, highest first (negating the key reverses the order).
df_sorted <- df_clean[order(-df_clean$Monthly_Salary), ]
head(df_sorted)
# Work on a copy and give two columns shorter names.
df_renamed <- df_sorted
names(df_renamed)[names(df_renamed) == "Monthly_Salary"] <- "Salary"
names(df_renamed)[names(df_renamed) == "Experience_Years"] <- "Experience"
names(df_renamed)
# Add a derived column: annual salary is twelve times the monthly salary.
df_final <- df_renamed
df_final$Annual_Salary <- df_final$Salary * 12
head(df_final)
# Reproducible 70/30 train/test split.
set.seed(123)
n_rows <- nrow(df_final)
# sample() truncates a fractional size; floor() makes that explicit.
train_size <- floor(0.7 * n_rows)
# seq_len() is safe when n_rows is 0, unlike 1:n_rows which yields c(1, 0).
sample_index <- sample(seq_len(n_rows), train_size)
training_set <- df_final[sample_index, ]
# setdiff() instead of negative indexing: df[-integer(0), ] selects zero
# rows, so an empty sample would otherwise leave the test set empty too.
testing_set <- df_final[setdiff(seq_len(n_rows), sample_index), ]
dim(training_set)
# Per-column summary of the final data frame.
summary(df_final)

# Central tendency and spread of the monthly salary.
salary_mean <- mean(df_final$Salary)
salary_median <- median(df_final$Salary)
salary_range <- range(df_final$Salary)  # c(min, max)
#' Most frequent value of a vector.
#'
#' @param v A vector.
#' @return The value of `v` that occurs most often. When several values tie,
#'   the one that appears first in `v` is returned, because `unique()` keeps
#'   first-appearance order and `which.max()` picks the first maximum.
get_mode <- function(v) {
  uniqv <- unique(v)
  # match() maps each element to its position in uniqv; tabulate() counts
  # how many times each position occurs.
  counts <- tabulate(match(v, uniqv))
  uniqv[which.max(counts)]
}

salary_mode <- get_mode(df_final$Salary)
# Report the statistics one per line. cat() does not append a newline by
# itself, so each call ends with an explicit "\n".
cat("Mean Salary:", salary_mean, "\n")
cat("Median Salary:", salary_median, "\n")
cat("Mode Salary:", salary_mode, "\n")
cat("Range Salary:", salary_range, "\n")
# Scatter plot of monthly salary against years of experience.
plot(
  df_final$Experience,
  df_final$Salary,
  col = "blue",
  pch = 16,
  main = "Scatter Plot of Salary vs Experience",
  xlab = "Experience (Years)",
  ylab = "Monthly Salary"
)
# Mean salary per department, drawn as a bar chart with one colour per bar.
avg_salary_dept <- aggregate(Salary ~ Department, data = df_final, FUN = mean)
barplot(
  avg_salary_dept$Salary,
  names.arg = avg_salary_dept$Department,
  col = rainbow(nrow(avg_salary_dept)),
  main = "Average Salary by Department",
  xlab = "Department",
  ylab = "Average Salary",
  las = 2  # Rotate labels
)
# Pearson correlation between experience and monthly salary.
correlation <- cor(df_final$Experience, df_final$Salary, method = "pearson")
# cat() does not append a newline by itself, hence the trailing "\n".
cat("Pearson correlation between Experience and Salary:", correlation, "\n")