library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(tibble)
## Warning: package 'tibble' was built under R version 4.5.3
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.5.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.5.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ppcor)
## Warning: package 'ppcor' was built under R version 4.5.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the placement records from the CSV file into a data frame.
students <- read.csv(file = "placement_realistic_2000.csv")
Understanding the Data (Basic Exploration)
# Show each column's name, type, and first few values.
str(object = students)
## 'data.frame': 2000 obs. of 23 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Name : chr "Vihaan Sharma" "Ishaan Mehta" "Krishna Gupta" "Vihaan Joshi" ...
## $ Gender : chr "Male" "Female" "Female" "Female" ...
## $ Age : int 22 20 21 25 25 22 25 21 22 24 ...
## $ Branch : chr "ECE" "EEE" "EEE" "EEE" ...
## $ CGPA : num 6.87 5.91 8.04 5.17 7.99 5.83 8.26 9.24 7.05 6.62 ...
## $ InternalMarks : int 50 88 97 81 79 90 63 81 61 80 ...
## $ Projects : int 5 3 1 4 5 1 5 4 4 3 ...
## $ SkillsScore : int 44 50 50 65 77 38 57 34 72 91 ...
## $ CommunicationSkills: int 53 88 82 83 82 37 73 80 95 99 ...
## $ AptitudeScore : int 90 62 68 79 44 30 36 43 58 30 ...
## $ Internship : chr "Yes" "Yes" "Yes" "No" ...
## $ Certifications : int 2 1 1 4 1 2 5 0 1 2 ...
## $ Participation : int 6 3 4 2 9 3 3 4 9 9 ...
## $ ExtraActivities : chr "Sports" "None" "None" "Music" ...
## $ ParentEducation : chr "Graduate" "Postgraduate" "Postgraduate" "Graduate" ...
## $ FamilyIncome : chr "Medium" "Low" "High" "Medium" ...
## $ StressLevel : chr "Medium" "High" "Low" "Medium" ...
## $ HealthScore : int 51 86 57 85 64 88 63 88 60 91 ...
## $ PlacementStatus : chr "Placed" "Placed" "Placed" "Placed" ...
## $ CompanyName : chr "Capgemini" "Infosys" "Accenture" "Accenture" ...
## $ Package_LPA : num 8.67 4.57 11.65 7.87 7.18 ...
## $ JobRole : chr "System Engineer" "System Engineer" "Data Analyst" "Developer" ...
# Number of rows and columns in the data frame.
c(nrow(students), ncol(students))
## [1] 2000 23
Answer: The dataset contains 2000 rows and 23 variables, including CGPA, SkillsScore, and PlacementStatus.
# Count the missing values in each column.
vapply(students, function(column) sum(is.na(column)), numeric(1))
## ID Name Gender Age
## 0 0 0 0
## Branch CGPA InternalMarks Projects
## 0 0 0 0
## SkillsScore CommunicationSkills AptitudeScore Internship
## 0 0 0 0
## Certifications Participation ExtraActivities ParentEducation
## 0 0 0 0
## FamilyIncome StressLevel HealthScore PlacementStatus
## 0 0 0 0
## CompanyName Package_LPA JobRole
## 0 0 0
Answer: The dataset has no missing values, so the data are complete for analysis. No imputation is needed, and filtering and grouping can be applied directly.
# Mean CGPA across all students.
mean(students[["CGPA"]], na.rm = TRUE)
## [1] 7.491155
# Median CGPA across all students.
median(students[["CGPA"]], na.rm = TRUE)
## [1] 7.45
Answer: The mean CGPA (7.49) and the median CGPA (7.45) are nearly equal, which suggests that the CGPA distribution is roughly symmetric around 7.5.
# Smallest and largest CGPA.
range(students[["CGPA"]], na.rm = TRUE)
## [1] 5 10
# Smallest and largest salary package (LPA).
range(students[["Package_LPA"]], na.rm = TRUE)
## [1] 0.00 29.99
Answer: CGPA ranges from 5 to 10. Package_LPA ranges from 0 to 29.99, indicating wide variation in the salary offered to students; the minimum of 0 presumably corresponds to students who were not placed.
# Tabulate students per company and keep the five most frequent.
sort(table(students[["CompanyName"]]), decreasing = TRUE)[seq_len(5)]
##
## Capgemini Deloitte Accenture TCS Infosys
## 348 340 313 185 165
Answer: This identifies the top recruiting companies in the dataset based on the number of students selected.
DATA EXTRACTION & FILTERING
# Ten highest packages: keep the columns of interest, sort by package
# (largest first), and take the first ten rows.
students %>%
  dplyr::select(Name, Branch, CGPA, SkillsScore, Package_LPA) %>%
  arrange(desc(Package_LPA)) %>%
  head(n = 10)
## Name Branch CGPA SkillsScore Package_LPA
## 1 Aarav Patel EEE 9.62 72 29.99
## 2 Arjun Sharma CE 9.51 75 29.94
## 3 Myra Joshi CSE 8.92 74 29.90
## 4 Pari Joshi ME 9.90 99 29.89
## 5 Sara Gupta IT 5.16 97 29.79
## 6 Krishna Joshi ECE 9.61 92 29.76
## 7 Ananya Patel EEE 8.70 92 29.74
## 8 Vihaan Kumar CE 8.60 42 29.72
## 9 Ishaan Joshi ECE 8.26 95 29.71
## 10 Ishaan Sharma ME 8.18 77 29.66
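Answer: These are the ten highest salary packages in the dataset (29.66 LPA or more). Nine of the ten students have a CGPA above 8, which is consistent with strong academic performance, but the pattern is not universal: Sara Gupta has a CGPA of 5.16 and Vihaan Kumar a SkillsScore of 42, so a top package does not always go with both high CGPA and high skills.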
# Students with CGPA above 7 who were not placed, highest CGPA first.
students %>%
  filter(CGPA > 7 & PlacementStatus == "Not Placed") %>%
  dplyr::select(Name, Branch, CGPA, SkillsScore, Internship, PlacementStatus) %>%
  arrange(desc(CGPA))
## Name Branch CGPA SkillsScore Internship PlacementStatus
## 1 Arjun Agarwal IT 7.84 32 No Not Placed
## 2 Aditya Malhotra EEE 7.76 35 No Not Placed
## 3 Reyansh Malhotra CE 7.71 30 No Not Placed
## 4 Aditya Joshi EEE 7.42 37 No Not Placed
## 5 Krishna Sharma ME 7.27 31 No Not Placed
Answer:
Five students with a CGPA above 7 were not placed. All five have a low
SkillsScore (30-37) and no internship, which suggests that factors other
than CGPA, such as skills and practical experience, also influence placement.
# Students with SkillsScore above 75 and a package above 10 LPA, sorted by
# package (largest first).
students %>%
  filter(SkillsScore > 75, Package_LPA > 10) %>%
  arrange(desc(Package_LPA)) %>%
  dplyr::select(Name, Branch, CGPA, SkillsScore, Package_LPA) %>%
  # Show only the top ten rows; the pipeline must end in head() for the
  # truncation to apply, otherwise every matching row is printed.
  head(10)
## Name Branch CGPA SkillsScore Package_LPA
## 1 Pari Joshi ME 9.90 99 29.89
## 2 Sara Gupta IT 5.16 97 29.79
## 3 Krishna Joshi ECE 9.61 92 29.76
## 4 Ananya Patel EEE 8.70 92 29.74
## 5 Ishaan Joshi ECE 8.26 95 29.71
## 6 Ishaan Sharma ME 8.18 77 29.66
## 7 Pari Sharma CSE 8.76 76 29.66
## 8 Sara Singh EEE 9.42 83 29.60
## 9 Sai Kumar IT 8.37 85 29.48
## 10 Navya Singh CSE 9.41 96 29.46
## 11 Vihaan Mehta EEE 8.43 99 29.36
## 12 Shaurya Malhotra EEE 8.74 98 29.21
## 13 Riya Mehta IT 9.44 88 29.21
## 14 Myra Gupta CSE 8.82 99 29.20
## 15 Diya Joshi CSE 6.76 99 28.96
## 16 Aadhya Sharma EEE 9.39 77 28.93
## 17 Vivaan Mehta CSE 7.30 77 28.93
## 18 Sai Malhotra IT 8.51 89 28.86
## 19 Sai Gupta CE 8.70 96 28.84
## 20 Diya Verma IT 6.25 98 28.80
## 21 Ishaan Agarwal CSE 7.77 81 28.79
## 22 Sara Singh IT 8.49 85 28.64
## 23 Shaurya Sharma ECE 7.05 90 28.60
## 24 Myra Kumar CSE 8.28 81 28.54
## 25 Aditya Patel CSE 8.74 77 28.48
## 26 Ananya Joshi EEE 9.60 84 28.31
## 27 Navya Kumar EEE 9.25 79 28.23
## 28 Anika Verma ME 9.46 96 28.21
## 29 Aadhya Malhotra CE 6.86 76 28.21
## 30 Vihaan Mehta CE 9.16 99 28.10
## 31 Krishna Verma IT 9.09 93 28.02
## 32 Anika Kumar ME 7.09 96 27.96
## 33 Arjun Singh CSE 9.23 91 27.85
## 34 Ananya Singh ME 9.38 97 27.85
## 35 Vihaan Verma ECE 5.96 99 27.73
## 36 Navya Mehta CE 7.34 88 27.62
## 37 Diya Joshi EEE 7.33 86 27.59
## 38 Arjun Malhotra ECE 8.95 98 27.44
## 39 Navya Kumar CSE 6.35 92 27.36
## 40 Diya Mehta CSE 9.63 98 27.35
## 41 Reyansh Sharma EEE 8.08 81 27.26
## 42 Diya Joshi CSE 9.86 78 27.23
## 43 Anika Gupta CE 7.31 80 27.17
## 44 Sara Sharma ECE 6.23 95 27.15
## 45 Ananya Sharma ME 7.97 92 27.09
## 46 Sai Patel CE 7.34 78 27.06
## 47 Ira Verma CSE 8.17 95 26.93
## 48 Arjun Kumar CE 7.92 87 26.92
## 49 Vihaan Agarwal IT 9.70 93 26.88
## 50 Arjun Singh CE 7.20 96 26.88
## 51 Krishna Malhotra ME 6.66 95 26.88
## 52 Diya Sharma CSE 7.81 94 26.86
## 53 Shaurya Mehta CSE 9.29 76 26.74
## 54 Navya Joshi EEE 6.00 92 26.70
## 55 Reyansh Agarwal EEE 5.07 89 26.66
## 56 Ananya Sharma CSE 9.91 97 26.58
## 57 Sara Gupta CE 7.96 77 26.40
## 58 Shaurya Mehta CE 9.35 83 26.33
## 59 Krishna Verma IT 9.87 91 26.32
## 60 Aadhya Joshi CE 7.15 83 26.32
## 61 Sai Agarwal ME 9.87 92 26.32
## 62 Myra Kumar ECE 7.64 99 26.20
## 63 Arjun Patel CE 9.29 88 26.17
## 64 Ishaan Kumar CSE 9.72 80 26.10
## 65 Ira Verma CSE 7.98 84 26.09
## 66 Arjun Joshi ME 9.04 99 26.08
## 67 Sara Patel CE 9.73 94 26.08
## 68 Navya Kumar EEE 6.83 86 26.07
## 69 Arjun Patel EEE 7.60 92 26.05
## 70 Krishna Singh IT 8.55 94 26.01
## 71 Navya Sharma CSE 8.39 81 25.86
## 72 Arjun Agarwal CSE 7.68 81 25.85
## 73 Anika Patel ME 7.57 95 25.84
## 74 Reyansh Joshi CE 9.44 87 25.83
## 75 Shaurya Sharma CSE 9.56 96 25.83
## 76 Pari Agarwal CE 7.69 88 25.79
## 77 Aadhya Sharma CE 9.96 94 25.78
## 78 Ananya Agarwal ECE 9.85 82 25.35
## 79 Aadhya Gupta CE 8.33 81 25.28
## 80 Sara Joshi IT 6.95 97 25.19
## 81 Vihaan Joshi CE 8.02 95 25.12
## 82 Aditya Mehta EEE 8.45 78 25.08
## 83 Vivaan Verma CSE 8.03 86 25.01
## 84 Ishaan Kumar IT 8.59 98 24.94
## 85 Aditya Agarwal CSE 9.94 88 24.86
## 86 Anika Joshi ME 8.91 90 24.66
## 87 Vihaan Gupta ME 8.20 90 24.63
## 88 Reyansh Agarwal CE 9.87 84 24.62
## 89 Navya Verma ME 7.98 89 24.53
## 90 Vivaan Singh ME 5.92 99 24.52
## 91 Sara Patel ECE 8.17 91 24.49
## 92 Aadhya Singh ECE 9.96 81 24.48
## 93 Sai Kumar IT 7.54 80 24.42
## 94 Reyansh Verma IT 8.66 83 24.39
## 95 Myra Joshi CE 7.40 76 24.34
## 96 Aarav Kumar ECE 9.11 94 24.30
## 97 Reyansh Sharma ME 8.92 90 24.17
## 98 Myra Gupta EEE 9.34 81 24.17
## 99 Anika Gupta ECE 7.04 86 24.09
## 100 Myra Mehta ME 9.90 95 24.07
## 101 Arjun Verma IT 9.87 80 24.05
## 102 Ira Patel CE 9.72 77 24.02
## 103 Myra Kumar EEE 8.94 82 24.00
## 104 Ira Patel ME 7.08 90 23.98
## 105 Navya Gupta CE 8.47 92 23.86
## 106 Ira Gupta CSE 9.41 85 23.86
## 107 Navya Malhotra CE 6.82 89 23.76
## 108 Vihaan Patel CSE 7.95 80 23.73
## 109 Pari Agarwal CE 6.32 85 23.50
## 110 Navya Verma ECE 8.53 93 23.46
## 111 Aarav Mehta CE 7.76 77 23.38
## 112 Sara Gupta ME 5.55 90 23.32
## 113 Aditya Agarwal ECE 9.68 87 23.31
## 114 Navya Mehta CE 6.54 83 23.27
## 115 Myra Malhotra ECE 9.50 95 23.24
## 116 Navya Kumar EEE 7.09 96 23.19
## 117 Vivaan Patel EEE 9.76 98 23.11
## 118 Shaurya Patel ECE 6.07 82 23.04
## 119 Vihaan Agarwal CSE 9.69 86 23.04
## 120 Krishna Singh IT 9.68 79 23.02
## 121 Diya Joshi CSE 9.99 89 22.98
## 122 Navya Mehta IT 6.91 85 22.95
## 123 Vihaan Singh ME 9.19 88 22.90
## 124 Anika Sharma EEE 5.23 83 22.86
## 125 Navya Malhotra EEE 8.88 96 22.85
## 126 Shaurya Gupta ME 7.65 93 22.83
## 127 Krishna Patel CE 9.11 85 22.81
## 128 Aditya Joshi EEE 9.44 96 22.79
## 129 Ishaan Patel IT 8.20 81 22.68
## 130 Vihaan Patel EEE 6.68 96 22.66
## 131 Vihaan Gupta IT 6.78 90 22.65
## 132 Arjun Malhotra CE 9.88 81 22.51
## 133 Aarav Kumar CE 8.41 81 22.50
## 134 Ananya Verma CSE 7.94 88 22.40
## 135 Ananya Verma EEE 7.02 95 22.35
## 136 Riya Patel CE 8.66 77 22.29
## 137 Ananya Sharma ME 5.54 90 22.21
## 138 Aarav Gupta IT 9.35 99 22.18
## 139 Krishna Patel EEE 9.71 95 22.17
## 140 Riya Joshi ECE 9.84 77 22.08
## 141 Aadhya Sharma EEE 7.70 98 22.08
## 142 Navya Singh CE 9.03 97 22.02
## 143 Sara Kumar ME 8.48 95 21.97
## 144 Sai Singh ME 6.44 90 21.92
## 145 Reyansh Agarwal ECE 8.04 90 21.91
## 146 Ananya Singh IT 8.83 97 21.85
## 147 Aarav Singh EEE 7.28 83 21.83
## 148 Aarav Kumar EEE 5.13 94 21.80
## 149 Diya Joshi ME 9.22 78 21.77
## 150 Shaurya Patel ECE 9.41 76 21.74
## 151 Vivaan Verma ME 7.16 89 21.72
## 152 Reyansh Mehta CE 5.22 99 21.72
## 153 Anika Malhotra CSE 7.39 77 21.70
## 154 Arjun Kumar ECE 5.90 97 21.69
## 155 Ishaan Verma EEE 9.45 88 21.69
## 156 Aarav Gupta CSE 9.99 86 21.52
## 157 Ishaan Singh IT 6.50 93 21.47
## 158 Vihaan Gupta ME 5.43 99 21.43
## 159 Sai Gupta ME 6.19 99 21.35
## 160 Myra Patel EEE 9.63 80 21.31
## 161 Ishaan Verma ECE 9.34 80 21.25
## 162 Aditya Kumar EEE 7.36 79 21.23
## 163 Vihaan Patel ME 8.98 97 21.16
## 164 Krishna Patel ME 6.96 78 21.14
## 165 Diya Verma IT 9.35 99 21.13
## 166 Navya Joshi CE 9.31 93 21.10
## 167 Riya Verma ECE 9.21 94 20.82
## 168 Shaurya Gupta CE 6.69 99 20.70
## 169 Ira Gupta CE 9.53 89 20.59
## 170 Riya Singh EEE 9.73 98 20.55
## 171 Ishaan Mehta IT 7.76 79 20.44
## 172 Vivaan Sharma CSE 8.15 85 20.26
## 173 Reyansh Mehta CSE 6.32 91 20.10
## 174 Shaurya Patel CE 8.11 80 19.97
## 175 Ishaan Patel ME 8.12 89 19.95
## 176 Myra Patel CSE 6.57 95 19.82
## 177 Ananya Patel CSE 7.75 86 19.82
## 178 Krishna Malhotra ME 6.49 81 19.80
## 179 Diya Singh IT 7.08 93 19.77
## 180 Myra Verma IT 9.43 78 19.73
## 181 Vihaan Agarwal CE 5.90 98 19.70
## 182 Anika Singh CSE 7.48 84 19.64
## 183 Shaurya Agarwal IT 7.11 82 19.60
## 184 Vihaan Singh EEE 5.42 99 19.50
## 185 Myra Patel EEE 8.78 79 19.41
## 186 Sai Malhotra CE 9.51 77 19.38
## 187 Sai Patel ME 8.91 86 19.19
## 188 Ananya Agarwal ME 5.90 93 19.14
## 189 Ira Mehta IT 9.56 92 19.09
## 190 Pari Joshi CSE 9.90 82 19.08
## 191 Sara Verma CE 9.94 85 19.01
## 192 Pari Sharma ME 7.43 88 18.92
## 193 Aadhya Verma CSE 9.46 91 18.92
## 194 Vihaan Mehta CE 8.56 92 18.92
## 195 Shaurya Joshi EEE 7.47 98 18.91
## 196 Pari Mehta IT 8.65 88 18.91
## 197 Reyansh Sharma ME 8.79 85 18.86
## 198 Pari Agarwal IT 8.81 88 18.84
## 199 Pari Agarwal CSE 9.65 99 18.82
## 200 Diya Mehta IT 7.56 81 18.76
## 201 Navya Mehta ECE 9.84 86 18.76
## 202 Navya Verma IT 8.59 91 18.76
## 203 Shaurya Agarwal EEE 8.66 94 18.55
## 204 Ira Mehta EEE 7.14 86 18.54
## 205 Sara Verma IT 8.16 81 18.54
## 206 Vihaan Singh CE 7.30 80 18.54
## 207 Ananya Verma EEE 5.70 97 18.25
## 208 Aadhya Gupta EEE 6.16 88 18.06
## 209 Pari Singh IT 8.72 81 18.00
## 210 Pari Singh ECE 5.85 85 18.00
## 211 Aditya Patel ME 8.08 84 18.00
## 212 Navya Agarwal ME 8.04 88 17.95
## 213 Krishna Singh ECE 7.42 89 17.91
## 214 Reyansh Agarwal CSE 7.04 81 17.88
## 215 Aadhya Patel IT 9.40 79 17.84
## 216 Shaurya Joshi CE 8.64 76 17.62
## 217 Aditya Malhotra EEE 6.75 93 17.61
## 218 Aditya Gupta CSE 9.21 99 17.57
## 219 Diya Mehta IT 8.53 99 17.47
## 220 Reyansh Kumar EEE 8.71 98 17.45
## 221 Vihaan Sharma CE 9.50 78 17.44
## 222 Shaurya Gupta EEE 6.79 80 17.30
## 223 Vivaan Verma IT 9.01 78 17.28
## 224 Shaurya Patel ME 9.47 86 17.23
## 225 Aditya Singh ECE 6.80 95 17.23
## 226 Vivaan Sharma CSE 9.48 83 17.17
## 227 Diya Patel CSE 6.62 89 17.11
## 228 Pari Patel CE 6.59 93 17.09
## 229 Shaurya Singh ECE 8.14 96 17.04
## 230 Ishaan Malhotra ME 7.90 94 17.03
## 231 Krishna Gupta IT 7.25 91 16.75
## 232 Sara Sharma ME 8.49 85 16.74
## 233 Krishna Agarwal ME 9.48 79 16.62
## 234 Pari Verma ECE 7.79 91 16.55
## 235 Navya Agarwal IT 9.95 99 16.55
## 236 Aadhya Malhotra ME 8.60 91 16.48
## 237 Krishna Mehta CE 7.07 80 16.47
## 238 Aarav Joshi ECE 6.12 95 16.27
## 239 Ishaan Verma ECE 9.48 94 16.16
## 240 Aarav Gupta CE 9.25 97 16.15
## 241 Vihaan Kumar ME 7.97 99 16.05
## 242 Sara Patel IT 9.41 83 16.03
## 243 Diya Agarwal ECE 8.32 87 15.97
## 244 Ira Patel IT 9.51 76 15.91
## 245 Aadhya Gupta EEE 7.20 89 15.89
## 246 Anika Joshi IT 8.56 93 15.81
## 247 Myra Singh IT 8.42 96 15.80
## 248 Vivaan Agarwal CSE 6.04 94 15.74
## 249 Pari Gupta IT 7.09 99 15.69
## 250 Myra Sharma EEE 8.63 84 15.68
## 251 Vihaan Gupta CE 9.05 92 15.52
## 252 Sara Kumar ME 7.96 82 15.51
## 253 Sara Verma ME 6.35 96 15.44
## 254 Myra Sharma EEE 8.42 88 15.38
## 255 Shaurya Joshi ME 7.70 82 15.37
## 256 Arjun Kumar IT 5.47 98 15.36
## 257 Sai Sharma ME 9.18 88 15.36
## 258 Reyansh Gupta CSE 8.70 97 15.22
## 259 Sara Kumar CE 7.44 86 15.20
## 260 Sai Singh ECE 9.06 90 15.15
## 261 Vivaan Verma IT 7.52 98 15.09
## 262 Sara Patel EEE 7.96 89 15.09
## 263 Sara Mehta ME 8.60 94 15.03
## 264 Aadhya Patel IT 7.97 97 14.98
## 265 Anika Singh ECE 9.66 99 14.87
## 266 Diya Sharma CSE 5.91 96 14.85
## 267 Aadhya Mehta CE 9.05 93 14.79
## 268 Shaurya Patel IT 8.57 79 14.73
## 269 Reyansh Malhotra EEE 9.48 87 14.65
## 270 Vihaan Mehta EEE 8.06 94 14.64
## 271 Aditya Agarwal IT 5.56 89 14.61
## 272 Pari Gupta CSE 7.16 86 14.57
## 273 Reyansh Agarwal ECE 6.73 89 14.56
## 274 Anika Agarwal ME 9.84 77 14.40
## 275 Vivaan Agarwal IT 7.57 94 14.35
## 276 Vivaan Patel ME 7.55 90 14.35
## 277 Pari Patel ECE 8.50 99 14.33
## 278 Pari Kumar ECE 8.60 86 14.32
## 279 Aditya Gupta EEE 9.52 83 14.28
## 280 Arjun Agarwal CE 8.96 81 14.22
## 281 Aadhya Patel EEE 8.26 95 14.18
## 282 Vihaan Patel CE 7.72 99 14.18
## 283 Krishna Agarwal ECE 9.23 80 14.06
## 284 Aadhya Joshi EEE 9.44 95 14.04
## 285 Sai Joshi ME 8.95 76 14.00
## 286 Ira Kumar EEE 8.19 87 13.86
## 287 Vivaan Singh CE 8.72 84 13.84
## 288 Aadhya Joshi ME 8.24 94 13.80
## 289 Riya Mehta EEE 9.16 96 13.69
## 290 Pari Malhotra EEE 7.02 91 13.69
## 291 Vivaan Patel ME 7.11 99 13.61
## 292 Shaurya Agarwal CE 9.88 78 13.54
## 293 Pari Mehta IT 9.11 98 13.49
## 294 Sai Malhotra IT 7.45 76 13.35
## 295 Vihaan Mehta CSE 9.72 82 13.26
## 296 Navya Verma CE 9.85 81 13.24
## 297 Aarav Kumar IT 8.57 96 13.23
## 298 Diya Verma ECE 7.36 91 13.21
## 299 Riya Patel IT 8.98 85 13.00
## 300 Ishaan Agarwal ECE 7.58 94 12.97
## 301 Sai Patel CSE 7.95 84 12.93
## 302 Pari Malhotra ME 9.92 87 12.91
## 303 Anika Kumar ME 7.56 80 12.87
## 304 Sara Joshi ME 6.48 94 12.79
## 305 Ira Patel CSE 8.63 87 12.71
## 306 Ishaan Gupta IT 7.50 99 12.62
## 307 Aditya Verma EEE 8.04 77 12.48
## 308 Ananya Malhotra CSE 9.72 86 12.44
## 309 Arjun Gupta EEE 8.48 96 12.36
## 310 Riya Joshi CSE 8.53 76 12.34
## 311 Anika Agarwal EEE 8.97 89 12.25
## 312 Reyansh Verma IT 9.92 89 12.23
## 313 Pari Kumar ECE 9.47 94 12.21
## 314 Ishaan Sharma CE 8.78 81 12.20
## 315 Myra Kumar ME 8.33 88 12.16
## 316 Myra Gupta CSE 6.60 99 12.03
## 317 Aadhya Joshi EEE 7.85 98 12.00
## 318 Riya Malhotra ME 8.43 96 11.98
## 319 Aadhya Gupta ME 6.44 76 11.96
## 320 Sai Gupta CE 5.28 76 11.95
## 321 Aditya Joshi CE 7.01 86 11.90
## 322 Reyansh Singh EEE 7.42 79 11.90
## 323 Anika Patel IT 9.12 82 11.90
## 324 Aadhya Verma CSE 6.94 80 11.90
## 325 Vivaan Patel IT 9.95 83 11.90
## 326 Shaurya Joshi CSE 5.35 85 11.89
## 327 Pari Malhotra CSE 7.64 88 11.89
## 328 Riya Singh EEE 5.76 93 11.89
## 329 Ira Singh CSE 6.77 79 11.85
## 330 Riya Mehta ME 6.81 82 11.85
## 331 Ananya Verma CE 7.71 88 11.84
## 332 Ira Kumar ECE 5.13 80 11.84
## 333 Krishna Malhotra EEE 8.28 84 11.83
## 334 Diya Singh ME 9.47 77 11.81
## 335 Anika Patel CSE 8.23 96 11.80
## 336 Vihaan Verma ME 7.32 93 11.75
## 337 Arjun Gupta IT 6.04 83 11.75
## 338 Sara Singh ECE 7.01 95 11.74
## 339 Riya Verma ME 5.54 85 11.71
## 340 Myra Sharma ME 8.27 85 11.68
## 341 Shaurya Mehta ME 8.88 85 11.64
## 342 Aarav Agarwal CSE 5.96 81 11.64
## 343 Ira Malhotra IT 5.05 84 11.63
## 344 Vihaan Mehta CSE 7.71 92 11.59
## 345 Pari Mehta CSE 7.78 76 11.55
## 346 Diya Verma CSE 5.98 92 11.55
## 347 Krishna Malhotra ECE 7.95 92 11.53
## 348 Arjun Patel EEE 5.35 96 11.53
## 349 Reyansh Patel EEE 6.22 87 11.52
## 350 Aadhya Mehta ECE 6.07 78 11.48
## 351 Riya Malhotra CSE 5.42 97 11.47
## 352 Ananya Kumar EEE 5.98 85 11.47
## 353 Reyansh Mehta IT 9.81 82 11.46
## 354 Shaurya Verma ME 5.17 82 11.42
## 355 Pari Singh CE 7.70 89 11.41
## 356 Ananya Agarwal ME 7.14 96 11.41
## 357 Sara Malhotra CE 7.43 99 11.39
## 358 Aadhya Kumar EEE 9.61 97 11.38
## 359 Ananya Singh ECE 5.56 76 11.37
## 360 Vivaan Verma IT 8.80 79 11.37
## 361 Reyansh Mehta ECE 6.19 78 11.35
## 362 Vihaan Agarwal CSE 9.93 85 11.32
## 363 Aditya Verma ME 6.39 80 11.31
## 364 Sara Singh ME 8.69 98 11.24
## 365 Diya Joshi EEE 5.12 84 11.24
## 366 Vivaan Kumar ECE 9.32 91 11.21
## 367 Aadhya Patel ECE 8.56 82 11.19
## 368 Navya Agarwal IT 5.32 78 11.19
## 369 Diya Agarwal EEE 8.14 86 11.18
## 370 Krishna Patel EEE 8.22 95 11.13
## 371 Ira Verma CSE 6.52 81 11.11
## 372 Vivaan Kumar IT 6.08 97 11.10
## 373 Reyansh Patel EEE 5.43 76 11.10
## 374 Vihaan Malhotra CE 9.48 79 11.10
## 375 Anika Malhotra IT 8.69 79 11.09
## 376 Navya Malhotra ECE 6.29 78 11.08
## 377 Aditya Singh EEE 5.74 86 11.07
## 378 Diya Joshi ECE 7.74 81 11.04
## 379 Vihaan Singh IT 7.95 91 10.99
## 380 Sai Sharma ME 6.77 87 10.98
## 381 Reyansh Verma ME 7.19 85 10.98
## 382 Arjun Joshi ME 8.66 77 10.97
## 383 Myra Joshi IT 9.49 81 10.95
## 384 Ishaan Gupta CE 5.27 88 10.95
## 385 Vivaan Verma ECE 6.77 98 10.94
## 386 Shaurya Kumar ECE 7.89 93 10.91
## 387 Aadhya Verma ME 7.14 76 10.91
## 388 Anika Malhotra IT 7.58 91 10.89
## 389 Ananya Patel EEE 8.64 80 10.89
## 390 Navya Mehta IT 6.95 87 10.86
## 391 Aditya Kumar CE 8.28 87 10.85
## 392 Ira Joshi CE 5.25 89 10.83
## 393 Riya Sharma EEE 8.58 98 10.82
## 394 Arjun Singh EEE 6.13 91 10.81
## 395 Myra Kumar ME 6.41 91 10.81
## 396 Sara Mehta EEE 5.07 92 10.79
## 397 Ishaan Singh EEE 6.70 84 10.77
## 398 Aditya Mehta ECE 5.58 92 10.77
## 399 Diya Singh ECE 6.21 95 10.76
## 400 Arjun Kumar EEE 7.41 76 10.76
## 401 Anika Agarwal ECE 8.32 81 10.71
## 402 Sai Singh IT 5.23 86 10.68
## 403 Aadhya Singh ME 8.93 87 10.68
## 404 Navya Patel CE 9.64 87 10.67
## 405 Ira Joshi ECE 7.65 79 10.66
## 406 Aadhya Patel IT 8.02 86 10.66
## 407 Pari Kumar CSE 5.71 84 10.65
## 408 Arjun Agarwal CE 7.03 91 10.65
## 409 Aditya Agarwal ECE 6.15 77 10.61
## 410 Anika Singh CE 8.58 95 10.60
## 411 Aditya Patel ECE 9.84 89 10.59
## 412 Arjun Malhotra ECE 7.79 77 10.54
## 413 Aarav Verma ME 7.30 96 10.53
## 414 Diya Gupta ECE 5.70 99 10.49
## 415 Ira Singh EEE 8.88 87 10.49
## 416 Diya Sharma CSE 6.77 76 10.48
## 417 Navya Sharma CE 5.56 89 10.47
## 418 Shaurya Gupta EEE 5.73 88 10.47
## 419 Navya Joshi CSE 6.15 92 10.42
## 420 Reyansh Malhotra IT 5.48 93 10.40
## 421 Navya Kumar ECE 9.21 85 10.40
## 422 Anika Joshi CE 8.44 82 10.40
## 423 Sai Verma CSE 9.23 80 10.40
## 424 Sara Verma EEE 5.07 94 10.38
## 425 Navya Mehta ME 5.57 86 10.35
## 426 Ishaan Gupta CE 5.05 80 10.34
## 427 Anika Joshi IT 5.00 89 10.34
## 428 Ananya Malhotra EEE 6.06 80 10.34
## 429 Arjun Verma IT 7.86 90 10.34
## 430 Aarav Agarwal EEE 8.58 81 10.33
## 431 Vihaan Sharma CE 6.69 76 10.30
## 432 Reyansh Joshi CSE 6.69 88 10.22
## 433 Ira Joshi IT 5.46 95 10.22
## 434 Ira Agarwal ECE 8.93 83 10.18
## 435 Krishna Joshi EEE 8.95 83 10.17
## 436 Aditya Joshi IT 5.77 88 10.16
## 437 Arjun Mehta CE 8.60 96 10.15
## 438 Myra Verma CSE 6.39 89 10.12
## 439 Myra Malhotra ME 9.36 84 10.12
## 440 Ananya Singh IT 7.50 81 10.10
## 441 Ananya Singh EEE 5.76 86 10.08
## 442 Aadhya Joshi EEE 7.50 85 10.07
## 443 Vivaan Malhotra IT 5.64 78 10.06
## 444 Pari Kumar EEE 7.38 91 10.04
# NOTE(review): this call stands alone rather than being piped into, so it
# evaluates head() on the literal 10 and prints `[1] 10`; it does not
# truncate any data frame.
head(10)
## [1] 10
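Answer: The filter returns 444 students who combine a SkillsScore above 75 with a package above 10 LPA. Students with higher skill scores tend to receive higher salary packages (the correlation between SkillsScore and Package_LPA is 0.49), highlighting the importance of technical expertise in placement.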
# Add a three-level Readiness label. Conditions are checked in order, so a
# student who fails the first rule can still qualify as "Moderately Ready".
students$Readiness <- with(
  students,
  case_when(
    CGPA > 8 & SkillsScore > 75 ~ "Highly Ready",
    CGPA > 6.5 ~ "Moderately Ready",
    TRUE ~ "Low Ready"
  )
)
# Number of students in each readiness level.
table(students[["Readiness"]])
##
## Highly Ready Low Ready Moderately Ready
## 301 599 1100
Answer:
Students are classified into readiness levels based on CGPA and skills,
helping identify their preparedness for placement opportunities.
# Classify one student's placement chance from CGPA and skills score.
#
# Arguments:
#   cgpa   - a single numeric CGPA value.
#   skills - a single numeric skills score.
#
# Returns a single string:
#   "High Chance"     when cgpa > 8 and skills > 75,
#   "Moderate Chance" when cgpa > 6.5 (and the first rule did not match),
#   "Low Chance"      otherwise.
#
# The function works on one student at a time; apply it over columns with
# mapply() or a similar helper.
placement_prediction <- function(cgpa, skills) {
  # `if` needs exactly one TRUE/FALSE, so use the scalar, short-circuiting
  # `&&` rather than the elementwise `&`.
  if (cgpa > 8 && skills > 75) {
    "High Chance"
  } else if (cgpa > 6.5) {
    "Moderate Chance"
  } else {
    "Low Chance"
  }
}
# Apply the scalar classifier row by row, pairing each CGPA with the
# matching SkillsScore, and store the labels as a new column.
students[["PlacementChance"]] <- mapply(
  FUN = placement_prediction,
  cgpa = students$CGPA,
  skills = students$SkillsScore
)
# First ten placement-chance labels.
head(students[["PlacementChance"]], n = 10)
## [1] "Moderate Chance" "Low Chance" "Moderate Chance" "Low Chance"
## [5] "Moderate Chance" "Low Chance" "Moderate Chance" "Moderate Chance"
## [9] "Moderate Chance" "Moderate Chance"
# Number of students in each placement-chance category.
table(students[["PlacementChance"]])
##
## High Chance Low Chance Moderate Chance
## 301 599 1100
Answer:
The function is applied to the entire dataset to classify all students
into different placement readiness levels.
Group-Wise Analysis
This section focuses on analyzing the dataset by grouping data based on different categories such as branch, company, and internship status. It helps in identifying patterns and relationships between variables.
# Mean package per company, computed over placed students only.
students %>%
  filter(PlacementStatus %in% "Placed") %>%
  group_by(CompanyName) %>%
  summarise(avg_package = mean(Package_LPA, na.rm = TRUE))
## # A tibble: 9 × 2
## CompanyName avg_package
## <chr> <dbl>
## 1 Accenture 8.99
## 2 Amazon 20.8
## 3 Capgemini 9.04
## 4 Deloitte 9.01
## 5 Google 21.2
## 6 Infosys 4.42
## 7 Microsoft 20.7
## 8 TCS 4.49
## 9 Wipro 4.48
Answer: Different companies offer varying salary packages, with top-tier companies providing higher average salaries.
# Count students per Branch x PlacementStatus, then divide each count by its
# branch total. The result stays grouped by Branch.
placement_rate <- students %>%
  group_by(Branch, PlacementStatus) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(rate = n / sum(n))
placement_rate
## # A tibble: 12 × 4
## # Groups: Branch [6]
## Branch PlacementStatus n rate
## <chr> <chr> <int> <dbl>
## 1 CE Not Placed 13 0.0368
## 2 CE Placed 340 0.963
## 3 CSE Not Placed 6 0.0177
## 4 CSE Placed 333 0.982
## 5 ECE Not Placed 5 0.0164
## 6 ECE Placed 300 0.984
## 7 EEE Not Placed 11 0.0309
## 8 EEE Placed 345 0.969
## 9 IT Not Placed 11 0.0350
## 10 IT Placed 303 0.965
## 11 ME Not Placed 6 0.0180
## 12 ME Placed 327 0.982
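Answer: This calculates, for each branch, the proportion of students who were placed and not placed, helping identify branches with higher placement success. Placement rates are high in every branch, ranging from about 96% (CE) to about 98% (ECE).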
# Average skills score and head count per branch, ranked from the highest
# average downwards.
students %>%
  group_by(Branch) %>%
  summarise(
    avg_skills = round(mean(SkillsScore, na.rm = TRUE), digits = 2),
    total_students = n()
  ) %>%
  arrange(desc(avg_skills)) %>%
  mutate(rank = seq_len(n()))
## # A tibble: 6 × 4
## Branch avg_skills total_students rank
## <chr> <dbl> <int> <int>
## 1 IT 66.8 314 1
## 2 ME 66.2 333 2
## 3 EEE 65.5 356 3
## 4 CE 65.3 353 4
## 5 CSE 64.5 339 5
## 6 ECE 64.1 305 6
Answer:
The analysis compares the average skill scores across different
branches.
Branches with higher average skill scores indicate stronger technical
capabilities among students.
The ranking helps identify the top-performing branches in terms of
skills.
Data Visualization
This section uses graphical techniques to visualize relationships and patterns in the dataset.
# CGPA histograms, one panel per placement status.
ggplot(data = students, mapping = aes(x = CGPA, fill = PlacementStatus)) +
  geom_histogram(alpha = 0.7, bins = 20) +
  facet_wrap(vars(PlacementStatus)) +
  ggtitle("CGPA Distribution by Placement Status") +
  xlab("CGPA") +
  ylab("Frequency")
Answer: The histograms show the distribution of CGPA separately for placed and not-placed students, indicating how academic performance is spread within each group.
# Mean package per branch, drawn as points joined by a line. `group = 1`
# places all branches in one group so a single line is drawn.
students %>%
  group_by(Branch) %>%
  summarise(avg_package = mean(Package_LPA, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = Branch, y = avg_package, group = 1)) +
  geom_line(colour = "red") +
  geom_point(size = 3) +
  ggtitle("Average Package by Branch") +
  xlab("Branch") +
  ylab("Average Package (LPA)")
Answer: The line chart shows how the average salary package varies across branches, helping identify trends and differences in placement outcomes.
# CGPA against package, coloured by placement status, with a linear trend
# line per status and no confidence band.
ggplot(
  data = students,
  mapping = aes(x = CGPA, y = Package_LPA, colour = PlacementStatus)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("CGPA vs Salary Package with Trend Line") +
  xlab("CGPA") +
  ylab("Package (LPA)")
## `geom_smooth()` using formula = 'y ~ x'
Answer:
The scatter plot with a regression trend line shows a positive
relationship between CGPA and salary package.
Students with higher CGPA tend to receive better salary offers, and the
trend varies based on placement status.
# Average package and head count per Branch x Readiness for placed students.
# Within this pipeline Readiness is recomputed as a two-level label
# ("Highly Ready" vs. "Low Ready"), replacing the existing column's values.
result <- students %>%
  filter(PlacementStatus == "Placed") %>%
  mutate(
    Readiness = ifelse(
      CGPA > 8 & SkillsScore > 75,
      yes = "Highly Ready",
      no = "Low Ready"
    )
  ) %>%
  group_by(Branch, Readiness) %>%
  summarise(
    avg_package = mean(Package_LPA, na.rm = TRUE),
    total_students = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_package))
result
## # A tibble: 12 × 4
## Branch Readiness avg_package total_students
## <chr> <chr> <dbl> <int>
## 1 CSE Highly Ready 18.2 45
## 2 IT Highly Ready 17.2 52
## 3 EEE Highly Ready 16.6 52
## 4 ME Highly Ready 16.0 50
## 5 ECE Highly Ready 16.0 42
## 6 CE Highly Ready 15.4 60
## 7 CSE Low Ready 9.91 288
## 8 EEE Low Ready 9.81 293
## 9 ME Low Ready 9.61 277
## 10 CE Low Ready 9.56 280
## 11 IT Low Ready 8.66 251
## 12 ECE Low Ready 8.65 258
# Side-by-side bars of the precomputed average package per branch, split by
# readiness level.
ggplot(data = result, mapping = aes(x = Branch, y = avg_package, fill = Readiness)) +
  geom_col(position = "dodge") +
  ggtitle("Average Package by Branch and Readiness") +
  xlab("Branch") +
  ylab("Average Package (LPA)")
Answer: This analysis groups students based on branch and placement readiness and calculates the average salary package. It shows that students categorized as highly ready tend to receive higher salary packages across different branches, highlighting the importance of both academic performance and skills in placement outcomes.
# Quartiles and interquartile range of the salary package.
Q1 <- quantile(students$Package_LPA, probs = 0.25)
Q3 <- quantile(students$Package_LPA, probs = 0.75)
iqr_val <- IQR(students$Package_LPA)
# Keep rows above the upper fence Q3 + 1.5 * IQR (Q1 is not used in this
# filter, so only high-end outliers are selected).
outliers <- filter(students, Package_LPA > (Q3 + 1.5 * iqr_val))
head(outliers, n = 10)
## ID Name Gender Age Branch CGPA InternalMarks Projects SkillsScore
## 1 28 Reyansh Verma Female 20 CE 9.75 95 5 63
## 2 42 Ananya Sharma Female 20 ME 7.97 74 1 92
## 3 59 Arjun Gupta Male 21 EEE 9.07 79 3 55
## 4 60 Myra Malhotra Male 25 ECE 9.50 45 3 95
## 5 67 Ananya Verma Female 25 CSE 7.94 92 1 88
## 6 69 Sai Patel Male 21 CE 7.34 40 3 78
## 7 71 Anika Gupta Male 25 ECE 7.04 96 5 86
## 8 77 Anika Sharma Female 24 EEE 5.23 60 3 83
## 9 83 Navya Verma Female 21 ME 7.98 94 4 89
## 10 84 Aditya Patel Female 22 EEE 7.60 94 5 71
## CommunicationSkills AptitudeScore Internship Certifications Participation
## 1 33 81 Yes 1 1
## 2 50 88 No 5 1
## 3 67 91 Yes 0 7
## 4 36 82 No 2 8
## 5 78 80 Yes 4 6
## 6 76 74 Yes 1 2
## 7 96 96 Yes 2 1
## 8 69 94 Yes 2 4
## 9 68 74 Yes 0 4
## 10 42 95 Yes 1 3
## ExtraActivities ParentEducation FamilyIncome StressLevel HealthScore
## 1 None Postgraduate Medium High 82
## 2 Sports Postgraduate Low Medium 52
## 3 Art Postgraduate Low Low 66
## 4 Music Postgraduate High Low 82
## 5 Music Graduate Low Low 56
## 6 Music School Medium Medium 87
## 7 None Postgraduate Medium High 81
## 8 Sports Postgraduate High High 83
## 9 Sports Graduate Low Low 67
## 10 Art Postgraduate Medium Low 98
## PlacementStatus CompanyName Package_LPA JobRole Readiness
## 1 Placed Amazon 23.27 Data Analyst Moderately Ready
## 2 Placed Microsoft 27.09 Data Analyst Moderately Ready
## 3 Placed Amazon 27.79 Developer Moderately Ready
## 4 Placed Google 23.24 Software Engineer Highly Ready
## 5 Placed Google 22.40 Data Analyst Moderately Ready
## 6 Placed Google 27.06 Developer Moderately Ready
## 7 Placed Amazon 24.09 Software Engineer Moderately Ready
## 8 Placed Microsoft 22.86 Software Engineer Low Ready
## 9 Placed Amazon 24.53 Data Analyst Moderately Ready
## 10 Placed Google 27.66 Developer Moderately Ready
## PlacementChance
## 1 Moderate Chance
## 2 Moderate Chance
## 3 Moderate Chance
## 4 High Chance
## 5 Moderate Chance
## 6 Moderate Chance
## 7 Moderate Chance
## 8 Low Chance
## 9 Moderate Chance
## 10 Moderate Chance
Answer:
High salary outliers represent top-performing students or premium
company offers.
# Interquartile range of CGPA per branch, smallest spread first.
students %>%
  group_by(Branch) %>%
  summarise(iqr_cgpa = IQR(x = CGPA, na.rm = TRUE)) %>%
  arrange(iqr_cgpa)
## # A tibble: 6 × 2
## Branch iqr_cgpa
## <chr> <dbl>
## 1 CSE 2.42
## 2 CE 2.48
## 3 ME 2.53
## 4 ECE 2.53
## 5 IT 2.57
## 6 EEE 2.59
Answer:
Lower IQR indicates more consistent academic performance within the
branch.
# Package against the product CGPA * SkillsScore, with a fitted linear trend.
ggplot(
  data = mutate(students, combo = CGPA * SkillsScore),
  mapping = aes(x = combo, y = Package_LPA)
) +
  geom_point(colour = "purple") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
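Answer:
The plot relates the product CGPA x SkillsScore to salary, with a fitted
linear trend. Whether this combined term is more strongly associated with
salary than CGPA or SkillsScore individually should be confirmed by
comparing its correlation with Package_LPA against the individual
correlations; the plot alone does not establish it.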
# Students with CGPA above 8 whose package falls below the first quartile of
# all packages.
students %>%
  filter(CGPA > 8, Package_LPA < quantile(Package_LPA, probs = 0.25)) %>%
  dplyr::select(Name, CGPA, Package_LPA)
## Name CGPA Package_LPA
## 1 Aarav Verma 8.26 4.35
## 2 Reyansh Mehta 9.24 4.57
## 3 Sai Agarwal 8.48 4.67
## 4 Aarav Singh 8.83 3.23
## 5 Vivaan Agarwal 8.66 3.47
## 6 Krishna Agarwal 8.18 4.67
## 7 Myra Sharma 8.47 3.45
## 8 Aarav Singh 9.73 3.64
## 9 Riya Verma 8.57 5.09
## 10 Aditya Singh 8.89 3.38
## 11 Arjun Mehta 8.52 5.29
## 12 Sara Malhotra 9.08 3.63
## 13 Krishna Patel 9.64 4.05
## 14 Vihaan Mehta 8.50 4.55
## 15 Shaurya Mehta 9.27 3.25
## 16 Riya Verma 9.48 4.45
## 17 Vivaan Gupta 8.28 5.61
## 18 Shaurya Malhotra 9.19 5.01
## 19 Aarav Singh 9.49 4.79
## 20 Vivaan Singh 9.28 3.06
## 21 Myra Malhotra 9.29 3.16
## 22 Riya Agarwal 9.06 5.29
## 23 Aarav Patel 9.80 4.48
## 24 Vihaan Sharma 8.21 4.05
## 25 Diya Gupta 9.96 4.96
## 26 Aditya Patel 8.38 4.04
## 27 Vivaan Sharma 8.61 5.53
## 28 Reyansh Verma 9.24 4.22
## 29 Ananya Mehta 9.66 3.19
## 30 Pari Mehta 8.43 3.35
## 31 Riya Malhotra 8.03 5.15
## 32 Riya Agarwal 9.22 5.06
## 33 Diya Gupta 8.03 4.61
## 34 Ananya Malhotra 9.99 5.13
## 35 Aadhya Joshi 9.64 4.15
## 36 Sai Gupta 9.38 4.54
## 37 Anika Gupta 9.57 4.75
## 38 Pari Agarwal 8.17 5.60
## 39 Krishna Singh 8.26 3.77
## 40 Myra Verma 10.00 3.74
## 41 Anika Joshi 9.58 5.40
## 42 Diya Patel 8.51 4.00
## 43 Navya Sharma 9.27 3.08
## 44 Reyansh Malhotra 8.40 3.39
## 45 Arjun Malhotra 9.16 4.85
## 46 Sara Joshi 8.08 3.30
## 47 Navya Malhotra 9.37 4.86
## 48 Krishna Agarwal 9.45 5.29
## 49 Shaurya Agarwal 8.52 3.55
## 50 Aadhya Singh 8.16 3.63
## 51 Shaurya Verma 8.79 3.57
## 52 Riya Verma 8.92 3.68
## 53 Navya Singh 9.83 4.19
## 54 Aadhya Sharma 9.58 5.21
## 55 Shaurya Singh 9.08 4.76
## 56 Anika Mehta 9.85 4.62
## 57 Shaurya Malhotra 8.58 4.64
## 58 Vivaan Joshi 9.61 3.69
## 59 Reyansh Sharma 9.00 4.03
## 60 Navya Malhotra 9.18 4.73
## 61 Arjun Patel 8.26 3.62
## 62 Aditya Patel 8.09 3.73
## 63 Aarav Sharma 8.64 3.80
## 64 Ishaan Joshi 9.05 4.68
## 65 Myra Gupta 8.90 4.83
## 66 Pari Verma 9.09 5.39
## 67 Krishna Malhotra 8.40 4.55
## 68 Shaurya Singh 8.58 3.79
## 69 Krishna Patel 9.05 4.66
## 70 Pari Sharma 8.27 5.22
## 71 Vihaan Gupta 8.20 5.26
## 72 Aadhya Kumar 8.89 3.34
## 73 Vihaan Kumar 8.69 3.55
## 74 Shaurya Gupta 8.05 4.05
## 75 Sara Malhotra 9.69 4.62
## 76 Ananya Mehta 9.99 3.64
## 77 Ananya Malhotra 8.26 3.36
## 78 Aarav Verma 9.54 3.56
## 79 Aarav Malhotra 8.71 3.57
## 80 Riya Patel 8.72 4.85
## 81 Sai Kumar 8.04 3.52
## 82 Shaurya Malhotra 9.29 3.22
## 83 Sara Singh 8.75 4.34
## 84 Vihaan Verma 8.22 5.46
## 85 Krishna Joshi 9.77 3.24
## 86 Sai Sharma 8.82 4.52
## 87 Arjun Kumar 9.02 4.58
## 88 Sai Malhotra 9.86 5.58
## 89 Aarav Sharma 9.05 3.40
## 90 Reyansh Mehta 8.63 4.73
## 91 Ananya Singh 8.91 5.22
## 92 Aadhya Malhotra 8.29 3.07
## 93 Vihaan Mehta 8.75 3.49
## 94 Sara Sharma 9.62 5.01
Answer:
These cases highlight mismatch between academic performance and
placement outcome.
# Drop packages at or above the 95th percentile, then compute the mean
# package with and without those rows.
clean_data <- filter(students, Package_LPA < quantile(Package_LPA, probs = 0.95))
mean_original <- mean(students[["Package_LPA"]], na.rm = TRUE)
mean_clean <- mean(clean_data[["Package_LPA"]], na.rm = TRUE)
mean_original
## [1] 10.21996
# Mean package after dropping the top 5% of values.
print(mean_clean)
## [1] 9.290011
Answer:
Removing the top 5% of packages (values at or above the 95th percentile)
lowers the mean salary from 10.22 to 9.29 LPA, showing that the highest
packages pull the average upward. The trimmed mean provides a more
realistic estimate of the typical salary.
# Pearson correlation test between CGPA and salary package.
cor.test(x = students$CGPA, y = students$Package_LPA, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: students$CGPA and students$Package_LPA
## t = 16.292, df = 1998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3031576 0.3805574
## sample estimates:
## cor
## 0.3424384
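Answer:
The Pearson correlation (r = 0.34, 95% CI 0.30 to 0.38) indicates a moderate
positive linear relationship between CGPA and salary, and the p-value
(< 2.2e-16) shows that it is statistically significant. This confirms an
association between CGPA and salary; correlation alone does not establish
that a higher CGPA causes a higher salary.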
# Partial correlation between CGPA and package, controlling for SkillsScore.
pcor.test(
  x = students$CGPA,
  y = students$Package_LPA,
  z = students$SkillsScore
)
## estimate p.value statistic n gp Method
## 1 0.3860548 4.774671e-72 18.70178 2000 1 pearson
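Answer:
Partial correlation measures the relationship between CGPA and salary
while removing the effect of SkillsScore. Here the partial correlation
(0.39) is slightly higher than the simple correlation (0.34), so the
CGPA-salary relationship is not explained by SkillsScore; CGPA is
associated with salary independently of skills.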
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.3
## corrplot 0.95 loaded
# Correlation matrix of the three numeric variables (rows with missing
# values excluded), drawn as a colour-coded grid.
corr_mat <- cor(
  students[c("CGPA", "SkillsScore", "Package_LPA")],
  use = "complete.obs"
)
corrplot(corr_mat, method = "color")
Answer:
The heatmap highlights strength and direction of relationships. Stronger
correlations are shown by more intense colors, helping identify key
influencing variables.
# Scatterplot matrix of CGPA, SkillsScore and salary package.
pairs(students[c("CGPA", "SkillsScore", "Package_LPA")])
Answer:
Pair plots reveal linear or non-linear relationships and help visually
confirm correlation patterns between variables.
# Pairwise Pearson correlations between CGPA, SkillsScore and salary package,
# using only rows where all three values are present.
cor_matrix <- cor(
  subset(students, select = c(CGPA, SkillsScore, Package_LPA)),
  use = "complete.obs"
)
print(cor_matrix)
## CGPA SkillsScore Package_LPA
## CGPA 1.00000000 0.01278122 0.3424384
## SkillsScore 0.01278122 1.00000000 0.4916104
## Package_LPA 0.34243837 0.49161041 1.0000000
Answer:
Correlation analysis shows that both CGPA and SkillsScore are positively
correlated with salary. SkillsScore has the stronger correlation (about
0.49, versus about 0.34 for CGPA), while CGPA and SkillsScore are almost
uncorrelated with each other (about 0.01). This indicates that placement
outcomes depend on a combination of academic and skill-based factors.
library(caret)
## Warning: package 'caret' was built under R version 4.5.3
## Loading required package: lattice
library(ggplot2)
# Fix the RNG seed: createDataPartition() draws a random split, so without a
# seed the partition, the fitted model and the plot differ on every run.
set.seed(1)
# Train-test split (70% of rows for training, the rest for testing)
train_index <- createDataPartition(students$Package_LPA, p = 0.7, list = FALSE)
train_data <- students[train_index, ]
test_data <- students[-train_index, ]
# Model: salary package as a linear function of CGPA and SkillsScore
model <- lm(Package_LPA ~ CGPA + SkillsScore, data = train_data)
# Prediction on the held-out rows
predictions <- predict(model, newdata = test_data)
# Result comparison
result <- data.frame(
  Actual = test_data$Package_LPA,
  Predicted = predictions
)
# Plot predicted against actual salary with a fitted trend line
ggplot(result, aes(x = Actual, y = Predicted)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  ggtitle("Actual vs Predicted Salary")
## `geom_smooth()` using formula = 'y ~ x'
Answer:
The model predicts salary using CGPA and SkillsScore. The comparison
between actual and predicted values shows how well the model performs. A
strong linear pattern indicates high prediction accuracy.
# Fit the salary model on the full data set, then show its summary and the
# overall F-statistic. The summary is computed once and reused.
model_final <- lm(formula = Package_LPA ~ CGPA + SkillsScore, data = students)
final_summary <- summary(model_final)
final_summary
##
## Call:
## lm(formula = Package_LPA ~ CGPA + SkillsScore, data = students)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.405 -3.730 -0.922 2.325 21.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.874848 0.735028 -16.16 <2e-16 ***
## CGPA 1.539348 0.082310 18.70 <2e-16 ***
## SkillsScore 0.161500 0.005958 27.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.358 on 1997 degrees of freedom
## Multiple R-squared: 0.3547, Adjusted R-squared: 0.3541
## F-statistic: 548.8 on 2 and 1997 DF, p-value: < 2.2e-16
final_summary$fstatistic
## value numdf dendf
## 548.841 2.000 1997.000
Answer:
Both CGPA and SkillsScore positively influence salary.
The model shows that combining academic performance and skills improves
placement outcomes significantly.
# R-squared and adjusted R-squared of the full-data model.
summary(model_final)[["r.squared"]]
## [1] 0.3546994
summary(model_final)[["adj.r.squared"]]
## [1] 0.3540532
Answer:
R-squared shows how much variation in salary is explained by CGPA and
skills.
Higher values indicate better predictive performance.
# Hold-out validation: fit on a random 70% of the rows and measure the mean
# absolute error of the predictions on the remaining 30%.
set.seed(1)
n_students <- nrow(students)
# seq_len() stays correct for an empty data frame, where 1:nrow() would give
# c(1, 0). floor() makes the integer sample size explicit.
train <- sample(seq_len(n_students), size = floor(0.7 * n_students))
train_data <- students[train, ]
test_data <- students[-train, ]
model_cv <- lm(Package_LPA ~ CGPA + SkillsScore, data = train_data)
pred <- predict(model_cv, newdata = test_data)
# Mean absolute error on the held-out rows
mean(abs(pred - test_data$Package_LPA))
## [1] 4.118228
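Answer:
This analysis evaluates model generalization by testing prediction error
on unseen data: the model is fit on 70% of the students and its mean
absolute error is measured on the remaining 30%, checking that the model
performs reliably beyond the training data.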
# Fit the salary model separately for each branch and collect the CGPA slope,
# the SkillsScore slope and R-squared into one row per branch.
# reframe() replaces the superseded do(); it returns an ungrouped tibble, so no
# grouping leaks into later steps.
students %>%
  group_by(Branch) %>%
  reframe({
    # Rows of the current branch (all non-grouping columns)
    branch_data <- pick(everything())
    branch_fit <- lm(Package_LPA ~ CGPA + SkillsScore, data = branch_data)
    # Extract coefficients by name rather than by position
    tibble(
      CGPA_coef = coef(branch_fit)[["CGPA"]],
      Skills_coef = coef(branch_fit)[["SkillsScore"]],
      R_squared = summary(branch_fit)$r.squared
    )
  })
## # A tibble: 6 × 4
## Branch CGPA_coef Skills_coef R_squared
## <chr> <dbl> <dbl> <dbl>
## 1 CE 1.56 0.162 0.337
## 2 CSE 1.78 0.172 0.375
## 3 ECE 1.37 0.137 0.311
## 4 EEE 1.61 0.162 0.349
## 5 IT 1.51 0.164 0.408
## 6 ME 1.32 0.170 0.362
Answer:
The regression analysis shows that the impact of CGPA and SkillsScore on
salary varies across branches. Differences in coefficients indicate that
some branches prioritize academic performance, while others emphasize
skills. Variations in R² values suggest that the model explains salary
better in certain branches compared to others.
# Scatter plot of salary package against CGPA with a fitted linear regression
# line and its confidence band.
# The line thickness is set with `linewidth`: using `size` for lines has been
# deprecated since ggplot2 3.4.0 and triggers a warning.
ggplot(students, aes(x = CGPA, y = Package_LPA)) +
  geom_point(color = "blue", size = 3) +
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    color = "red",
    linewidth = 1.5,
    se = TRUE
  ) +
  labs(
    title = "CGPA vs Salary Package (Linear Regression)",
    x = "CGPA",
    y = "Salary (LPA)"
  )
Answer:
The scatter plot shows the relationship between CGPA and salary, while
the regression line represents the overall trend. The upward slope
indicates a positive relationship, meaning students with higher CGPA
tend to receive higher salary packages. The shaded region represents the
confidence interval, showing the reliability of the regression
estimate.