# --- Setup: libraries used throughout the analysis ---
import os

import kagglehub
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Download the Kaggle dataset; the returned `path` is used below as the
# directory that contains the CSV.
path = kagglehub.dataset_download(
    "likithagedipudi/job-board-effectiveness-for-college-students"
)

# Locate the CSV inside the download directory and load it into a DataFrame.
csv_file_name = "job_search_platform_efficacy_100k.csv"
full_csv_path = os.path.join(path, csv_file_name)
df = pd.read_csv(full_csv_path)

# Quick preview of the first rows (presumably the trailing semicolon is there
# to keep a notebook from echoing the result).
df.head();

Job Board Effectiveness for College Students Dataset



Graph 1: Job Offer Rate by Platform

This graph compares the job offer success rate across the platforms LinkedIn, Handshake, and Indeed. LinkedIn and Handshake have the highest and identical offer rates, while Indeed falls behind. This suggests that platform choice can play a role in job offer rate.

# Graph 1: mean of Offer_Received for each Primary_Search_Platform, as bars.
from matplotlib.ticker import PercentFormatter

plt.figure(figsize=(10, 6))

# seaborn 0.13 deprecates passing `palette` without `hue`; assigning the x
# variable to `hue` with `legend=False` is the documented replacement and
# keeps one colour per platform without adding a legend.
ax = sns.barplot(
    data=df,
    x="Primary_Search_Platform",
    y="Offer_Received",
    hue="Primary_Search_Platform",
    palette="Set2",
    legend=False,
    errorbar=None,
)

# Print each bar's height just above it, converted to a percentage.
for bar in ax.patches:
    height = bar.get_height()
    ax.annotate(
        f"{height * 100:.1f}%",
        (bar.get_x() + bar.get_width() / 2.0, height),
        ha="center",
        va="bottom",
        fontsize=11,
    )

plt.title("Job Offer Rate by Platform", fontsize=16, fontweight="bold")
plt.xlabel("Platform")
plt.ylabel("Offer Rate (%)")
# The bar heights are fractions (the annotations above multiply by 100), so
# format the ticks as percentages to agree with the "(%)" axis label.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
s = plt.xticks(rotation=30)  # assignment only swallows the return value
plt.tight_layout()
plt.show();

Graph 2: Job Offer Rate by Networking Activity

This graph shows how attending networking events can increase your chances of receiving a job offer. The offer rate generally increases slightly with more networking, but drops off at very high levels. This suggests that moderate networking is most effective.

# Graph 2: mean of Offer_Received per bucket of Networking_Events_Attended.

# Bucket the event counts. With pd.cut's default right-closed intervals the
# buckets are (-1, 1], (1, 3], (3, 5], (5, 7], (7, 10], (10, 15]; any value
# outside (-1, 15] becomes NaN and is therefore left out of the groupby below.
df["Networking_Group"] = pd.cut(
    df["Networking_Events_Attended"],
    bins=[-1, 1, 3, 5, 7, 10, 15],
    labels=["0-1", "2-3", "4-5", "6-7", "8-10", "11-15"],
)

# `observed=False` is spelled out because grouping on a categorical column
# without it emits a FutureWarning in recent pandas and the default is
# changing; False keeps every bucket in the result, even an empty one.
networking_rate = (
    df.groupby("Networking_Group", observed=False)["Offer_Received"]
    .mean()
    .reset_index()
)

plt.figure(figsize=(10, 6))

# seaborn 0.13 deprecates `palette` without `hue`; assigning the x variable to
# `hue` with `legend=False` is the documented replacement.
ax = sns.barplot(
    data=networking_rate,
    x="Networking_Group",
    y="Offer_Received",
    hue="Networking_Group",
    palette="Set2",
    legend=False,
    errorbar=None,
)

# Label each bar with its height as a percentage.
for bar in ax.patches:
    h = bar.get_height()
    if pd.isna(h):
        # An empty bucket has no mean; skip it rather than print "nan%".
        continue
    ax.annotate(
        f"{h*100:.1f}%",
        (bar.get_x() + bar.get_width() / 2, h),
        ha="center",
        va="bottom",
        fontsize=11,
    )

plt.title("Job Offer Rate by Networking Activity", fontsize=16, fontweight="bold")
plt.xlabel("Networking Events Attended")
plt.ylabel("Offer Rate")
a = plt.ylim(0, 0.5)  # assignment only swallows the return value
plt.tight_layout()
plt.show();

Graph 3: Salary Distribution by GPA Group

This graph shows that students with higher GPAs tend to receive higher salary offers on average. Although there is overlap between groups, the median steadily increases as GPA rises.

# Graph 3: distribution of Offer_Salary within each GPA bucket (box plot).

# Keep only rows where an offer was received and a salary is recorded.
df_offer = df[df["Offer_Received"] == 1].dropna(subset=["Offer_Salary"]).copy()

# Bucket GPA into left-closed intervals: [0, 2.5), [2.5, 3.0), [3.0, 3.5) and
# a top bucket starting at 3.5. With right=False an upper edge of exactly 4.0
# would exclude GPA == 4.0 (it would become NaN and vanish from the plot), so
# the last edge is nudged just above 4.0 to keep a perfect GPA in "3.5-4.0".
df_offer["GPA_Group"] = pd.cut(
    df_offer["GPA"],
    bins=[0, 2.5, 3.0, 3.5, 4.0 + 1e-9],
    labels=["Below 2.5", "2.5-3.0", "3.0-3.5", "3.5-4.0"],
    right=False,
)

plt.figure(figsize=(10, 6))

# `hue` repeats the x variable so that `palette` colours one box per bucket.
sns.boxplot(
    data=df_offer,
    x="GPA_Group",
    y="Offer_Salary",
    hue="GPA_Group",
    palette="coolwarm",
    showfliers=False,
)

plt.title("Salary Distribution by GPA Group", fontsize=16, fontweight="bold")
plt.xlabel("GPA Group", fontsize=12)
plt.ylabel("Salary ($)", fontsize=12)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.show();

Graph 4: Impact of Internships on Salary

Moving on from the previous graph on how much your GPA can affect your salary, let's look at the impact that taking an internship has on your salary. The graph shows a positive relationship between the number of internships and salary, which seems reasonable since internships increase experience.

# Graph 4: distribution of Offer_Salary for each Prior_Internships value.
plt.figure(figsize=(10, 6))

# seaborn 0.13 deprecates `palette` without `hue`; assigning the x variable to
# `hue` with `legend=False` is the documented replacement.
# NOTE(review): if Prior_Internships is numeric, seaborn may colour the boxes
# from the continuous "coolwarm" colormap rather than discrete samples - confirm
# the colours still look as intended.
sns.boxplot(
    data=df,
    x="Prior_Internships",
    y="Offer_Salary",
    hue="Prior_Internships",
    palette="coolwarm",
    legend=False,
    showfliers=False,
)

plt.title("Impact of Internships on Salary", fontsize=16, fontweight="bold")
plt.xlabel("Number of Internships")
plt.ylabel("Salary ($)")
plt.tight_layout()
plt.show();

Graph 5: Accepted Offers by Role Relevance

Lastly, this plot shows accepted offers by how relevant your degree is to the role. Higher relevance scores make up a larger portion of the accepted offers. This shows that students are prioritizing jobs that align with their degree.

# Graph 5: share of accepted offers at each Role_Relevance value (donut chart).

# Restrict to rows where an offer was received.
df_offer = df.loc[df["Offer_Received"] == 1].copy()

# Count accepted offers per Role_Relevance value, ordered by that value.
accepted_mask = df_offer["Accepted_Offer"] == 1
accepted_counts = (
    df_offer.loc[accepted_mask, "Role_Relevance"].value_counts().sort_index()
)

plt.figure(figsize=(8, 8))

# One shade of blue and one text label per relevance value.
colors = sns.color_palette("Blues", len(accepted_counts))
slice_labels = [f"Relevance {int(level)}" for level in accepted_counts.index]

# A wedge width below 1 leaves the centre open, giving the donut shape.
wedges, texts, autotexts = plt.pie(
    accepted_counts,
    labels=slice_labels,
    colors=colors,
    autopct="%1.1f%%",
    startangle=90,
    counterclock=False,
    wedgeprops=dict(width=0.45, edgecolor="white"),
    textprops=dict(fontsize=11),
)

plt.title("Accepted Offers by Role Relevance", fontsize=16, fontweight="bold")
plt.tight_layout()
plt.show();