#Loading the required libraries library(ggplot2) library(plotly) library(dplyr)
2024-03-19
# Load the required libraries ----
# Each call sits on its own line: anything that follows `#` on the same line
# is part of the comment and is never executed.
library(ggplot2)
library(plotly)
library(dplyr)
# Static scatter plot: CGPA on the x-axis against placement exam marks on the
# y-axis, drawn with red points and the minimal theme.
ggplot(
  data = placement_data,
  mapping = aes(x = cgpa, y = placement_exam_marks)
) +
  geom_point(color = 'red') +
  labs(
    title = "Scatter Plot of CGPA vs. Placement Exam Marks",
    x = "CGPA",
    y = "Placement Exam Marks"
  ) +
  theme_minimal()
In mathematics, the Simple Linear Regression model is represented as follows:
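The model is defined by the equation: \[ Y = \beta_0 + \beta_1X + \epsilon \] where \(Y\) is the dependent variable (here, the placement exam marks), \(X\) is the independent variable (here, the CGPA), \(\beta_0\) is the intercept, \(\beta_1\) is the slope, and \(\epsilon\) is the random error term.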
Here the slope and the intercept, the two coefficients of the linear regression model, are important for understanding how the independent and dependent variables are related to one another. They are estimated by the method of least squares. The slope $\beta_1$ is calculated as:
\[ \beta_1 = \frac{\sum (X_i - \bar{X})(Y_i - \bar{Y})}{\sum (X_i - \bar{X})^2} \]
And the intercept $\beta_0$ is:
\[ \beta_0 = \bar{Y} - \beta_1\bar{X} \]
# Scatter plot of CGPA against placement exam marks with a fitted straight
# line on top: geom_smooth() with method = "lm" draws the line in blue, and
# se = FALSE leaves out the confidence band.
ggplot(
  data = placement_data,
  mapping = aes(x = cgpa, y = placement_exam_marks)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  theme_minimal() +
  labs(
    title = "Regression Line with CGPA vs. Placement Exam Marks",
    x = "CGPA",
    y = "Placement Exam Marks"
  )
# Interactive plot of CGPA against placement exam marks: build the ggplot
# scatter plot (orange points, minimal theme) first, then hand it to
# ggplotly() to render it as a plotly widget.
p <- ggplot(
  data = placement_data,
  mapping = aes(x = cgpa, y = placement_exam_marks)
) +
  geom_point(color = 'orange') +
  labs(
    title = "Interactive Scatter Plot of CGPA vs. Placement Exam Marks",
    x = "CGPA",
    y = "Placement Exam Marks"
  ) +
  theme_minimal()
ggplotly(p)
# Fit a simple linear regression of placement exam marks on CGPA with lm(),
# then print the model summary (residuals, coefficient table, R-squared,
# F-statistic).
# The two calls are separate statements: written on a single line with
# nothing between them, R cannot parse the script.
fit <- lm(placement_exam_marks ~ cgpa, data = placement_data)
summary(fit)
##
## Call:
## lm(formula = placement_exam_marks ~ cgpa, data = placement_data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -32.099 -15.074  -3.917  11.853  66.915
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  38.1434     6.8687   5.553  3.6e-08 ***
## cgpa         -0.8502     0.9829  -0.865    0.387
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.13 on 998 degrees of freedom
## Multiple R-squared:  0.0007492,  Adjusted R-squared:  -0.0002521
## F-statistic: 0.7482 on 1 and 998 DF,  p-value: 0.3872