library(readxl)
# Load the breast-cancer measurements from the Excel workbook into `BCData`.
# NOTE(review): this absolute path only exists on the original author's
# machine; consider a project-relative path so the script is reproducible.
BCData <- read_excel("C:/Users/afrebanu/Desktop/BCData.xlsx")

# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; skip it when the script runs in batch mode (Rscript,
# knitting) so the analysis does not depend on a GUI being available.
if (interactive()) {
  View(BCData)
}
# ---- Radius: average and distribution ----
mean(BCData[["radius_mean"]])
## [1] 14.12729
boxplot(BCData[["radius_mean"]])
boxplot(
  BCData[["radius_mean"]],
  main = "Mean Radius of the Breast Cancer Infection",
  horizontal = TRUE
)

# ---- Texture: average and distribution ----
mean(BCData[["texture_mean"]])
## [1] 19.28965
boxplot(
  BCData[["texture_mean"]],
  main = "Mean Texture of the Breast Cancer Infection",
  horizontal = TRUE
)

# ---- Number of observations per diagnosis code ----
table(BCData[["Diagnosis"]])
##
## B M
## 357 212

# ---- Radius split by diagnosis code ----
# First with default styling (axis labels are taken from the formula terms,
# so the formula is written with the `BCData$` prefix on purpose) ...
boxplot(
  BCData$radius_mean ~ BCData$Diagnosis,
  main = "Radius Comparison for different types of Diagnosis"
)
# ... then again with explicit axis labels and colours.
boxplot(
  BCData$radius_mean ~ BCData$Diagnosis,
  main = "Radius Comparison for different types of Diagnosis",
  xlab = "Types of Diagnosis",
  ylab = "Mean Radius of the Infection",
  col = "orange",
  border = "brown"
)
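# The boxplot suggests that the mean radius is noticeably higher for diagnosis
# code M than for diagnosis code B. NOTE(review): B and M are diagnosis
# categories of the breast-cancer data (presumably benign / malignant), not
# "Hepatitis B" or "Molecular Level" infections -- confirm against the data
# dictionary.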
# Compare the remaining mean measurements across diagnosis codes, drawing one
# boxplot per measurement with the same styling.
#
# The stray R Markdown chunk header `{r}` that preceded these calls has been
# removed: in a plain R script it is parsed as a braced expression that
# evaluates the symbol `r`, which stops the script with "object 'r' not found".
#
# local() keeps the loop's helper variables out of the global workspace.
local({
  # Column name -> word used in the plot title and y-axis label.
  measure_labels <- c(
    texture_mean = "Texture",
    perimeter_mean = "Perimeter",
    area_mean = "Area",
    compactness_mean = "Compactness",
    symmetry_mean = "Symmetry"
  )

  for (measure in names(measure_labels)) {
    label <- measure_labels[[measure]]
    boxplot(
      BCData[[measure]] ~ BCData$Diagnosis,
      main = paste(label, "Comparison for different types of Diagnosis"),
      xlab = "Types of Diagnosis",
      ylab = paste("Mean", label, "of the Infection"),
      col = "orange",
      border = "brown"
    )
  }
})
library(car)
# Radius: mean value plotted against its standard-error and "worst" columns.
# The argument expressions are kept as `BCData$...` because scatterplot()
# uses them for the default axis labels.
scatterplot(
  x = BCData$radius_mean,
  y = BCData$radius_se
)
scatterplot(
  x = BCData$radius_mean,
  y = BCData$radius_worst
)
# There are very few negative correlations in the above data.
# Texture: standard-error and "worst" columns plotted against the mean value.
# The argument expressions are kept as `BCData$...` because scatterplot()
# uses them for the default axis labels.
scatterplot(
  x = BCData$texture_se,
  y = BCData$texture_mean
)
scatterplot(
  x = BCData$texture_worst,
  y = BCData$texture_mean
)
# Simple linear regression of mean radius on mean area.
# Recorded output below reports a Multiple R-squared of 0.9749.
fit3 <- lm(radius_mean ~ area_mean, data = BCData)
summary(fit3)
##
## Call:
## lm(formula = radius_mean ~ area_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9604 -0.1801 0.1479 0.3600 0.7788
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.652e+00 4.955e-02 154.4 <2e-16 ***
## area_mean 9.887e-03 6.666e-05 148.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5591 on 567 degrees of freedom
## Multiple R-squared: 0.9749, Adjusted R-squared: 0.9748
## F-statistic: 2.2e+04 on 1 and 567 DF, p-value: < 2.2e-16
# Simple linear regression of mean radius on mean smoothness.
# Recorded output below reports a Multiple R-squared of 0.0291.
fit4 <- lm(radius_mean ~ smoothness_mean, data = BCData)
summary(fit4)
##
## Call:
## lm(formula = radius_mean ~ smoothness_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0285 -2.3750 -0.5561 1.6571 13.2202
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.01 1.01 9.912 < 2e-16 ***
## smoothness_mean 42.74 10.37 4.122 4.31e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.475 on 567 degrees of freedom
## Multiple R-squared: 0.0291, Adjusted R-squared: 0.02739
## F-statistic: 16.99 on 1 and 567 DF, p-value: 4.313e-05
# Simple linear regression of mean radius on mean compactness.
# Recorded output below reports a Multiple R-squared of 0.2562.
fit5 <- lm(radius_mean ~ compactness_mean, data = BCData)
summary(fit5)
##
## Call:
## lm(formula = radius_mean ~ compactness_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8971 -2.0131 -0.4438 1.5317 12.3867
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.6035 0.2826 37.52 <2e-16 ***
## compactness_mean 33.7722 2.4169 13.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.042 on 567 degrees of freedom
## Multiple R-squared: 0.2562, Adjusted R-squared: 0.2548
## F-statistic: 195.3 on 1 and 567 DF, p-value: < 2.2e-16
# Simple linear regression of mean radius on mean concavity.
# Recorded output below reports a Multiple R-squared of 0.458.
fit6 <- lm(radius_mean ~ concavity_mean, data = BCData)
summary(fit6)
##
## Call:
## lm(formula = radius_mean ~ concavity_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.0295 -1.5416 -0.1348 1.5333 7.8383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.471 0.163 70.36 <2e-16 ***
## concavity_mean 29.917 1.367 21.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.597 on 567 degrees of freedom
## Multiple R-squared: 0.458, Adjusted R-squared: 0.4571
## F-statistic: 479.1 on 1 and 567 DF, p-value: < 2.2e-16
# Simple linear regression of mean radius on mean symmetry.
# Recorded output below reports a Multiple R-squared of 0.02183.
fit7 <- lm(radius_mean ~ symmetry_mean, data = BCData)
summary(fit7)
##
## Call:
## lm(formula = radius_mean ~ symmetry_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3711 -2.4426 -0.5862 1.5744 14.2934
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.6867 0.9783 10.924 < 2e-16 ***
## symmetry_mean 18.9918 5.3393 3.557 0.000406 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.488 on 567 degrees of freedom
## Multiple R-squared: 0.02183, Adjusted R-squared: 0.0201
## F-statistic: 12.65 on 1 and 567 DF, p-value: 0.0004065
# Simple linear regression of mean radius on mean fractal dimension.
# Recorded output below reports a negative slope and a Multiple R-squared
# of 0.09711.
fit8 <- lm(radius_mean ~ fractal_dimension_mean, data = BCData)
summary(fit8)
##
## Call:
## lm(formula = radius_mean ~ fractal_dimension_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9829 -2.4195 -0.9257 1.7852 12.8087
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.895 1.259 18.985 < 2e-16 ***
## fractal_dimension_mean -155.545 19.918 -7.809 2.8e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.352 on 567 degrees of freedom
## Multiple R-squared: 0.09711, Adjusted R-squared: 0.09552
## F-statistic: 60.99 on 1 and 567 DF, p-value: 2.795e-14
# Simple linear regression of mean radius on mean perimeter.
# `fit2` was summarised here without being created anywhere in this script,
# which fails with "object 'fit2' not found" in a fresh session. The model is
# defined here from the formula shown in the recorded `Call:` line below.
# Recorded output below reports a Multiple R-squared of 0.9957.
fit2 <- lm(radius_mean ~ perimeter_mean, data = BCData)
summary(fit2)
##
## Call:
## lm(formula = radius_mean ~ perimeter_mean, data = BCData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.43367 -0.10445 0.02596 0.14652 0.60550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8177520 0.0379227 21.56 <2e-16 ***
## perimeter_mean 0.1447176 0.0003987 362.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2309 on 567 degrees of freedom
## Multiple R-squared: 0.9957, Adjusted R-squared: 0.9957
## F-statistic: 1.318e+05 on 1 and 567 DF, p-value: < 2.2e-16