#Student Confirmation
#1. I hereby confirm that I have only used generative Artificial Intelligence (AI), such as ChatGPT, according to the rules set out in the instructions of this assessment. I acknowledge that using generative AI in a manner different to that stipulated will constitute an examination offence and will be formally investigated as such.
#2. I hereby confirm that I have read the generative AI tools guidance and the academic misconduct policy (specifically, point 14i) and have used AI appropriately according to these guidelines and the rules stipulated by this assessment.
Exploring the suitability of 6 dysphonia measurements: NHR (Noise-to-Harmonics Ratio), HNR (Harmonics-to-Noise Ratio), RPDE (Recurrence Period Density Entropy), spread2, D2 and PPE (Pitch Period Entropy), for telemonitoring of Parkinson’s disease.
Parkinson’s is a disease that affects over 1 million people in North America alone and has no known cure (Little M et al.). However, it can be treated, particularly in its earlier stages. This makes the telemonitoring of dysphonia critical, and the investigation of new telemonitoring methods therefore imperative to provide the best results. In this study we look at 6 of the measurements (NHR, HNR, RPDE, spread2, D2 and PPE).
# Load the dysphonia measurements, taking row names from the first CSV column
parkinsons <- read.csv(file = "parkinsons.csv", row.names = 1)

# Scatterplot matrix: visual check of the relationship between every pair of
# variables
plot(parkinsons)

# Correlation matrix for the same variable pairs (cor() defaults to Pearson)
cor(parkinsons)
## NHR HNR RPDE spread2 D2 PPE
## NHR 1.0000000 -0.7140724 0.3708905 0.3180990 0.4709488 0.5525913
## HNR -0.7140724 1.0000000 -0.5987363 -0.4315637 -0.6014010 -0.6928759
## RPDE 0.3708905 -0.5987363 1.0000000 0.4799045 0.2369314 0.5458857
## spread2 0.3180990 -0.4315637 0.4799045 1.0000000 0.5235317 0.6447110
## D2 0.4709488 -0.6014010 0.2369314 0.5235317 1.0000000 0.4805845
## PPE 0.5525913 -0.6928759 0.5458857 0.6447110 0.4805845 1.0000000
# Simple linear regressions of PPE on each of the other five measurements.
# Each model is fitted first, then the raw data are drawn with the fitted
# line overlaid in red. The fit.* objects are reused in the sections below.

# PPE against NHR
fit.NHR <- lm(PPE ~ NHR, data = parkinsons)
plot(PPE ~ NHR, data = parkinsons)
abline(fit.NHR, col = "red")

# PPE against HNR
fit.HNR <- lm(PPE ~ HNR, data = parkinsons)
plot(PPE ~ HNR, data = parkinsons)
abline(fit.HNR, col = "red")

# PPE against RPDE
fit.RPDE <- lm(PPE ~ RPDE, data = parkinsons)
plot(PPE ~ RPDE, data = parkinsons)
abline(fit.RPDE, col = "red")

# PPE against spread2
fit.spread2 <- lm(PPE ~ spread2, data = parkinsons)
plot(PPE ~ spread2, data = parkinsons)
abline(fit.spread2, col = "red")

# PPE against D2
fit.D2 <- lm(PPE ~ D2, data = parkinsons)
plot(PPE ~ D2, data = parkinsons)
abline(fit.D2, col = "red")
# Residuals-vs-fitted plot and model summary for PPE ~ NHR.
# fit.NHR is the model fitted alongside the PPE ~ NHR scatter plot earlier in
# this script; it is reused here (as the HNR, RPDE, spread2 and D2 sections do
# with their own fits) instead of being refitted with an identical lm() call.
plot(fit.NHR$residuals ~ fit.NHR$fitted.values)
# Horizontal reference line at zero residual
abline(h = 0, col = "red")
# Coefficient table (intercept and slope) plus R-squared for the NHR model
summary(fit.NHR)
##
## Call:
## lm(formula = PPE ~ NHR, data = parkinsons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19286 -0.05393 -0.00577 0.04402 0.21888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.175938 0.006335 27.774 <2e-16 ***
## NHR 1.232090 0.133764 9.211 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0753 on 193 degrees of freedom
## Multiple R-squared: 0.3054, Adjusted R-squared: 0.3018
## F-statistic: 84.84 on 1 and 193 DF, p-value: < 2.2e-16
# HNR model: residuals against fitted values with a zero reference line,
# followed by the coefficient table (intercept, slope) and R-squared
plot(fit.HNR$residuals ~ fit.HNR$fitted.values)
abline(h = 0, col = "red")
summary(fit.HNR)
##
## Call:
## lm(formula = PPE ~ HNR, data = parkinsons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.146053 -0.046084 -0.006357 0.038739 0.202933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.515333 0.023596 21.84 <2e-16 ***
## HNR -0.014109 0.001057 -13.35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06515 on 193 degrees of freedom
## Multiple R-squared: 0.4801, Adjusted R-squared: 0.4774
## F-statistic: 178.2 on 1 and 193 DF, p-value: < 2.2e-16
# RPDE model: residuals against fitted values with a zero reference line,
# followed by the coefficient table (intercept, slope) and R-squared
plot(fit.RPDE$residuals ~ fit.RPDE$fitted.values)
abline(h = 0, col = "red")
summary(fit.RPDE)
##
## Call:
## lm(formula = PPE ~ RPDE, data = parkinsons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17308 -0.04932 -0.01385 0.03222 0.26055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02940 0.02663 -1.104 0.271
## RPDE 0.47329 0.05229 9.051 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0757 on 193 degrees of freedom
## Multiple R-squared: 0.298, Adjusted R-squared: 0.2944
## F-statistic: 81.93 on 1 and 193 DF, p-value: < 2.2e-16
# spread2 model: residuals against fitted values with a zero reference line,
# followed by the coefficient table (intercept, slope) and R-squared
plot(fit.spread2$residuals ~ fit.spread2$fitted.values)
abline(h = 0, col = "red")
summary(fit.spread2)
##
## Call:
## lm(formula = PPE ~ spread2, data = parkinsons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.146147 -0.045599 -0.008829 0.039457 0.207481
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04876 0.01435 3.399 0.000822 ***
## spread2 0.69661 0.05945 11.717 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06907 on 193 degrees of freedom
## Multiple R-squared: 0.4157, Adjusted R-squared: 0.4126
## F-statistic: 137.3 on 1 and 193 DF, p-value: < 2.2e-16
# D2 model: residuals against fitted values with a zero reference line,
# followed by the coefficient table (intercept, slope) and R-squared
plot(fit.D2$residuals ~ fit.D2$fitted.values)
abline(h = 0, col = "red")
summary(fit.D2)
##
## Call:
## lm(formula = PPE ~ D2, data = parkinsons)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.15632 -0.05571 -0.01118 0.04392 0.24191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06293 0.03585 -1.755 0.0808 .
## D2 0.11314 0.01486 7.613 1.16e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07923 on 193 degrees of freedom
## Multiple R-squared: 0.231, Adjusted R-squared: 0.227
## F-statistic: 57.96 on 1 and 193 DF, p-value: 1.158e-12
# Load the new observations, taking row names from the first CSV column
parkinsons_new <- read.csv(file = "parkinsons_new.csv", row.names = 1)

# Predict PPE for the new observations from the HNR model; the result is a
# matrix whose "fit" column holds the point predictions, alongside the
# confidence-interval bounds
predicted_ppe <- predict(
  object = fit.HNR,
  newdata = parkinsons_new,
  interval = "confidence"
)

# Redraw the PPE-vs-HNR scatter plot of the original data
plot(PPE ~ HNR, data = parkinsons)

# Overlay the predicted PPE values for the new observations as solid red points
points(
  x = parkinsons_new$HNR,
  y = predicted_ppe[, "fit"],
  col = "red",
  pch = 16
)
#DO NOT DELETE THIS CHUNK
#word count chunk - the number under "words" is your official word count
#The first time you run this, you will need to install the package on your device
#remove the # from the following line of code if you have not used wordcount on this device before
#install.packages("rmdwc")
# Attach the rmdwc package used for the word count
library(rmdwc)
# Run the word-count add-in; its echoed output lists file, lines, words, bytes, chars and nonws
rmdcountAddin()
## $code
## [1] "data.frame(file='STAT_3parkinsons.Rmd', lines=112, words=856, bytes=5616, chars=5616, nonws=4571)"
## attr(,"class")
## [1] "rs.scalar"
##
## $echo
## [1] FALSE
## attr(,"class")
## [1] "rs.scalar"
##
## $execute
## [1] TRUE
## attr(,"class")
## [1] "rs.scalar"
##
## $focus
## [1] TRUE
## attr(,"class")
## [1] "rs.scalar"
##
## $animate
## [1] FALSE
## attr(,"class")
## [1] "rs.scalar"
##
## $language
## [1] "R"
#BIBLIOGRAPHY
#1.Little M, McSharry P, Hunter E, Spielman J, Ramig L. Suitability of dysphonia measurements for telemonitoring of Parkinson’s disease. Nature Precedings. 2008 Sep 12;