BONUS – Place the original .csv file in a GitHub repository and have R read it from the link.
# Load the melanoma data straight from the raw GitHub URL; the first line of
# the file supplies the column names.
melanoma <- read.csv(
  file = "https://raw.githubusercontent.com/olgashiligin/R-assignment/master/melanoma.csv",
  header = TRUE
)
# Preview the first 30 rows.
head(x = melanoma, n = 30L)
## X time status sex age year thickness ulcer
## 1 1 10 3 1 76 1972 6.76 1
## 2 2 30 3 1 56 1968 0.65 0
## 3 3 35 2 1 41 1977 1.34 0
## 4 4 99 3 0 71 1968 2.90 0
## 5 5 185 1 1 52 1965 12.08 1
## 6 6 204 1 1 28 1971 4.84 1
## 7 7 210 1 1 77 1972 5.16 1
## 8 8 232 3 0 60 1974 3.22 1
## 9 9 232 1 1 49 1968 12.88 1
## 10 10 279 1 0 68 1971 7.41 1
## 11 11 295 1 0 53 1969 4.19 1
## 12 12 355 3 0 64 1972 0.16 1
## 13 13 386 1 0 68 1965 3.87 1
## 14 14 426 1 1 63 1970 4.84 1
## 15 15 469 1 0 14 1969 2.42 1
## 16 16 493 3 1 72 1971 12.56 1
## 17 17 529 1 1 46 1971 5.80 1
## 18 18 621 1 1 72 1972 7.06 1
## 19 19 629 1 1 95 1968 5.48 1
## 20 20 659 1 1 54 1972 7.73 1
## 21 21 667 1 0 89 1968 13.85 1
## 22 22 718 1 1 25 1967 2.34 1
## 23 23 752 1 1 37 1973 4.19 1
## 24 24 779 1 1 43 1967 4.04 1
## 25 25 793 1 1 68 1970 4.84 1
## 26 26 817 1 0 67 1966 0.32 0
## 27 27 826 3 0 86 1965 8.54 1
## 28 28 833 1 0 56 1971 2.58 1
## 29 29 858 1 0 16 1967 3.56 0
## 30 30 869 1 0 42 1965 3.54 0
# Per-column summary statistics for the full data set.
summary(object = melanoma)
## X time status sex
## Min. : 1 Min. : 10 Min. :1.00 Min. :0.0000
## 1st Qu.: 52 1st Qu.:1525 1st Qu.:1.00 1st Qu.:0.0000
## Median :103 Median :2005 Median :2.00 Median :0.0000
## Mean :103 Mean :2153 Mean :1.79 Mean :0.3854
## 3rd Qu.:154 3rd Qu.:3042 3rd Qu.:2.00 3rd Qu.:1.0000
## Max. :205 Max. :5565 Max. :3.00 Max. :1.0000
## age year thickness ulcer
## Min. : 4.00 Min. :1962 Min. : 0.10 Min. :0.000
## 1st Qu.:42.00 1st Qu.:1968 1st Qu.: 0.97 1st Qu.:0.000
## Median :54.00 Median :1970 Median : 1.94 Median :0.000
## Mean :52.46 Mean :1970 Mean : 2.92 Mean :0.439
## 3rd Qu.:65.00 3rd Qu.:1972 3rd Qu.: 3.56 3rd Qu.:1.000
## Max. :95.00 Max. :1977 Max. :17.42 Max. :1.000
# Mean and median of the thickness column across all rows.
mean(melanoma[["thickness"]])
## [1] 2.919854
median(melanoma[["thickness"]])
## [1] 1.94
# The same two statistics for the age column.
mean(melanoma[["age"]])
## [1] 52.46341
median(melanoma[["age"]])
## [1] 54
# Keep only the rows whose age is strictly greater than 65; all columns are
# retained.
melanoma_subset <- melanoma[melanoma[["age"]] > 65, ]
melanoma_subset
## X time status sex age year thickness ulcer
## 1 1 10 3 1 76 1972 6.76 1
## 4 4 99 3 0 71 1968 2.90 0
## 7 7 210 1 1 77 1972 5.16 1
## 10 10 279 1 0 68 1971 7.41 1
## 13 13 386 1 0 68 1965 3.87 1
## 16 16 493 3 1 72 1971 12.56 1
## 18 18 621 1 1 72 1972 7.06 1
## 19 19 629 1 1 95 1968 5.48 1
## 21 21 667 1 0 89 1968 13.85 1
## 25 25 793 1 1 68 1970 4.84 1
## 26 26 817 1 0 67 1966 0.32 0
## 27 27 826 3 0 86 1965 8.54 1
## 35 35 1041 1 1 68 1967 14.66 0
## 36 36 1055 1 0 75 1967 2.58 1
## 38 38 1075 1 1 66 1971 3.54 1
## 42 42 1271 1 0 74 1971 3.54 1
## 46 46 1499 2 1 73 1973 1.29 0
## 49 49 1510 2 0 69 1973 1.94 0
## 50 50 1512 2 0 77 1973 0.16 0
## 51 51 1516 1 1 80 1968 2.58 1
## 52 52 1525 3 0 76 1970 1.29 1
## 62 62 1634 2 0 68 1973 1.37 0
## 68 68 1654 2 0 67 1973 1.13 0
## 72 72 1690 1 1 83 1971 1.62 0
## 84 84 1836 2 0 70 1972 1.53 0
## 89 89 1860 3 1 71 1969 4.84 1
## 92 92 1914 2 0 69 1972 3.06 0
## 93 93 1919 2 1 83 1972 3.54 0
## 96 96 1933 1 0 77 1972 1.94 0
## 100 100 1958 2 0 69 1972 12.88 0
## 102 102 1970 2 1 84 1972 4.09 1
## 103 103 2005 2 0 66 1972 0.64 0
## 105 105 2011 2 0 75 1972 3.22 1
## 110 110 2059 2 1 68 1972 3.22 1
## 111 111 2061 1 1 71 1968 2.26 0
## 114 114 2085 3 0 66 1970 0.65 0
## 117 117 2104 2 0 72 1972 0.97 0
## 123 123 2209 2 0 72 1971 9.66 0
## 125 125 2227 2 1 77 1971 5.48 1
## 132 132 2403 2 0 67 1971 2.90 1
## 133 133 2426 2 0 69 1971 3.87 0
## 134 134 2426 2 0 74 1971 1.94 1
## 139 139 2493 2 1 72 1971 4.82 1
## 141 141 2542 2 1 67 1971 7.89 1
## 149 149 2782 1 1 78 1969 1.94 0
## 151 151 2984 2 1 70 1969 0.16 0
## 173 173 3384 2 0 68 1968 3.22 1
## 193 193 4001 2 0 69 1967 2.10 0
3. Create new column names for the new data frame.
# Relabel all eight columns by position.
# NOTE(review): the eighth column is read in as "ulcer" but is relabelled
# "Unclear" here -- confirm that this name is intended.
names(melanoma_subset) <- c(
  "ID", "Time", "Status", "Gender",
  "Age", "Year", "Thickness", "Unclear"
)
melanoma_subset
## ID Time Status Gender Age Year Thickness Unclear
## 1 1 10 3 1 76 1972 6.76 1
## 4 4 99 3 0 71 1968 2.90 0
## 7 7 210 1 1 77 1972 5.16 1
## 10 10 279 1 0 68 1971 7.41 1
## 13 13 386 1 0 68 1965 3.87 1
## 16 16 493 3 1 72 1971 12.56 1
## 18 18 621 1 1 72 1972 7.06 1
## 19 19 629 1 1 95 1968 5.48 1
## 21 21 667 1 0 89 1968 13.85 1
## 25 25 793 1 1 68 1970 4.84 1
## 26 26 817 1 0 67 1966 0.32 0
## 27 27 826 3 0 86 1965 8.54 1
## 35 35 1041 1 1 68 1967 14.66 0
## 36 36 1055 1 0 75 1967 2.58 1
## 38 38 1075 1 1 66 1971 3.54 1
## 42 42 1271 1 0 74 1971 3.54 1
## 46 46 1499 2 1 73 1973 1.29 0
## 49 49 1510 2 0 69 1973 1.94 0
## 50 50 1512 2 0 77 1973 0.16 0
## 51 51 1516 1 1 80 1968 2.58 1
## 52 52 1525 3 0 76 1970 1.29 1
## 62 62 1634 2 0 68 1973 1.37 0
## 68 68 1654 2 0 67 1973 1.13 0
## 72 72 1690 1 1 83 1971 1.62 0
## 84 84 1836 2 0 70 1972 1.53 0
## 89 89 1860 3 1 71 1969 4.84 1
## 92 92 1914 2 0 69 1972 3.06 0
## 93 93 1919 2 1 83 1972 3.54 0
## 96 96 1933 1 0 77 1972 1.94 0
## 100 100 1958 2 0 69 1972 12.88 0
## 102 102 1970 2 1 84 1972 4.09 1
## 103 103 2005 2 0 66 1972 0.64 0
## 105 105 2011 2 0 75 1972 3.22 1
## 110 110 2059 2 1 68 1972 3.22 1
## 111 111 2061 1 1 71 1968 2.26 0
## 114 114 2085 3 0 66 1970 0.65 0
## 117 117 2104 2 0 72 1972 0.97 0
## 123 123 2209 2 0 72 1971 9.66 0
## 125 125 2227 2 1 77 1971 5.48 1
## 132 132 2403 2 0 67 1971 2.90 1
## 133 133 2426 2 0 69 1971 3.87 0
## 134 134 2426 2 0 74 1971 1.94 1
## 139 139 2493 2 1 72 1971 4.82 1
## 141 141 2542 2 1 67 1971 7.89 1
## 149 149 2782 1 1 78 1969 1.94 0
## 151 151 2984 2 1 70 1969 0.16 0
## 173 173 3384 2 0 68 1968 3.22 1
## 193 193 4001 2 0 69 1967 2.10 0
# Per-column summary statistics for the over-65 subset.
summary(object = melanoma_subset)
## ID Time Status Gender
## Min. : 1.00 Min. : 10.0 Min. :1.000 Min. :0.0000
## 1st Qu.: 33.00 1st Qu.: 987.2 1st Qu.:1.000 1st Qu.:0.0000
## Median : 78.00 Median :1763.0 Median :2.000 Median :0.0000
## Mean : 77.02 Mean :1633.4 Mean :1.792 Mean :0.4375
## 3rd Qu.:114.75 3rd Qu.:2089.8 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :193.00 Max. :4001.0 Max. :3.000 Max. :1.0000
## Age Year Thickness Unclear
## Min. :66.00 Min. :1965 Min. : 0.160 Min. :0.0000
## 1st Qu.:68.00 1st Qu.:1969 1st Qu.: 1.860 1st Qu.:0.0000
## Median :71.50 Median :1971 Median : 3.220 Median :1.0000
## Mean :73.10 Mean :1970 Mean : 4.193 Mean :0.5208
## 3rd Qu.:76.25 3rd Qu.:1972 3rd Qu.: 5.240 3rd Qu.:1.0000
## Max. :95.00 Max. :1973 Max. :14.660 Max. :1.0000
# Mean and median of the Thickness column within the subset.
mean(melanoma_subset[["Thickness"]])
## [1] 4.193125
median(melanoma_subset[["Thickness"]])
## [1] 3.22
# The same two statistics for the Age column of the subset.
mean(melanoma_subset[["Age"]])
## [1] 73.10417
median(melanoma_subset[["Age"]])
## [1] 71.5
Thickness: the mean and median of “Thickness” in melanoma_subset are higher than the mean and median of the same attribute in the full melanoma data set. This means that patients over 65 (melanoma_subset) have, on average, thicker melanomas than the patients in the full melanoma data set, which is quite an expected result.
Age: the mean and median of “Age” in melanoma_subset are higher than the mean and median of the same attribute in the full melanoma data set. This is because only patients over 65 were selected for melanoma_subset, so that data set naturally has a higher average age than the full melanoma data set, which covers a wide range of ages.
# Convert the Unclear column to character and store the result. Previously the
# as.character() result was only printed and then discarded, so the column
# became character solely through implicit coercion in the replacement below.
melanoma_subset$Unclear <- as.character(melanoma_subset$Unclear)
melanoma_subset$Unclear
## [1] "1" "0" "1" "1" "1" "1" "1" "1" "1" "1" "0" "1" "0" "1" "1" "1" "0"
## [18] "0" "0" "1" "1" "0" "0" "0" "0" "1" "0" "0" "0" "0" "1" "0" "1" "1"
## [35] "0" "0" "0" "0" "1" "1" "0" "1" "1" "1" "0" "0" "1" "0"
# Replace every "0" in the (now character) column with "No".
is_zero <- melanoma_subset$Unclear == "0"
melanoma_subset$Unclear[is_zero] <- "No"
# The original chained assignment left only the scalar "No" in changed_values;
# that value is kept in case later code reads it.
changed_values <- "No"
melanoma_subset
## ID Time Status Gender Age Year Thickness Unclear
## 1 1 10 3 1 76 1972 6.76 1
## 4 4 99 3 0 71 1968 2.90 No
## 7 7 210 1 1 77 1972 5.16 1
## 10 10 279 1 0 68 1971 7.41 1
## 13 13 386 1 0 68 1965 3.87 1
## 16 16 493 3 1 72 1971 12.56 1
## 18 18 621 1 1 72 1972 7.06 1
## 19 19 629 1 1 95 1968 5.48 1
## 21 21 667 1 0 89 1968 13.85 1
## 25 25 793 1 1 68 1970 4.84 1
## 26 26 817 1 0 67 1966 0.32 No
## 27 27 826 3 0 86 1965 8.54 1
## 35 35 1041 1 1 68 1967 14.66 No
## 36 36 1055 1 0 75 1967 2.58 1
## 38 38 1075 1 1 66 1971 3.54 1
## 42 42 1271 1 0 74 1971 3.54 1
## 46 46 1499 2 1 73 1973 1.29 No
## 49 49 1510 2 0 69 1973 1.94 No
## 50 50 1512 2 0 77 1973 0.16 No
## 51 51 1516 1 1 80 1968 2.58 1
## 52 52 1525 3 0 76 1970 1.29 1
## 62 62 1634 2 0 68 1973 1.37 No
## 68 68 1654 2 0 67 1973 1.13 No
## 72 72 1690 1 1 83 1971 1.62 No
## 84 84 1836 2 0 70 1972 1.53 No
## 89 89 1860 3 1 71 1969 4.84 1
## 92 92 1914 2 0 69 1972 3.06 No
## 93 93 1919 2 1 83 1972 3.54 No
## 96 96 1933 1 0 77 1972 1.94 No
## 100 100 1958 2 0 69 1972 12.88 No
## 102 102 1970 2 1 84 1972 4.09 1
## 103 103 2005 2 0 66 1972 0.64 No
## 105 105 2011 2 0 75 1972 3.22 1
## 110 110 2059 2 1 68 1972 3.22 1
## 111 111 2061 1 1 71 1968 2.26 No
## 114 114 2085 3 0 66 1970 0.65 No
## 117 117 2104 2 0 72 1972 0.97 No
## 123 123 2209 2 0 72 1971 9.66 No
## 125 125 2227 2 1 77 1971 5.48 1
## 132 132 2403 2 0 67 1971 2.90 1
## 133 133 2426 2 0 69 1971 3.87 No
## 134 134 2426 2 0 74 1971 1.94 1
## 139 139 2493 2 1 72 1971 4.82 1
## 141 141 2542 2 1 67 1971 7.89 1
## 149 149 2782 1 1 78 1969 1.94 No
## 151 151 2984 2 1 70 1969 0.16 No
## 173 173 3384 2 0 68 1968 3.22 1
## 193 193 4001 2 0 69 1967 2.10 No