# Read CigarettesB.csv (path relative to the working directory) into a data frame.
mydata <- read.csv(file = "CigarettesB.csv")
#1 Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
# Per-column overview of the full data set.
summary(object = mydata)
## X packs price income
## Length:46 Min. :4.409 Min. :-0.0326 Min. :4.529
## Class :character 1st Qu.:4.712 1st Qu.: 0.1405 1st Qu.:4.679
## Mode :character Median :4.815 Median : 0.2002 Median :4.759
## Mean :4.848 Mean : 0.2055 Mean :4.775
## 3rd Qu.:4.984 3rd Qu.: 0.2735 3rd Qu.:4.853
## Max. :5.379 Max. : 0.3640 Max. :5.103
# Mean and median of the price column over the full data set.
mean(mydata[["price"]])
## [1] 0.2055087
median(mydata[["price"]])
## [1] 0.200205
# Mean and median of the income column over the full data set.
mean(mydata[["income"]])
## [1] 4.775455
median(mydata[["income"]])
## [1] 4.758505
#2 Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Copy the loaded data into a separately named data frame.
df.mydata <- data.frame(mydata)
# Keep only the rows whose price is below 0.1. which() discards rows where the
# comparison is NA, matching what subset() does; all columns are retained.
submydata <- df.mydata[which(df.mydata[["price"]] < 0.1), , drop = FALSE]
# Show the resulting subset.
submydata
## X packs price income
## 12 IN 5.11129 0.08992 4.72916
## 15 KY 5.37906 -0.03260 4.64937
## 23 MO 5.06430 0.08731 4.78189
## 36 SC 5.07801 0.07944 4.62549
#3 Create new column names for the new data frame.
# Attach an age column to the subset: one value per row, in row order.
# The values are stored as character strings, so summary() reports this
# column by length/class rather than with numeric statistics.
submydata$age <- c("18", "19", "45", "61")
# Show the subset with the new column.
submydata
## X packs price income age
## 12 IN 5.11129 0.08992 4.72916 18
## 15 KY 5.37906 -0.03260 4.64937 19
## 23 MO 5.06430 0.08731 4.78189 45
## 36 SC 5.07801 0.07944 4.62549 61
#4 Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Per-column overview of the subset, for comparison with the full data set.
summary(submydata)
## X packs price income
## Length:4 Min. :5.064 Min. :-0.03260 Min. :4.625
## Class :character 1st Qu.:5.075 1st Qu.: 0.05143 1st Qu.:4.643
## Mode :character Median :5.095 Median : 0.08338 Median :4.689
## Mean :5.158 Mean : 0.05602 Mean :4.696
## 3rd Qu.:5.178 3rd Qu.: 0.08796 3rd Qu.:4.742
## Max. :5.379 Max. : 0.08992 Max. :4.782
## age
## Length:4
## Class :character
## Mode :character
##
##
##
# The subset keeps only rows with price < 0.1, so both price statistics fall
# well below the full-data values; the income statistics are also lower.
mean(submydata$price) # decrease in the mean (full data: 0.2055087)
## [1] 0.0560175
median(submydata$price) # decrease in the median (full data: 0.200205)
## [1] 0.083375
mean(submydata$income) # decrease in the mean (full data: 4.775455)
## [1] 4.696478
median(submydata$income) # decrease in the median (full data: 4.758505)
## [1] 4.689265
## [1] 4.689265
#5 For at least 3 values in a column, rename them so that every occurrence of each value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.
# Expand the state abbreviations in column X to full state names.
# Only column X is modified, so an identical string in any other column is
# never overwritten. Abbreviations with no entry in the lookup (and NA values)
# are left unchanged, because %in% never returns NA.
state_names <- c(
  IN = "Indiana",
  KY = "Kentucky",
  MO = "Missouri",
  SC = "South Carolina"
)
is_abbrev <- submydata$X %in% names(state_names)
# as.character() makes the lookup go by name rather than by position.
submydata$X[is_abbrev] <- unname(state_names[as.character(submydata$X[is_abbrev])])
print(submydata)
## X packs price income age
## 12 Indiana 5.11129 0.08992 4.72916 18
## 15 Kentucky 5.37906 -0.03260 4.64937 19
## 23 Missouri 5.06430 0.08731 4.78189 45
## 36 South Carolina 5.07801 0.07944 4.62549 61
#6 Display enough rows to see examples of all of steps 1-5 above.
# Print every row of the original data set.
print(x = mydata)
## X packs price income
## 1 AL 4.96213 0.20487 4.64039
## 2 AZ 4.66312 0.16640 4.68389
## 3 AR 5.10709 0.23406 4.59435
## 4 CA 4.50449 0.36399 4.88147
## 5 CT 4.66983 0.32149 5.09472
## 6 DE 5.04705 0.21929 4.87087
## 7 DC 4.65637 0.28946 5.05960
## 8 FL 4.80081 0.28733 4.81155
## 9 GA 4.97974 0.12826 4.73299
## 10 ID 4.74902 0.17541 4.64307
## 11 IL 4.81445 0.24806 4.90387
## 12 IN 5.11129 0.08992 4.72916
## 13 IA 4.80857 0.24081 4.74211
## 14 KS 4.79263 0.21642 4.79613
## 15 KY 5.37906 -0.03260 4.64937
## 16 LA 4.98602 0.23856 4.61461
## 17 ME 4.98722 0.29106 4.75501
## 18 MD 4.77751 0.12575 4.94692
## 19 MA 4.73877 0.22613 4.99998
## 20 MI 4.94744 0.23067 4.80620
## 21 MN 4.69589 0.34297 4.81207
## 22 MS 4.93990 0.13638 4.52938
## 23 MO 5.06430 0.08731 4.78189
## 24 MT 4.73313 0.15303 4.70417
## 25 NE 4.77558 0.18907 4.79671
## 26 NV 4.96642 0.32304 4.83816
## 27 NH 5.10990 0.15852 5.00319
## 28 NJ 4.70633 0.30901 5.10268
## 29 NM 4.58107 0.16458 4.58202
## 30 NY 4.66496 0.34701 4.96075
## 31 ND 4.58237 0.18197 4.69163
## 32 OH 4.97952 0.12889 4.75875
## 33 OK 4.72720 0.19554 4.62730
## 34 PA 4.80363 0.22784 4.83516
## 35 RI 4.84693 0.30324 4.84670
## 36 SC 5.07801 0.07944 4.62549
## 37 SD 4.81545 0.13139 4.67747
## 38 TN 5.04939 0.15547 4.72525
## 39 TX 4.65398 0.28196 4.73437
## 40 UT 4.40859 0.19260 4.55586
## 41 VT 5.08799 0.18018 4.77578
## 42 VA 4.93065 0.11818 4.85490
## 43 WA 4.66134 0.35053 4.85645
## 44 WV 4.82454 0.12008 4.56859
## 45 WI 4.83026 0.22954 4.75826
## 46 WY 5.00087 0.10029 4.71169
# Print the subset in its current state.
print(x = submydata)
## X packs price income age
## 12 Indiana 5.11129 0.08992 4.72916 18
## 15 Kentucky 5.37906 -0.03260 4.64937 19
## 23 Missouri 5.06430 0.08731 4.78189 45
## 36 South Carolina 5.07801 0.07944 4.62549 61
#7 BONUS – Place the original .csv in a GitHub repository and have R read it from the link. This will be a very useful skill as you progress in your data science education and career.
# Read the same CSV over HTTPS from its raw GitHub URL (requires network access).
gitHubData <- read.csv(
  file = "https://raw.githubusercontent.com/arinolan/Nolan_Week-2-Assignment/main/CigarettesB.csv"
)