# Home_Work2.R

# Problem 1 ----

## Show the current working directory; the value it returned is recorded in the output comment below.
getwd()

## [1] "C:/Users/Al Haque/Documents"

## Problem 1: read ChinaIncome.csv directly from its raw GitHub URL.
## NOTE(review): the URL carries a `token` query parameter; such tokens are
## presumably short-lived, so confirm the link still resolves before re-running.
file <- read.csv(
  file = "https://raw.githubusercontent.com/alhaque1/R-Homework-2-/main/ChinaIncome.csv?token=GHSAT0AAAAAABQLSBKBPCNWCCZSLYISFPV6YO7GVKA",
  header = TRUE,
  sep = ","
)
## Wrap the imported table in a data frame, then show per-column summary statistics.
file1 <- data.frame(file)
summary(file1)

##        X       agriculture       commerce      construction       industry     
##  Min.   : 1   Min.   : 83.6   Min.   :100.0   Min.   : 100.0   Min.   : 100.0  
##  1st Qu.:10   1st Qu.:111.9   1st Qu.:146.6   1st Qu.: 259.0   1st Qu.: 374.9  
##  Median :19   Median :139.8   Median :199.2   Median : 421.0   Median : 863.0  
##  Mean   :19   Mean   :151.9   Mean   :261.0   Mean   : 549.9   Mean   :1244.2  
##  3rd Qu.:28   3rd Qu.:168.4   3rd Qu.:316.8   3rd Qu.: 584.1   3rd Qu.:1814.7  
##  Max.   :37   Max.   :279.4   Max.   :760.8   Max.   :1884.0   Max.   :4765.0  
##    transport     
##  Min.   : 100.0  
##  1st Qu.: 221.1  
##  Median : 370.8  
##  Mean   : 449.5  
##  3rd Qu.: 560.8  
##  Max.   :1413.6

## Agriculture column of file1: report the mean, then the median, to two decimals.
sprintf(
  "The mean of the agriculture column is: %.2f",
  mean(file1[["agriculture"]])
)

## [1] "The mean of the agriculture column is: 151.95"

sprintf(
  "The median of the agriculture column is: %.2f",
  median(file1[["agriculture"]])
)

## [1] "The median of the agriculture column is: 139.80"

## Industry column of file1: report the mean, then the median, to two decimals.
sprintf(
  "The mean of the values under the industry column is: %.2f ",
  mean(file1[["industry"]])
)

## [1] "The mean of the values under the industry column is: 1244.24 "

sprintf(
  "The median of the values under the industry column is %.2f ",
  median(file1[["industry"]])
)

## [1] "The median of the values under the industry column is 863.00 "

# Problem 2 ----

## Problem 2: build a new data frame from the first three rows of file1,
## store it as data_subset, and display it.
data_subset <- file1[seq_len(3), ]
data_subset

##   X agriculture commerce construction industry transport
## 1 1       100.0    100.0        100.0    100.0       100
## 2 2       101.6    133.0        138.1    133.6       120
## 3 3       103.3    136.4        133.3    159.1       136

# Problem 3 ----

## Problem 3: give every column of data_subset a new name.
## Each column is looked up by its current name rather than by position, so a
## rename cannot land on the wrong column if the column order ever changes;
## a name that is not present is simply left untouched.
names(data_subset)[names(data_subset) == "X"] <- "x"
names(data_subset)[names(data_subset) == "agriculture"] <- "Agriculture"
names(data_subset)[names(data_subset) == "commerce"] <- "Commerce"
names(data_subset)[names(data_subset) == "construction"] <- "Construction"
names(data_subset)[names(data_subset) == "industry"] <- "Industry"
names(data_subset)[names(data_subset) == "transport"] <- "Transportation"
## Display the subset with its new column names.
data_subset

##   x Agriculture Commerce Construction Industry Transportation
## 1 1       100.0    100.0        100.0    100.0            100
## 2 2       101.6    133.0        138.1    133.6            120
## 3 3       103.3    136.4        133.3    159.1            136

# Problem 4 ----

## Problem 4: per-column summary statistics of data_subset (the 3-row subset of file1)
summary(data_subset)

##        x        Agriculture       Commerce      Construction      Industry    
##  Min.   :1.0   Min.   :100.0   Min.   :100.0   Min.   :100.0   Min.   :100.0  
##  1st Qu.:1.5   1st Qu.:100.8   1st Qu.:116.5   1st Qu.:116.7   1st Qu.:116.8  
##  Median :2.0   Median :101.6   Median :133.0   Median :133.3   Median :133.6  
##  Mean   :2.0   Mean   :101.6   Mean   :123.1   Mean   :123.8   Mean   :130.9  
##  3rd Qu.:2.5   3rd Qu.:102.5   3rd Qu.:134.7   3rd Qu.:135.7   3rd Qu.:146.3  
##  Max.   :3.0   Max.   :103.3   Max.   :136.4   Max.   :138.1   Max.   :159.1  
##  Transportation 
##  Min.   :100.0  
##  1st Qu.:110.0  
##  Median :120.0  
##  Mean   :118.7  
##  3rd Qu.:128.0  
##  Max.   :136.0

## Problem 4: mean and median of the Agriculture column of data_subset, to two decimals.

sprintf(
  "The new mean of the agriculture column is: %.2f",
  mean(data_subset[["Agriculture"]])
)

## [1] "The new mean of the agriculture column is: 101.63"

sprintf(
  "The new median of the agriculture column is: %.2f",
  median(data_subset[["Agriculture"]])
)

## [1] "The new median of the agriculture column is: 101.60"

## Mean and median of the Transportation column of data_subset, to two decimals.

sprintf(
  "The new mean of the Transportation column is: %.2f",
  mean(data_subset[["Transportation"]])
)

## [1] "The new mean of the Transportation column is: 118.67"

sprintf(
  "The new median of the Transportation column is: %.2f",
  median(data_subset[["Transportation"]])
)

## [1] "The new median of the Transportation column is: 120.00"

# Problem 5 ----

## Problem 5: rename every column of data_subset a second time, matching each
## column by its current name (e.g. x -> Varx, Commerce -> CommercExcellent),
## then summarise the renamed data frame.
colnames(data_subset)[colnames(data_subset) == "x"] <- "Varx"
colnames(data_subset)[colnames(data_subset) == "Agriculture"] <- "AVGgriculture"
colnames(data_subset)[colnames(data_subset) == "Commerce"] <- "CommercExcellent"
colnames(data_subset)[colnames(data_subset) == "Construction"] <- "CoWnstruction"
colnames(data_subset)[colnames(data_subset) == "Industry"] <- "InDuckstry"
colnames(data_subset)[colnames(data_subset) == "Transportation"] <- "TransportaVeragetion"
summary(data_subset)

##       Varx     AVGgriculture   CommercExcellent CoWnstruction     InDuckstry   
##  Min.   :1.0   Min.   :100.0   Min.   :100.0    Min.   :100.0   Min.   :100.0  
##  1st Qu.:1.5   1st Qu.:100.8   1st Qu.:116.5    1st Qu.:116.7   1st Qu.:116.8  
##  Median :2.0   Median :101.6   Median :133.0    Median :133.3   Median :133.6  
##  Mean   :2.0   Mean   :101.6   Mean   :123.1    Mean   :123.8   Mean   :130.9  
##  3rd Qu.:2.5   3rd Qu.:102.5   3rd Qu.:134.7    3rd Qu.:135.7   3rd Qu.:146.3  
##  Max.   :3.0   Max.   :103.3   Max.   :136.4    Max.   :138.1   Max.   :159.1  
##  TransportaVeragetion
##  Min.   :100.0       
##  1st Qu.:110.0       
##  Median :120.0       
##  Mean   :118.7       
##  3rd Qu.:128.0       
##  Max.   :136.0

# Problem 6 ----

## Problem 6: write a function that shows all the table changes from Problems 1 to 5.
## NOTE(review): no function is defined here; the two statements below simply
## display data_subset and its summary.

data_subset   ## displays the 3-row table with the Problem 5 column names

##   Varx AVGgriculture CommercExcellent CoWnstruction InDuckstry
## 1    1         100.0            100.0         100.0      100.0
## 2    2         101.6            133.0         138.1      133.6
## 3    3         103.3            136.4         133.3      159.1
##   TransportaVeragetion
## 1                  100
## 2                  120
## 3                  136

summary(data_subset) ## shows the summary of the data set with the new column names

##       Varx     AVGgriculture   CommercExcellent CoWnstruction     InDuckstry   
##  Min.   :1.0   Min.   :100.0   Min.   :100.0    Min.   :100.0   Min.   :100.0  
##  1st Qu.:1.5   1st Qu.:100.8   1st Qu.:116.5    1st Qu.:116.7   1st Qu.:116.8  
##  Median :2.0   Median :101.6   Median :133.0    Median :133.3   Median :133.6  
##  Mean   :2.0   Mean   :101.6   Mean   :123.1    Mean   :123.8   Mean   :130.9  
##  3rd Qu.:2.5   3rd Qu.:102.5   3rd Qu.:134.7    3rd Qu.:135.7   3rd Qu.:146.3  
##  Max.   :3.0   Max.   :103.3   Max.   :136.4    Max.   :138.1   Max.   :159.1  
##  TransportaVeragetion
##  Min.   :100.0       
##  1st Qu.:110.0       
##  Median :120.0       
##  Mean   :118.7       
##  3rd Qu.:128.0       
##  Max.   :136.0

## As shown above, I calculated the new means and medians of the subset's columns with the mean and median functions. For #7, I copied the direct GitHub link to the raw CSV file and used the read.csv function so that R reads it directly from GitHub. ##