Task 1: Read a CSV file from my local computer.
## Load maxtemp.csv from its local path; the first row supplies the column names.
maxtemp_path <- "C:\\Users\\Gang Zhang\\Documents\\R\\R Workshop\\Week 2\\maxtemp.csv"
AirPort_MaxTemp <- read.csv(maxtemp_path, header = TRUE, sep = ",")
AirPort_MaxTemp
## X time value
## 1 1971 1971 34.6
## 2 1972 1972 39.3
## 3 1973 1973 40.5
## 4 1974 1974 36.8
## 5 1975 1975 39.7
## 6 1976 1976 40.5
## 7 1977 1977 41.5
## 8 1978 1978 38.2
## 9 1979 1979 41.4
## 10 1980 1980 41.5
## 11 1981 1981 41.9
## 12 1982 1982 43.5
## 13 1983 1983 43.2
## 14 1984 1984 35.1
## 15 1985 1985 42.1
## 16 1986 1986 38.1
## 17 1987 1987 39.6
## 18 1988 1988 39.9
## 19 1989 1989 37.8
## 20 1990 1990 39.0
## 21 1991 1991 41.3
## 22 1992 1992 38.7
## 23 1993 1993 37.8
## 24 1994 1994 38.9
## 25 1995 1995 39.7
## 26 1996 1996 36.1
## 27 1997 1997 41.3
## 28 1998 1998 40.6
## 29 1999 1999 37.5
## 30 2000 2000 39.6
## 31 2001 2001 38.8
## 32 2002 2002 36.6
## 33 2003 2003 44.3
## 34 2004 2004 39.7
## 35 2005 2005 43.0
## 36 2006 2006 43.1
## 37 2007 2007 41.5
## 38 2008 2008 40.5
## 39 2009 2009 46.7
## 40 2010 2010 43.9
## 41 2011 2011 39.5
## 42 2012 2012 39.7
## 43 2013 2013 41.5
## 44 2014 2014 43.7
## 45 2015 2015 42.7
## 46 2016 2016 42.9
Task 2: Create a new data frame with a subset of the columns and rows. Make sure to rename it.
library(dplyr)
## Keep only the rows whose time value lies in 1980-2000 (both ends included).
in_range <- between(AirPort_MaxTemp$time, 1980, 2000)
## which() discards any NA comparison results, and indexing by position keeps
## the original row names.
AirPort_MaxTemp2 <- AirPort_MaxTemp[which(in_range), , drop = FALSE]
AirPort_MaxTemp2
## X time value
## 10 1980 1980 41.5
## 11 1981 1981 41.9
## 12 1982 1982 43.5
## 13 1983 1983 43.2
## 14 1984 1984 35.1
## 15 1985 1985 42.1
## 16 1986 1986 38.1
## 17 1987 1987 39.6
## 18 1988 1988 39.9
## 19 1989 1989 37.8
## 20 1990 1990 39.0
## 21 1991 1991 41.3
## 22 1992 1992 38.7
## 23 1993 1993 37.8
## 24 1994 1994 38.9
## 25 1995 1995 39.7
## 26 1996 1996 36.1
## 27 1997 1997 41.3
## 28 1998 1998 40.6
## 29 1999 1999 37.5
## 30 2000 2000 39.6
Task 3: Create new column names for the new data frame.
## Drop the X column, which only repeats the values already held in time:
kept_columns <- setdiff(names(AirPort_MaxTemp2), "X")
AirPort_MaxTemp2 <- AirPort_MaxTemp2[, kept_columns, drop = FALSE]
## Give the two remaining columns descriptive names:
names(AirPort_MaxTemp2) <- c("Year", "Temperature")
AirPort_MaxTemp2
## Year Temperature
## 10 1980 41.5
## 11 1981 41.9
## 12 1982 43.5
## 13 1983 43.2
## 14 1984 35.1
## 15 1985 42.1
## 16 1986 38.1
## 17 1987 39.6
## 18 1988 39.9
## 19 1989 37.8
## 20 1990 39.0
## 21 1991 41.3
## 22 1992 38.7
## 23 1993 37.8
## 24 1994 38.9
## 25 1995 39.7
## 26 1996 36.1
## 27 1997 41.3
## 28 1998 40.6
## 29 1999 37.5
## 30 2000 39.6
Task 4: Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
## Per-column overview (min, quartiles, median, mean, max) of the subset.
summary(object = AirPort_MaxTemp2)
## Year Temperature
## Min. :1980 Min. :35.10
## 1st Qu.:1985 1st Qu.:38.10
## Median :1990 Median :39.60
## Mean :1990 Mean :39.68
## 3rd Qu.:1995 3rd Qu.:41.30
## Max. :2000 Max. :43.50
## Mean and median of the Year column:
year_values <- AirPort_MaxTemp2[["Year"]]
Year_Mean <- mean(year_values)
print(Year_Mean)
## [1] 1990
Year_Median <- median(year_values)
print(Year_Median)
## [1] 1990
## Mean and median of the Temperature column:
temperature_values <- AirPort_MaxTemp2[["Temperature"]]
Temp_Mean <- mean(temperature_values)
print(Temp_Mean)
## [1] 39.67619
Temp_Median <- median(temperature_values)
print(Temp_Median)
## [1] 39.6
## [1] 39.6
## Compare by a table.
## The full-data statistics are computed here because Time_Mean, Time_Median,
## Value_Mean and Value_Median are not created anywhere in the visible script.
Time_Mean <- mean(AirPort_MaxTemp$time)
Time_Median <- median(AirPort_MaxTemp$time)
Value_Mean <- mean(AirPort_MaxTemp$value)
Value_Median <- median(AirPort_MaxTemp$value)
## Row 1 holds the full data set, row 2 the AirPort_MaxTemp2 subset.
## Temperature means are rounded to one decimal place.
comparison_table <- matrix(
  c(
    Time_Mean, Time_Median, round(Value_Mean, digits = 1), Value_Median,
    Year_Mean, Year_Median, round(Temp_Mean, digits = 1), Temp_Median
  ),
  nrow = 2, ncol = 4, byrow = TRUE
)
dimnames(comparison_table) <- list(
  c("MaxTemp", "MaxTemp_Subset"),
  c("Year_Mean", "Year_Median", "Temp_Mean", "Temp_Median")
)
comparison_table
## Year_Mean Year_Median Temp_Mean Temp_Median
## MaxTemp 1993.5 1993.5 40.3 40.2
## MaxTemp_Subset 1990.0 1990.0 39.7 39.6
## Compare by bar plots.
## The full-data year statistics are taken straight from AirPort_MaxTemp
## because Time_Mean and Time_Median are not defined in the visible script.
year_table <- matrix(
  c(
    mean(AirPort_MaxTemp$time), median(AirPort_MaxTemp$time),
    Year_Mean, Year_Median
  ),
  nrow = 2, ncol = 2, byrow = TRUE
)
dimnames(year_table) <- list(c("MaxTemp", "MaxTemp_Subset"), c("Mean", "Median"))
## Year Barplot
## xpd = FALSE clips the bars to the plot region: the y-axis starts at 1990, so
## bars drawn from 0 would otherwise spill below the axis. The legend sets
## xpd = TRUE for itself because its negative inset pushes it past the plot edge.
barplot(
  year_table,
  beside = TRUE,
  legend.text = TRUE,
  args.legend = list(
    x = "topright", bty = "n", inset = c(-0.08, 0), cex = 1.0, xpd = TRUE
  ),
  ylim = c(1990, 1995),
  xpd = FALSE,
  las = 2,
  ylab = "Year",
  cex.names = 1.4, cex.axis = 1.0, cex.lab = 1.2,
  main = "Year Comparison",
  col = c("blue", "red")
)

## The full-data temperature statistics are taken straight from AirPort_MaxTemp
## because Value_Mean and Value_Median are not defined in the visible script.
## Means are rounded to one decimal place; row 1 is the full data set, row 2
## the AirPort_MaxTemp2 subset.
temp_table <- matrix(
  c(
    round(mean(AirPort_MaxTemp$value), digits = 1), median(AirPort_MaxTemp$value),
    round(Temp_Mean, digits = 1), Temp_Median
  ),
  nrow = 2, ncol = 2, byrow = TRUE
)
dimnames(temp_table) <- list(c("MaxTemp", "MaxTemp_Subset"), c("Mean", "Median"))
## Temperature Barplot
## xpd = FALSE clips the bars to the plot region: the y-axis starts at 38, so
## bars drawn from 0 would otherwise spill below the axis. The legend sets
## xpd = TRUE for itself because its negative inset pushes it past the plot edge.
## las only accepts 0, 1, 2 or 3; 0 replaces the invalid 0.5.
## NOTE(review): the axis label is now degrees Celsius; the values (34-47)
## match the fpp2 maxtemp series, which is documented in Celsius. Confirm source.
barplot(
  temp_table,
  beside = TRUE,
  legend.text = TRUE,
  args.legend = list(
    x = "topright", bty = "n", inset = c(-0.08, 0), cex = 1.0, xpd = TRUE
  ),
  ylim = c(38, 42),
  xpd = FALSE,
  las = 0,
  ylab = "°C",
  cex.names = 1.4, cex.axis = 1.0, cex.lab = 1.2,
  main = "Temperature Comparison",
  col = c("blue", "red")
)

Task 5: For at least 3 values in a column please rename so that every value in that column is renamed. For example, suppose I have 20 values of the letter “e” in one column. Rename those values so that all 20 would show as “excellent”.
## Replace every "e" in the second column's header with "X".
## NOTE(review): this edits the column name only; the values stored in the
## column are left untouched, whereas the task text talks about renaming values.
column_labels <- colnames(AirPort_MaxTemp2)
column_labels[2] <- gsub("e", "X", column_labels[2])
colnames(AirPort_MaxTemp2) <- column_labels
AirPort_MaxTemp2
## Year TXmpXraturX
## 10 1980 41.5
## 11 1981 41.9
## 12 1982 43.5
## 13 1983 43.2
## 14 1984 35.1
## 15 1985 42.1
## 16 1986 38.1
## 17 1987 39.6
## 18 1988 39.9
## 19 1989 37.8
## 20 1990 39.0
## 21 1991 41.3
## 22 1992 38.7
## 23 1993 37.8
## 24 1994 38.9
## 25 1995 39.7
## 26 1996 36.1
## 27 1997 41.3
## 28 1998 40.6
## 29 1999 37.5
## 30 2000 39.6
Task 6: Display enough rows to see examples of all of steps 1-5 above.
## First approach: pick the rows whose Year falls between 1985 and 1995.
middle_rows <- with(AirPort_MaxTemp2, which(Year >= 1985 & Year <= 1995))
examples_step1to5 <- AirPort_MaxTemp2[middle_rows, ]
examples_step1to5
## Year TXmpXraturX
## 15 1985 42.1
## 16 1986 38.1
## 17 1987 39.6
## 18 1988 39.9
## 19 1989 37.8
## 20 1990 39.0
## 21 1991 41.3
## 22 1992 38.7
## 23 1993 37.8
## 24 1994 38.9
## 25 1995 39.7
## Second approach: take the first 15 rows (or all rows if there are fewer).
first_rows <- seq_len(min(15, nrow(AirPort_MaxTemp2)))
examples_step1to5_2 <- AirPort_MaxTemp2[first_rows, , drop = FALSE]
examples_step1to5_2
## Year TXmpXraturX
## 10 1980 41.5
## 11 1981 41.9
## 12 1982 43.5
## 13 1983 43.2
## 14 1984 35.1
## 15 1985 42.1
## 16 1986 38.1
## 17 1987 39.6
## 18 1988 39.9
## 19 1989 37.8
## 20 1990 39.0
## 21 1991 41.3
## 22 1992 38.7
## 23 1993 37.8
## 24 1994 38.9
Task 7: BONUS – place the original .csv file in a GitHub repository and have R read it from the link. This will be a very useful skill as you progress in your data science education and career.
library("rio")
## Read the same CSV straight from its raw GitHub URL using rio's import().
maxtemp_url <- "https://raw.githubusercontent.com/SieSiongWong/CUNY-SPS-R-Workshop/master/maxtemp.csv"
AirPort_MaxTemp3 <- import(file = maxtemp_url)
AirPort_MaxTemp3
## V1 time value
## 1 1971 1971 34.6
## 2 1972 1972 39.3
## 3 1973 1973 40.5
## 4 1974 1974 36.8
## 5 1975 1975 39.7
## 6 1976 1976 40.5
## 7 1977 1977 41.5
## 8 1978 1978 38.2
## 9 1979 1979 41.4
## 10 1980 1980 41.5
## 11 1981 1981 41.9
## 12 1982 1982 43.5
## 13 1983 1983 43.2
## 14 1984 1984 35.1
## 15 1985 1985 42.1
## 16 1986 1986 38.1
## 17 1987 1987 39.6
## 18 1988 1988 39.9
## 19 1989 1989 37.8
## 20 1990 1990 39.0
## 21 1991 1991 41.3
## 22 1992 1992 38.7
## 23 1993 1993 37.8
## 24 1994 1994 38.9
## 25 1995 1995 39.7
## 26 1996 1996 36.1
## 27 1997 1997 41.3
## 28 1998 1998 40.6
## 29 1999 1999 37.5
## 30 2000 2000 39.6
## 31 2001 2001 38.8
## 32 2002 2002 36.6
## 33 2003 2003 44.3
## 34 2004 2004 39.7
## 35 2005 2005 43.0
## 36 2006 2006 43.1
## 37 2007 2007 41.5
## 38 2008 2008 40.5
## 39 2009 2009 46.7
## 40 2010 2010 43.9
## 41 2011 2011 39.5
## 42 2012 2012 39.7
## 43 2013 2013 41.5
## 44 2014 2014 43.7
## 45 2015 2015 42.7
## 46 2016 2016 42.9