Import Bike Sharing Data (CSV)

# Read the CSV with base R and preview the first rows as a table.
# NOTE(review): the str() output further down reports humidity as chr rather
# than a numeric type -- presumably the file contains at least one non-numeric
# humidity entry; TODO confirm (e.g. inspect unique(bike_csv$humidity)).
bike_csv <- read.csv("bike_sharing_data.csv")
kable(head(bike_csv))
datetime season holiday workingday weather temp atemp humidity windspeed casual registered count sources
1/1/2011 0:00 1 0 0 1 9.84 14.395 81 0.0000 3 13 16 ad campaign
1/1/2011 1:00 1 0 0 1 9.02 13.635 80 0.0000 8 32 40 www.yahoo.com
1/1/2011 2:00 1 0 0 1 9.02 13.635 80 0.0000 5 27 32 www.google.fi
1/1/2011 3:00 1 0 0 1 9.84 14.395 75 0.0000 3 10 13 AD campaign
1/1/2011 4:00 1 0 0 1 9.84 14.395 75 0.0000 0 1 1 Twitter
1/1/2011 5:00 1 0 0 2 9.84 12.880 75 6.0032 0 1 1 www.bing.com
str(bike_csv)
## 'data.frame':    17379 obs. of  13 variables:
##  $ datetime  : chr  "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
##  $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
##  $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
##  $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
##  $ humidity  : chr  "81" "80" "80" "75" ...
##  $ windspeed : num  0 0 0 0 0 ...
##  $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
##  $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
##  $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
##  $ sources   : chr  "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
summary(bike_csv)
##    datetime             season         holiday          workingday    
##  Length:17379       Min.   :1.000   Min.   :0.00000   Min.   :0.0000  
##  Class :character   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Mode  :character   Median :3.000   Median :0.00000   Median :1.0000  
##                     Mean   :2.502   Mean   :0.02877   Mean   :0.6827  
##                     3rd Qu.:3.000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##                     Max.   :4.000   Max.   :1.00000   Max.   :1.0000  
##     weather           temp           atemp         humidity        
##  Min.   :1.000   Min.   : 0.82   Min.   : 0.00   Length:17379      
##  1st Qu.:1.000   1st Qu.:13.94   1st Qu.:16.66   Class :character  
##  Median :1.000   Median :20.50   Median :24.24   Mode  :character  
##  Mean   :1.425   Mean   :20.38   Mean   :23.79                     
##  3rd Qu.:2.000   3rd Qu.:27.06   3rd Qu.:31.06                     
##  Max.   :4.000   Max.   :41.00   Max.   :50.00                     
##    windspeed          casual         registered        count    
##  Min.   : 0.000   Min.   :  0.00   Min.   :  0.0   Min.   :  1  
##  1st Qu.: 7.002   1st Qu.:  4.00   1st Qu.: 36.0   1st Qu.: 42  
##  Median :12.998   Median : 16.00   Median :116.0   Median :141  
##  Mean   :12.737   Mean   : 34.48   Mean   :152.5   Mean   :187  
##  3rd Qu.:16.998   3rd Qu.: 46.00   3rd Qu.:217.0   3rd Qu.:277  
##  Max.   :56.997   Max.   :367.00   Max.   :886.0   Max.   :977  
##    sources         
##  Length:17379      
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Import Bike Sharing Data (TXT)

# Read the tab-separated copy of the data with read_tsv().
# NOTE(review): the parsing warning printed below is never investigated.
# read_tsv() types humidity as dbl while read.csv() imports it as chr, so the
# flagged cells are presumably non-numeric humidity entries -- run
# problems(bike_txt) to confirm.
bike_txt <- read_tsv("bike_sharing_data.txt")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 17379 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): datetime, sources
## dbl (11): season, holiday, workingday, weather, temp, atemp, humidity, winds...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
kable(head(bike_txt))
datetime season holiday workingday weather temp atemp humidity windspeed casual registered count sources
1/1/2011 0:00 1 0 0 1 9.84 14.395 81 0.0000 3 13 16 ad campaign
1/1/2011 1:00 1 0 0 1 9.02 13.635 80 0.0000 8 32 40 www.yahoo.com
1/1/2011 2:00 1 0 0 1 9.02 13.635 80 0.0000 5 27 32 www.google.fi
1/1/2011 3:00 1 0 0 1 9.84 14.395 75 0.0000 3 10 13 AD campaign
1/1/2011 4:00 1 0 0 1 9.84 14.395 75 0.0000 0 1 1 Twitter
1/1/2011 5:00 1 0 0 2 9.84 12.880 75 6.0032 0 1 1 www.bing.com

Import Bike Sharing Data with data.table

# Read the same CSV with data.table's fread() and preview the first rows.
bike_dt <- fread("bike_sharing_data.csv")
kable(head(bike_dt))
datetime season holiday workingday weather temp atemp humidity windspeed casual registered count sources
1/1/2011 0:00 1 0 0 1 9.84 14.395 81 0.0000 3 13 16 ad campaign
1/1/2011 1:00 1 0 0 1 9.02 13.635 80 0.0000 8 32 40 www.yahoo.com
1/1/2011 2:00 1 0 0 1 9.02 13.635 80 0.0000 5 27 32 www.google.fi
1/1/2011 3:00 1 0 0 1 9.84 14.395 75 0.0000 3 10 13 AD campaign
1/1/2011 4:00 1 0 0 1 9.84 14.395 75 0.0000 0 1 1 Twitter
1/1/2011 5:00 1 0 0 2 9.84 12.880 75 6.0032 0 1 1 www.bing.com

Q1: Business Intelligence

Business Intelligence combines tools, databases, and methodologies to analyze historical and current data for decision-making. Answer: A. True

Q2: Data Structures

- Atomic vector → a sequence of values that all share the same data type (e.g., c(1,2,3))
- Matrix → a 2D structure whose elements all share the same data type
- List → can contain objects of different types (including vectors or other lists)
- Data frame → a list of equal-length vectors arranged in a row/column structure

Q3: Function Calls

A function call consists of function name + arguments inside parentheses. Answer: A. True

Q4: Importing Bike Sharing Dataset

# Import examples: the same data read four ways with base R (two per file).
# read.csv() and read.delim() are wrappers around read.table() that preset
# header = TRUE and sep = "," / sep = "\t"; they also use different
# quote / fill / comment.char defaults than a bare read.table() call.
bike1 <- read.table("bike_sharing_data.csv", sep=",", header=TRUE)
bike2 <- read.table("bike_sharing_data.txt", sep="\t", header=TRUE)
bike3 <- read.csv("bike_sharing_data.csv")
bike4 <- read.delim("bike_sharing_data.txt")

# Preview dataset
head(bike1)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com

Q5: Observations and Variables

# Total rows and columns in the bike sharing dataset
dim(bike_csv)
## [1] 17379    13

Q6: Data type of humidity

# (bike1 or bike3 from your Q4 chunk)
# NOTE: humidity would be expected to be integer, but it is imported as
# character (see the output lines) -- presumably at least one entry in the
# column is non-numeric; TODO inspect unique(bike1$humidity) to confirm.
str(bike1$humidity)     # displays 'chr' here, not 'int'
##  chr [1:17379] "81" "80" "80" "75" "75" "75" "80" "86" "75" "76" "76" "81" ...
typeof(bike1$humidity)  # displays "character" here, not "integer"
## [1] "character"

Q7: Season in row 6251

# Season value in row 6251
bike_csv[6251, "season"]
## [1] 4

Q8: Count of winter rows

table(bike_csv$season)                 # shows counts for 1..4
## 
##    1    2    3    4 
## 4242 4409 4496 4232
sum(bike_csv$season == 4)              # 4232
## [1] 4232
# or, as in your quiz feedback:
dim(subset(bike_csv, season == 4))[1]  # 4232
## [1] 4232

Q9: Example of multiple conditions

# Example: all spring rows (season == 1) with windspeed > 0.3
# NOTE(review): Q8 and Q10 treat season 4 as winter and season 1 as spring;
# if winter rows were intended here, the filter should be season == 4.
# windspeed ranges from 0 to about 57 in summary(bike_csv), so "> 0.3"
# mostly just excludes the zero-wind rows.
head(subset(bike_csv, season == 1 & windspeed > 0.3))
##          datetime season holiday workingday weather  temp  atemp humidity
## 6   1/1/2011 5:00      1       0          0       2  9.84 12.880       75
## 11 1/1/2011 10:00      1       0          0       1 15.58 19.695       76
## 12 1/1/2011 11:00      1       0          0       1 14.76 16.665       81
## 13 1/1/2011 12:00      1       0          0       1 17.22 21.210       77
## 14 1/1/2011 13:00      1       0          0       2 18.86 22.725       72
## 15 1/1/2011 14:00      1       0          0       2 18.86 22.725       72
##    windspeed casual registered count          sources
## 6     6.0032      0          1     1     www.bing.com
## 11   16.9979     12         24    36     www.bing.com
## 12   19.0012     26         30    56    www.yahoo.com
## 13   19.0012     29         55    84    www.google.fi
## 14   19.9995     47         47    94      AD campaign
## 15   19.0012     35         71   106 www.google.co.uk

Q10: High wind during winter/spring

nrow(subset(bike_csv, windspeed >= 40 & season %in% c(1, 4)))   # 46
## [1] 46
# Optional breakdown by season (41 rows in season 1 + 5 rows in season 4):
table(subset(bike_csv, windspeed >= 40 & season %in% c(1, 4))$season)
## 
##  1  4 
## 41  5