#R version 4.0.2 (2020-06-22) -- "Taking Off Again"
#Copyright (C) 2020 The R Foundation for Statistical Computing
#Platform: x86_64-w64-mingw32/x64 (64-bit)
Sys.time()
## [1] "2021-03-07 15:03:12 GMT"
library(tidyverse)
library(reshape2)
library(lubridate)
library(kableExtra)
names <- c('Day', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December')
Daylight.times.curated <- read.csv("Daylight-times-curated.tex", header=FALSE,col.names =  names)

head(Daylight.times.curated) %>% 
  kbl() %>% 
  kable_styling()
Day January February March April May June July August September October November December
1 07:30 08:53 10:47 13:01 15:06 16:45 17:02 15:46 13:44 11:36 09:27 07:48
2 07:31 08:57 10:51 13:06 15:10 16:47 17:01 15:42 13:40 11:32 09:23 07:46
3 07:33 09:01 10:56 13:10 15:14 16:49 17:00 15:38 13:35 11:28 09:19 07:44
4 07:34 09:04 11:00 13:14 15:18 16:51 16:59 15:35 13:31 11:23 09:15 07:42
5 07:36 09:08 11:04 13:19 15:22 16:53 16:57 15:31 13:27 11:19 09:11 07:40
6 07:38 09:12 11:09 13:23 15:25 16:55 16:56 15:28 13:23 11:15 09:08 07:38

How is the data distributed?

Repeating the data

Time <- Data$Time.h. 
day <- Data$day

plot(Time,type = "p", col = "red", lwd = 0.01)

Fitting a function()


General sinusoidal equation: \(H(t)=A\sin[Bt-c]+D,A\cos[Bt-c]+D\)

Linear approximation: \(f(x)\approx f(x_{0})+f'(x_{0})(x-x_{0})\)


ssp <- spectrum(Time)  

per <- 1/ssp$freq[ssp$spec==max(ssp$spec)]
reslm <- lm(Time ~ sin(2*pi/per*day)+cos(2*pi/per*day))
summary(reslm)
## 
## Call:
## lm(formula = Time ~ sin(2 * pi/per * day) + cos(2 * pi/per * 
##     day))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38098 -0.15505  0.00624  0.15578  0.49011 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           12.252957   0.005883  2082.8   <2e-16 ***
## sin(2 * pi/per * day)  1.090685   0.008302   131.4   <2e-16 ***
## cos(2 * pi/per * day) -4.546970   0.008336  -545.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1946 on 1091 degrees of freedom
## Multiple R-squared:  0.9965, Adjusted R-squared:  0.9965 
## F-statistic: 1.575e+05 on 2 and 1091 DF,  p-value: < 2.2e-16


rg <- diff(range(Time))
plot(Time~day,ylim=c(min(Time)-0.1*rg,max(Time)+0.1*rg),cex  = 0.001,col  = 1,pch=".")
#lines(fitted(reslm)~day,col=4,lty=2)   # dashed blue line is sin fit, omitted for clarity
# including 2nd harmonic improves the fit marginally
reslm2 <- lm(Time ~ sin(2*pi/per*day)+cos(2*pi/per*day)+sin(4*pi/per*day)+cos(4*pi/per*day))
summary(reslm2)
## 
## Call:
## lm(formula = Time ~ sin(2 * pi/per * day) + cos(2 * pi/per * 
##     day) + sin(4 * pi/per * day) + cos(4 * pi/per * day))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38692 -0.15234  0.00569  0.15559  0.48432 
## 
## Coefficients:
##                        Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)           12.253026   0.005877 2084.859   <2e-16 ***
## sin(2 * pi/per * day)  1.090691   0.008295  131.491   <2e-16 ***
## cos(2 * pi/per * day) -4.546755   0.008328 -545.951   <2e-16 ***
## sin(4 * pi/per * day)  0.008381   0.008296    1.010   0.3126    
## cos(4 * pi/per * day)  0.014633   0.008327    1.757   0.0791 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1944 on 1089 degrees of freedom
## Multiple R-squared:  0.9966, Adjusted R-squared:  0.9965 
## F-statistic: 7.89e+04 on 4 and 1089 DF,  p-value: < 2.2e-16
lines(fitted(reslm2)~day,col=2,lty=3)
legend(x=240,y=18,c("original values","sinusoidal fitting"),cex=.8,col=c("black","red"),pch=c("_","-"))


\[H(t)∼1.1\sin[(2π/375)×(t)]- 4.5\cos[(2π/375)×(t)]+12.3\]



## [1] "The mean amount of daylight is  12.2945063985375"
## [1] "The min amount of daylight is  7.38"
## [1] "The max amount of daylight is  17.13"

It is interesting to note the average amount of daylight is not exactly 12 hours; there are myriad reasons for this, but this is largely because of the way sunrise and sunset are recorded coupled with the spherical nature of the sun with a non-zero diameter.

Given, we have fitted the sinusoidal function with second harmonic with little effort, the linear approximation of a non-linear function should be revisited in the following section.


Additional scope

I would expect that at the equator there is very little variation, the further away one is from the equator the greater the amplitude of the sinusoidal function.

knitr::include_graphics("sensecheck.png")

Given a function f(x), we can find its tangent at x=a. The equation of the tangent line is: L(x)=f(a)+f’(a)(x-a) Linear approximations are ‘good’ at approximating values of f(x) near x=a; the further away x=a, the worse the approximation.

An example of a good linear approximation would be sin(theta) at theta=0, which in this case: sin(theta)~theta

For extreme and equatorial latitudes, a linear approximation would capture most of the function’s behaviour.

To reduce the distance between the tangent line and the function, you could use piecewise linear approximation. I’ll have a go at this using my own functions.

Look at the problem as an infinite sum of sin waves - see Fourier series.

Calculate geometric linear approximation.

Explore the fundamental reasons for sinusoidal behaviour, i.e the earths axis tilt is \(\sim 23.5^{\circ}\).

Is the equinox a true point of inflection?