/*PREPARE FOR GLM MODEL*/


* Stratum handling. The data may arrive with agestratum already collapsed
* (e.g. for study population estimates); the glm steps must still run then.
capture confirm variable agestratum
if _rc == 0 {
	* agestratum is present: 99 used to include people aged 18-39, and
	* values below 5 are the under-40s; both groups are removed
	drop if agestratum == 99 | agestratum < 5
}
else {
	* agestratum is absent: create placeholder variables, which are needed
	* to generate the weeksfrom variable further down
	generate agestratumdummy = 15
	generate genderstratumdummy = 1
}

* Same idea for the user-chosen stratifying variable held in the strata macro
capture confirm variable `strata'
if _rc != 0 {
	generate `strata'dummy = 1
}

* Discard all observations from 2014
drop if year == 2014

* Indicator for the pandemic year: 1 when year is 2020, 0 otherwise
generate year2020 = (year == 2020)

* Build weekfromjan2015, a running week counter over the sorted data.
* Step 1 flags one reference row per year-week with a 1 (this assumes one row
* per combination of strata - TODO confirm); step 2 sorts; step 3 accumulates
* the flags with a running sum so every row carries the count so far.
* NOTE(review): agestratum / genderstratum may not exist under those exact
*   names here; when only the agestratumdummy / genderstratumdummy
*   placeholders exist, the references below rely on Stata's variable-name
*   abbreviation to find them - confirm varabbrev is on for this workflow.
* NOTE(review): the second comparison expands the local macro agelinear rather
*   than testing against the literal text agelinear - confirm which is meant.
if "`strata'" != "agestratum" & "`strata'" != "`agelinear'" & "`strata'" != "genderstratum" {
	* the lowest value of the stratifying variable is the reference level
	* (hs added 01.03.2021, replacing a hard-coded 1 with r(min))
	quietly summarize `strata'
	* NOTE(review): 15 is hard-coded; presumably the highest age stratum,
	*   matching the descending agestratum sort below - confirm
	generate weekfromjan2015 = 1 if agestratum==15 & genderstratum ==1  & `strata' == `r(min)'
	* HS changed the sort on the stratifying variable from descending to
	* ascending, 22April21
	gsort year week -agestratum genderstratum `strata'
}
else {
	* stratifying by age or gender: the lowest age stratum is the reference
	quietly summarize agestratum
	generate weekfromjan2015 = 1 if agestratum==`r(min)' & genderstratum == 1
	gsort year week agestratum genderstratum
}

* Running sum of the flags (sum() treats missing as zero). The variable is
* named in full so this line does not depend on name abbreviation.
replace weekfromjan2015 = sum(weekfromjan2015)

* Remove any placeholder variables; capture because there may be none
capture drop *dummy

* Debugging breakpoint; does nothing unless pause has been switched on
pause
* Denominator rescaled to millions
generate denommillions = denom / 1000000

* Remove 2015 data from before the start date, so that 2015 begins at the
* same point in the year as 2020 wave 1 (global wave1start)
keep if !(year == 2015 & week < $wave1start)

* Remove 2020 data after the end week of full data. The end week is the
* number of whole weeks between the d(01/01/2020) date and the study end
* date for the current linkage.
local endday = ${studyend_`linkage'} - d(01/01/2020)
local endweek = floor(`endday'/7)
display as yellow "end week: `endweek'"
keep if !(year==2020 & week > `endweek')


* Pandemic term: 1 for 2020 weeks at or after the wave 1 start week, else 0.
* Note this includes time after wave 1 - those weeks need to be dropped for
* the glm model analysis but are kept for the time series graphs.
generate pandemic = (year == 2020 & week >= $wave1start)

* Seasonality: express week of year in degrees (52 weeks = 360 degrees),
* then generate fourier terms from it
generate degrees = (week/52)*360
summarize degrees

* fourier is not a built-in command; presumably n(3) asks for three
* sine/cosine pairs - TODO confirm against the command's help file
fourier degrees, n(3)
summarize sin* cos*

* Long term trend: year centred on 2017, plus its square for a quadratic term
generate year_c = year - 2017
generate year_c2 = year_c * year_c

