* Close any log left open by a previous run (capture suppresses the error if none is open).
capture log close
* Running for CPRD GOLD, diab_perm, primary care only as practice
log using "$logdir\an_smr_estimates.txt", replace text

/*******************************************************************************
ESTIMATE SMRS AND CONFIDENCE INTERVALS
*******************************************************************************/

* Load the weekly death/denominator counts for this cohort.
use "$datadir/cr_weeklycounts_diab_perm_gold_primary.dta", clear

* Show the distribution of age strata (including missing), then drop code 99,
* which is the "overall" row rather than a real stratum.
tab agestratum, m
drop if agestratum==99 /*overall*/

*create period variable where 1 = pre 2020, 2 = 2020
* NOTE(review): every year other than 2020 (later years or missing included)
* is assigned period 1 - confirm the input contains no post-2020 rows.
gen period = 1+(year==2020)

*end week of study period
* Number of whole weeks between 1 Jan 2020 and $studyend_primary.
* Only local macros are set here, so the data do not need to be preserved.
local endday = $studyend_primary - td(01jan2020)
local endweek = floor(`endday'/7)
di as result "end week: `endweek'"
* Applies to every year, so each period covers the same weeks.
drop if week>`endweek'

*collapse pre-pandemic years into one
* Deaths and denominators are summed within period, so all period-1 years
* are pooled into a single row per week / age stratum / gender cell.
collapse (sum) deaths denominator, by(period week agestratum gender)

*calculate weekly expected and observed deaths for each age and gender categories
gen rate = deaths/denominator
reshape wide deaths denominator rate, i(week agestratum gender) j(period)

* After the reshape, suffix 1 = pre-pandemic period and suffix 2 = 2020.
rename deaths2 observeddeaths

* Expected deaths = pooled pre-pandemic weekly rate applied to the 2020 denominator.
* NOTE(review): a cell with no pre-pandemic row (or a zero denominator) has a
* missing rate1 and therefore missing expecteddeaths - confirm this cannot occur.
gen expecteddeaths = rate1 * denominator2

* Keep only the cell identifiers plus observed and expected deaths.
drop deaths1 denominator1 rate1 denominator2 rate2

*reduce number of age categories

*check that I haven't changed the age bands in weekly counts
qui summ agestratum
assert r(max) == 15

* Map the 15 fine strata onto six broader bands, each labelled by the value
* assigned below (18, 40, 50, 60, 70, 80).
gen startageband = .
replace startageband = 18 if agestratum < 5
replace startageband = 40 if inlist(agestratum, 5, 6)
replace startageband = 50 if inlist(agestratum, 7, 8)
replace startageband = 60 if inlist(agestratum, 9, 10)
replace startageband = 70 if inlist(agestratum, 11, 12)
* Missing sorts above every number in Stata, so guard the open-ended top band
* explicitly; otherwise rows with a missing stratum would be counted as 80+.
replace startageband = 80 if agestratum >= 13 & !missing(agestratum)

* Every row must have been assigned to a band.
assert !missing(startageband)

drop agestratum

* Sum observed and expected deaths over the strata merged into each band.
collapse (sum) observeddeaths expecteddeaths, by(startageband gender week)
summ expecteddeaths, d

*collapse to months
* "Months" here are consecutive blocks of four study weeks, not calendar months.
* NOTE(review): if week numbering starts at 0, week 0 forms a block of its own - confirm.
gen month = ceil(week/4)
*recode startageband 20/69=20 70/100=70

collapse (sum) observeddeaths expecteddeaths, by(startageband gender month)

*calculate SMR
* Standardised mortality ratio = observed / expected deaths per band, gender and month.
gen smr = observeddeaths/expecteddeaths


*Use Poisson to get CIs (nb fully saturated model, equiv to ALL interactions, so estimates match the manual smr)
* One indicator per row: with expected deaths as the exposure, the exponentiated
* linear combination for each row is that row's SMR with its confidence interval.
gen n = _n
poisson observeddeaths i.n, exposure(expecteddeaths) irr

* Row 1 is the base level of i.n, so its SMR is the exponentiated constant.
* The values are written to every row here and overwritten for rows 2+ below.
lincom _cons, eform
gen smr_p = r(estimate)
gen smr_lci = r(lb)
gen smr_uci = r(ub)

qui summ n
local rows = r(max) /*hs added*/

* Use the saved local for the loop bound: lincom overwrites r() on each pass.
forvalues i=2/`rows' {
	lincom _cons+`i'.n, eform
	replace smr_p = r(estimate) if n==`i'
	replace smr_lci = r(lb) if n==`i'
	replace smr_uci = r(ub) if n==`i'
}

* Model-based and manual SMRs must agree in both directions, hence abs();
* a one-sided check would pass whenever smr_p exceeded smr by any amount.
assert abs(smr-smr_p)<.00001


save "$estimatedir\an_smr_estimates_diab_perm_gold_primary.dta", replace





