* Close any log that is still open (ignoring the error if none is), then read
* the two positional arguments passed to this do-file into local macros.
capture log close
args database linkage

* Suffix shared by the log file name and the input dataset name
local runtag "`database'_`linkage'"

log using "$logdir\an_table1_`runtag'.txt", replace text

/****************************************************************************
TABLE 1 = COUNTS OF DENOMINATOR WEEKS AND EXPECTED, OBSERVED AND EXCESS
DEATHS FOR EACH COVARIATE
****************************************************************************/

* Load the processed time-series output for this database/linkage combination
use "$estimatedir\an_timeseries_processout_`runtag'.dta", clear

*gen excessstr = string(totalexcess, "%9.0f") /*+ " (" + string(p5excess, "%9.0f") + " - " + string(p95excess, "%9.0f") + ")"*/

/*
DISABLED CODE (kept for reference): adds an extra table row for missing deprivation data.
TODO: ADD A FOOTNOTE - THESE ARE ALL IN NORTHERN IRELAND
*additional row for missing deprivation data
bysort group strata (substrata): egen totaldenom = total(denommillions)
bysort group strata (substrata): egen totaldeaths = total(deaths)

gen order = _n
expand 2 if strata == "deprivation" & substrata == 5, gen(dup)
sort order dup
replace substrataname = "missing" if dup == 1
replace substrata = 99 if dup == 1

summ denommillions if strata == "studypop"
replace denommillions =  `r(max)' - totaldenom if dup == 1

summ deaths if strata == "studypop"
replace deaths =  `r(max)' - totaldeaths if dup == 1

foreach var in n_usingPREdata xb pooledse_xb pooledse_n {
	replace `var' = . if dup == 1
	}
	
drop order dup
*/
	
*denominator (weeks)
* Each row's person-weeks (in millions) is also shown as a percentage of the
* study-population total, taken as the maximum of denommillions over the rows
* with strata == "studypop".
summ denommillions if strata == "studypop"
gen percentweeks = (denommillions/`r(max)')*100
* The full variable name is used here (previously abbreviated to "denommill") so
* the line still runs when variable abbreviation is switched off and cannot
* become ambiguous if another variable with the same prefix is added.
gen denomresult = string(denommillions, "%9.1fc") + " (" + string(percentweeks, "%9.1fc") + ")"
label var denomresult "Person weeks in millions (% weeks in study population)"

*total deaths
* deaths is renamed to observed; each row's count is also shown as a percentage
* of the maximum observed value over the strata == "studypop" rows.
rename deaths observed
qui summ observed if strata == "studypop"
gen observedpercent = (observed /`r(max)')*100
* NOTE(review): the count is formatted with one decimal place, same as the
* denominator above - confirm whether a whole-number format was intended.
gen observedresult = string(observed, "%9.1fc") + " (" + string(observedpercent, "%9.1fc") + ")"
label var observedresult "Observed deaths (% deaths in study population)"

*Observed per million
* Observed deaths divided by person-weeks in millions
* (should be same as number from weekly counts)
generate observedpermill = observed / denommillions
label variable observedpermill "Observed deaths per million"

*Expected per million
/* Abandoned approach, kept for reference: as suspected, log-scale values
   cannot be summed and then exponentiated.
gen expxb = exp(xb)
assert expxb == n_usingPREdata
replace pooledse_xb = exp(pooledse_xb)
*/
* The expected count and its pooled standard error are divided by the same
* denominator; the interval is the point estimate plus or minus 1.96 times
* the rescaled standard error.
generate expectedpermill_point = n_usingPREdata / denommillions
generate pooledsemill = pooledse_n / denommillions

local z 1.96
generate expectedpermill_lci = expectedpermill_point - `z' * pooledsemill
generate expectedpermill_uci = expectedpermill_point + `z' * pooledsemill

* Display string of the form "point (lower-upper)"
local fmt "%9.0fc"
generate expectedpermill_str = ///
	string(expectedpermill_point, "`fmt'") + " (" + ///
	string(expectedpermill_lci, "`fmt'") + "-" + ///
	string(expectedpermill_uci, "`fmt'") + ")"
label variable expectedpermill_str "Expected deaths per million weeks (95% CI)"

*Excess per million
* Excess = observed rate minus expected rate; not computed where the expected
* rate is exactly 0. The interval reuses the rescaled pooled standard error.
generate excesspermill_point = observedpermill - expectedpermill_point if expectedpermill_point != 0
local z 1.96
generate excesspermill_lci = excesspermill_point - `z' * pooledsemill
generate excesspermill_uci = excesspermill_point + `z' * pooledsemill

/* Alternative interval construction, not in use:
gen excesspermill_lci = observedpermill - expectedpermill_uci if expectedpermill_point !=0
gen excesspermill_uci = observedpermill - expectedpermill_lci if expectedpermill_point !=0
*/

* Percentage excess: each excess value relative to the expected point estimate,
* only where the excess point estimate is non-missing
foreach stat in point lci uci {
	generate percentexcess_`stat' = (excesspermill_`stat' / expectedpermill_point) * 100 if excesspermill_point != .
}

* Display strings of the form "point (lower-upper)"
* NOTE(review): rows with missing values will presumably render as dots
* inside the string rather than as blanks - confirm this is acceptable.
local whole "%9.0fc"
local onedp "%9.1fc"
generate excesspermill_str = ///
	string(excesspermill_point, "`whole'") + " (" + ///
	string(excesspermill_lci, "`whole'") + "-" + ///
	string(excesspermill_uci, "`whole'") + ")"
generate percentexcess_str = ///
	string(percentexcess_point, "`onedp'") + " (" + ///
	string(percentexcess_lci, "`onedp'") + "-" + ///
	string(percentexcess_uci, "`onedp'") + ")"

label variable excesspermill_str "Excess deaths per million weeks (95% CI)"
label variable percentexcess_str "Percentage increase versus expected (95% CI)"

***single column for names
* Rows with an empty substrataname have their names shifted down one level:
* strataname moves into substrataname, groupname moves into strataname, and
* (except for the study population row) groupname becomes "Comorbidity".
gen _ind = 1 if substrataname == ""
replace substrataname = strataname if _ind == 1
replace strataname = groupname if _ind == 1
replace groupname = "Comorbidity" if _ind == 1 & strata != "studypop"
drop _ind
drop group strata
replace substrata = 2 if substrataname == "Other"
* stable: rows that tie on all three sort keys keep their current relative
* order, so the row order of the exported table is the same on every run
* (without it, Stata orders tied observations arbitrarily).
sort groupname strataname substrata, stable

*add extra rows with strata name
* ingap is a user-written command (not part of official Stata); it adds gap
* observations per by-group and flags them in the gap variable.
* NOTE(review): assumed to leave the data sorted by groupname for the second
* call below - confirm against the ingap help file.
by groupname strataname: ingap, gapindicator(gap)
replace substrata = -1 if gap == 1
replace substrataname = strataname if gap == 1
drop strataname gap

*add extra rows with group name
by groupname: ingap, gapindicator(gap)
replace substrata = -2 if gap == 1
replace substrataname = groupname if gap == 1
drop groupname gap

* Record the current row order, remove header rows (substrata < 0) that have no
* name, and move every row named "Study population" to the top of the table.
gen _order = _n
drop if substrataname == "" & substrata < 0
replace _order = 0 if substrataname == "Study population"
* stable: all rows set to _order 0 tie with each other; keeping their existing
* relative order makes the exported row order reproducible between runs.
sort _order, stable

rename substrataname riskfactor
label variable riskfactor "Stratifying factor"
drop _order substrata
keep riskfactor denomresult observedresult observedpermill expectedpermill_str excesspermill_str percentexcess_str
order riskfactor denomresult observedresult observedpermill expectedpermill_str excesspermill_str percentexcess_str

* Variable labels are written as the header row of the spreadsheet
export excel using "$resultdir\an_table1_`database'_`linkage'.xlsx", firstrow(varlabels) replace

* Close the log opened at the top of this do-file so it is flushed and not
* left open for whatever runs next.
log close



