* Close any log that is still open (e.g. from an interrupted run); -capture- suppresses
* the error raised when no log is open.
capture log close

* Open the log for this do-file, overwriting any previous copy.
* The log is given a .log extension: the earlier ".do" extension made the log look like a
* script and would overwrite this do-file if $logdir pointed at the do-file folder.
log using "$logdir\an_adhoc_analyses.log", replace


/*******************************************************************************
AD HOC ANALYSES TO SUPPORT DECISION MAKING
*******************************************************************************/

/*number of weeks and deaths in full time period*/
use "$estimatedir/an_timeseries_estimates_demographics_studypop_combined_primary.dta", clear /*week level dataset*/
* Reduce to a single row: total deaths, total denominator and the number of non-missing weeks
collapse (sum) deaths denominator (count) week
* Denominator in millions. The variable is referred to by its full name so that this line
* does not depend on variable-name abbreviation being switched on (-set varabbrev-).
gen denommills = denominator/1000000
list

/*which region to use as base category*/
* Stack the Aurum and GOLD weekly region counts, tagging each row with its source database.
* -use- accepts -clear- (not -replace-) to discard the data currently in memory.
use "$datadir/cr_weeklycounts_region_aurum_primary.dta", clear
gen database = "aurum"
append using "$datadir/cr_weeklycounts_region_gold_primary.dta"
replace database = "gold" if database == ""

* One row per region-year, with one denominator column per database
* (denominatoraurum, denominatorgold)
collapse (sum) denominator, by(region year database)
reshape wide denominator, i(year region) j(database) string

* Rank regions within each year by denominator, separately for each database
sort year denominatorgold
by year: egen rankaurum = rank(denominatoraurum)
by year: egen rankgold = rank(denominatorgold)

* Combined denominator and its within-year rank.
* Note: the sum is missing for any region-year that is missing in either database,
* so those regions receive no total rank.
gen denomtotal = denominatoraurum + denominatorgold
by year: egen ranktotal = rank(denomtotal)
sort year ranktotal

list if year == 2020
* With one region per rank and year, the "median" of region simply shows which region holds that rank
table ranktotal year, content(median region)
table rankaurum year, content(median region)
table rankgold year, content(median region)
* London (9) = most common region overall and in aurum, and highly ranked in gold
* has to be in England for linked analysis and because aurum doesn't include Scotland and Wales

* Close the currently open log. Output from every command below this line is therefore
* not captured in the log file.
log close

/*how common is mixed_urban rural? - only used in NI*/
* NOTE(review): the region-ranking data left in memory by the preceding section holds only
* region/year/denominator/rank variables, so a dataset containing urban_rural, country and
* database presumably has to be loaded before running these tabulations - confirm.
tabulate urban_rural country
foreach db in aurum gold {
	tabulate urban_rural country if database == "`db'"
}
* 2 NI practices - most are 1 = urban

/*how does practice size and age structure change over time?*/
* For each database and each 1 January from 2015 to 2020, count the patients under follow-up
* per practice (overall and within 10-year age bands) and post the distribution of those
* practice-level counts (min, percentiles, max, mean) to a results file.
* agecat 1-6 = age bands, agecat 99 = all ages combined.

cap postutil clear
tempfile results
postfile results str12 database int year agecat /*sex*/ min p5 p25 p50 p75 p95 max mean using `results'

foreach database in aurum gold {
	use "$datadir\cr_studypopulation_foraggregate_`database'_primary.dta", clear

	* Practice identifier taken from the patient identifier:
	* gold - patid modulo 1000 (numeric); aurum - last 5 characters of patid (string)
	if "`database'" == "gold" gen pracid = mod(patid, 1000)
	if "`database'" == "aurum" gen pracid = substr(patid, -5, .)

	forvalues x = 2015/2020 {
		di "`x'"

		* Index date = 1 January of year x; mdy() states the date components explicitly
		local indexdate = mdy(1, 1, `x')

		* denom = 1 for patients whose follow-up spans the index date
		gen denom = 0
		replace denom = 1 if startfup < `indexdate' & endfup > `indexdate'

		* Age (year of index date minus year of birth) for patients in the denominator only;
		* missing ages pass the assert because missing sorts above every number in Stata
		gen age = `x' - yob if denom == 1
		assert age >=40
		gen agecat = age
		recode agecat 40/49 = 1 50/59 = 2 60/69 = 3 70/79 = 4 80/89 = 5 90/115 = 6

		* Patients per practice and age band, kept on the first row of each group only so
		* that each practice/age band contributes once to the summary
		bysort pracid /*gender*/ agecat: egen count = total(denom)
		by pracid /*gender*/ agecat: replace count = . if _n>1

		* (stratification by sex, gender == 1/2, is currently disabled)
		forvalues age = 1/6 {
			summ count if /*gender == `sex' &*/ agecat == `age', d
			* r() results are posted as expressions rather than expanded macros: if an age band
			* has no observations they evaluate to missing instead of producing an empty "()"
			* that would abort -post- with a syntax error
			post results ("`database'") (`x') (`age') (r(min)) (r(p5)) (r(p25)) (r(p50)) (r(p75)) (r(p95)) (r(max)) (r(mean))
		}
		drop count

		* Same summary for total practice size (all age bands combined), posted as agecat 99
		bysort pracid: egen count = total(denom)
		by pracid: replace count = . if _n>1
		summ count, d
		post results ("`database'") (`x') (99) (r(min)) (r(p5)) (r(p25)) (r(p50)) (r(p75)) (r(p95)) (r(max)) (r(mean))
		drop denom count age agecat
	}

}


	postclose results

	* Load the posted practice-size summaries, attach age band labels and save them
	use `results', clear
	label define agecatlab 1 "40 to 49" 2 "50 to 59" 3 "60 to 69" 4 "70 to 79" 5 "80 to 89" 6 "90 plus", replace
	label values agecat agecatlab
	save "$estimatedir\an_adhoc_analyses_practicesize.dta", replace

	* Reload the saved file (allows the plotting code below to be re-run on its own)
	use "$estimatedir\an_adhoc_analyses_practicesize.dta", clear

	* agecat 99 holds the all-ages practice size: list it, then keep age bands only for plotting
	list if agecat == 99
	drop if agecat == 99

	rename p50 median

	* One graph per database: median practice count by age band, one line per year 2015-2019
	foreach database in aurum gold {

		* Build the list of line plots and the matching legend labels year by year
		local yearlines
		local yearlabels
		local series = 0
		forvalues yr = 2015/2019 {
			local ++series
			local yearlines `yearlines' (line median agecat if year ==`yr' & database == "`database'")
			local yearlabels `yearlabels' label(`series' "`yr'")
		}

		twoway `yearlines' ///
			, legend(`yearlabels') ///
			title("`database'") ///
			ytitle("practice size") ///
			xlabel(1(1)6, valuelabel) ///
			name("`database'", replace)

	}

	* Put both graphs in one figure and export it
	graph combine gold aurum
	graph export "$resultdir\an_adhoc_analyses_practicesize.emf", replace


/*population of GOLD and Aurum practices by ethnicity and deprivation*/
* Build one patient-level file covering both databases with region, ethnicity (eth5),
* practice urban/rural status and practice Carstairs quintile (carstairs2011_5).

tempfile temp
foreach database in gold aurum {

	use "$datadir\cr_studypopulation_foraggregate_`database'_primary.dta", clear

	* Numeric practice id derived from the patient id
	if "`database'"=="gold" {
		* gold: patid modulo 1000
		gen pracid = mod(patid,1000)
		* mod() above already requires a numeric patid, so this leaves patid unchanged
		destring patid, replace
	}
	if "`database'"=="aurum" {
		* aurum: last 5 characters of the string patid, converted to a number
		gen pracid_txt=substr(patid,-5,5)
		destring pracid_txt, gen(pracid)
		drop pracid_txt
	}

	* Patient-level ethnicity; unmatched study-population patients are kept
	merge 1:1 patid using "$datadir//cr_strata_denoms_ethnicity_`database'_primary.dta", keep(1 3) nogen noreport

	* Practice-level linked data: rural/urban first, then Carstairs
	foreach linkfile in RuralUrban Carstairs {
		merge m:1 pracid using "$rawdatadir\\`database'_linked\\`database'_20_163R_Practice_`linkfile'_set$linkageset.dta", keep(1 3) nogen noreport
	}

	keep patid region eth5 urban_rural carstairs2011_5
	* gold patid is converted to string so that it can be appended to the string aurum patid
	if "`database'" == "gold" tostring patid, replace
	gen database = "`database'"

	* gold is processed first and saved on its own; aurum is then stacked on top of it
	if "`database'" == "aurum" append using `temp'
	save `temp', replace
}
	
* Compare gold and aurum on ethnicity, deprivation and urban/rural status:
* column percentages overall (with and without missing values), then within each region 1-13
use `temp', clear
foreach strat in eth5 carstairs2011_5 urban_rural {
	tab `strat' database, m col
	tab `strat' database, col
	forvalues region = 1/13 {
		di "`region'"
		tab `strat' database if region == `region', col
	}
}
	

/*
label define regionlb 	///
1   "North East" 	///
2   "North West" 	///
3	"Yorkshire And The Humber" ///
4	"East Midlands" ///
5	"West Midlands" ///
6	"East of England" ///
7	"South West" 	///
8	"South Central" ///
9	"London" 		///
10	"South East Coast" ///
11	"Northern Ireland" ///
12	"Scotland" 		///
13	"Wales"
	
*/

/*FINDINGS
ETHNICITY
more missing in gold than aurum (50% vs. 19.75%),
when recorded
- more white in gold than aurum (93% vs. 85%)
- London, higher proportion of white ethnicity in gold (77%) vs aurum (61%)
- reversed for 8 = South Central (gold 80%, aurum 90%)
- smaller differences in other regions

DEPRIVATION
Overall - more missing in gold than aurum (8% vs. 0.4%), otherwise fairly similar
Large variation within regions

URBAN RURAL
Overall - more rural gold practices (16.19% vs 13.38% for aurum)
Large variations within regions
*/
			
**reduction in cancer deaths over time
* Stack the gold and aurum cancer strata files; gold patid is converted to string before appending
use "$datadir\cr_strata_denoms_cancer_gold_primary.dta", clear
tostring patid, replace
generate database = "gold"
append using "$datadir\cr_strata_denoms_cancer_aurum_primary.dta"
replace database = "aurum" if missing(database)

* Distribution of event dates, then event counts by type and calendar year from 2014 onwards
summarize evdate, detail format
generate evyear = year(evdate)
local recent evyear >= 2014
tabulate type evyear if `recent'
*in study pop
tabulate type evyear if `recent' & startfup <= evdate & endfup > evdate
format endfup %td

/*
format startfup endfup %td
replace endfup = min(endfup, (evdate + 365.25))
drop if endfup <= startfup
drop if evyear < 2014

forvalues year = 2014/2020 {
	gen timesincediag`year' 
*/
* Earliest and latest end of follow-up by event year, for events on or after start of follow-up
tabstat endfup if `recent' & startfup <=evdate, by(evyear) stats(min max) format

* Weekly counts by cancer1yr status (aurum): drop age stratum 99 and strata below 5
use "$datadir/cr_weeklycounts_cancer_cancer1yr_aurum_primary", clear
drop if agestratum <5
drop if agestratum == 99

/*from cr_studypopulation_foraggregate.do"
gen endfup = min(tod, lcd, deathdate)
gen died = 0
replace died = 1 if deathdate == endfup
*/


* Redisplay previously saved model estimates with exponentiated coefficients
foreach estfile in inc_an_timeseries_estimates_cancer1yr an_glm_estimates_cancer_cancer1yr_all_combined_primary {
	estimates use "$estimatedir\\`estfile'"
	estimates replay, eform
}

*run an_glm_estimates_binary for cancer only - how can I add interactions between cos and sin vars and cancer1yr?
* Model pieces shared by both negative binomial models below
local trend year_c year_c2
local adjust ib10.agestr ib2.gender ib0.cancer1yr##i.pandemic
local glmopts family(nb ml) link(log) exposure(denominator) eform base

* Model 1: trend, age, gender and cancer1yr-by-pandemic interaction
glm deaths `trend' `adjust', `glmopts'

* Model 2: as model 1 plus cancer1yr interacted with each seasonal term
* (sin_1 sin_2 sin_3, then cos_1 cos_2 cos_3)
local seasonal
foreach wave in sin cos {
	forvalues k = 1/3 {
		local seasonal `seasonal' ib0.cancer1yr##c.`wave'_`k'
	}
}
glm deaths `trend' `seasonal' `adjust', `glmopts'
