* Close any log that is still open; capture suppresses the error raised when no log is open
capture log close
* Positional arguments supplied by the caller, in this order: database, linkage, strata
* NOTE(review): strata is not referenced anywhere in the visible code - confirm whether it is still needed
args database linkage strata
*log using "$logdir\cr_strata_denoms_asthma_`database'_`linkage'.txt", replace text

/*
args:
database = gold or aurum
linkage = primary or linked
*/

/*******************************************************************************
CREATE DENOMINATOR FILES FOR ASTHMA COMORBIDITIES FOR EACH DATABASE, LINKED AND
UNLINKED
= PATIENT-LEVEL DATASET WITH THE FOLLOWING VARIABLES:
patid gender yob startfup endfup died
for the denominator, where the period from startfup to endfup includes the
diagnosis date

NB Asthma is defined as at least one record of asthma in the last 3 years
*******************************************************************************/
if "`database'"=="gold" {

	/*GOLD: build one record per patient per asthma episode, then restrict each
	episode to the follow-up window held in the study population file*/
	use "$datadir\cr_importmedicalcodes_`database'_medcodes.dta", clear
	count
	
	/* NOT USING LINKED DATA BECAUSE HS DIDN'T REQUEST THE FIELD THAT WOULD
	ALLOW US TO RESTRICT THIS TO PRIMARY DIAGNOSES 
	if "`linkage'" == "linked" {
	append using "$rawdatadir\\`database'_linked\\hes_diagnosis_epi_20_163R"
	}
	*/

	keep if asthma == 1

	drop medcode
	duplicates drop

	/*drop every record without an event date.
	Missing values compare greater than any number, so an undated record would
	pass the date filter below, sort last within the patient, be flagged as the
	start of a new episode and, because max() and min() ignore missing values,
	end up as an extra row covering the whole of the patient's follow-up*/
	drop if missing(evdate)

	*keep if recorded three years prior to study start or later
	keep if evdate > ${startstart_`linkage'} - (365.25*3)

	/*
	/*add easy to read number to represent patid to help with trouble shooting*/
	bysort patid (evdate): gen _temp1 = 1 if _n == 1 /*first record for patient*/
	bysort _temp1: gen _temp2 = _n if _temp1 == 1 /*sequential numbers for first record of each patid*/
	bysort patid (evdate): egen _newid = min(_temp2) /*fill to subsequent records for each patid*/
	drop _temp*
	save "$datadir\_temp.dta", replace
	*/

	/*create separate records for asthma diagnoses adding a gap where there is more
	than 3 years between diagnostic codes*/

	/*each code keeps the patient in the asthma denominator for 3 years*/
	gen _enddate = evdate + (365.25*3)
	format _enddate %dD/N/CY

	/*_start = start of an asthma "episode" i.e first ever code or new code following
	3 year gap*/
	bysort patid (evdate): gen _start = 1 if _n==1
	bysort patid (evdate): replace _start = 1 if evdate > _enddate[_n-1]
	/*_end = last code before a 3 year gap.
	The last record of each patient is also flagged: evdate[_n + 1] is missing
	there and any non-missing _enddate is smaller than missing*/
	bysort patid (evdate): gen _end = 1 if _enddate < evdate[_n + 1]
	/*drop interim records*/
	keep if _start == 1 | _end == 1
	/*a start-only record is now immediately followed by the end record of the
	same episode, so copy that record's _enddate onto the start record*/
	bysort patid (evdate): replace _enddate = _enddate[_n+1] if _start == 1 & _end == . 
	keep if _start == 1
	drop _start _end

	/*attach follow-up information and keep matched patients only:
	master-only rows are asthma episodes for patients who are not in the study
	population file, using-only rows are study population patients without an
	asthma episode*/
	merge m:1 patid using "$datadir\cr_studypopulation_foraggregate_`database'_`linkage'.dta", keep(match) nogenerate

	/*restrict follow-up to the overlap between study follow-up and the episode*/
	replace startfup = max(evdate, startfup)
	/*a death after the end of the episode falls outside this record's follow-up*/
	replace died = 0 if _enddate < endfup /*HS added 20 Nov 2020*/
	replace endfup = min(endfup, _enddate)
	/*episodes that do not overlap follow-up contribute no person-time*/
	drop if startfup >= endfup 

	keep patid gender yob startfup endfup died

	distinct patid
	save "$datadir\cr_strata_denoms_asthma_`database'_`linkage'.dta", replace
}




if "`database'"=="aurum" {

	/*AURUM: build one record per patient per asthma episode, then restrict each
	episode to the follow-up window held in the study population file*/
	use "$datadir\cr_importasthmacodes_`database'_medcodes.dta", clear

	/* NOT USING LINKED DATA BECAUSE HS DIDN'T REQUEST THE FIELD THAT WOULD
	ALLOW US TO RESTRICT THIS TO PRIMARY DIAGNOSES 
	gen asthma=1
	
	if "`linkage'" == "linked" {
	append using "$rawdatadir\\`database'_linked\\hes_diagnosis_epi_20_163R"
	keep if asthma == 1
	}
	*/

	drop medcode
	duplicates drop

	/*drop every record without an event date.
	Missing values compare greater than any number, so an undated record would
	pass the date filter below, sort last within the patient, be flagged as the
	start of a new episode and, because max() and min() ignore missing values,
	end up as an extra row covering the whole of the patient's follow-up*/
	drop if missing(evdate)

	*keep if recorded three years prior to study start or later
	keep if evdate > ${startstart_`linkage'} - (365.25*3)

	/*
	/*add easy to read number to represent patid to help with trouble shooting*/
	bysort patid (evdate): gen _temp1 = 1 if _n == 1 /*first record for patient*/
	bysort _temp1: gen _temp2 = _n if _temp1 == 1 /*sequential numbers for first record of each patid*/
	bysort patid (evdate): egen _newid = min(_temp2) /*fill to subsequent records for each patid*/
	drop _temp*
	save "$datadir\_temp.dta", replace
	*/

	/*create separate records for asthma diagnoses adding a gap where there is more
	than 3 years between diagnostic codes*/

	/*each code keeps the patient in the asthma denominator for 3 years*/
	gen _enddate = evdate + (365.25*3)
	format _enddate %dD/N/CY

	/*_start = start of an asthma "episode" i.e first ever code or new code following
	3 year gap*/
	bysort patid (evdate): gen _start = 1 if _n==1
	bysort patid (evdate): replace _start = 1 if evdate > _enddate[_n-1]
	/*_end = last code before a 3 year gap.
	The last record of each patient is also flagged: evdate[_n + 1] is missing
	there and any non-missing _enddate is smaller than missing*/
	bysort patid (evdate): gen _end = 1 if _enddate < evdate[_n + 1]
	/*drop interim records*/
	keep if _start == 1 | _end == 1
	/*a start-only record is now immediately followed by the end record of the
	same episode, so copy that record's _enddate onto the start record*/
	bysort patid (evdate): replace _enddate = _enddate[_n+1] if _start == 1 & _end == . 
	keep if _start == 1
	drop _start _end

	/*attach follow-up information and keep matched patients only:
	master-only rows are asthma episodes for patients who are not in the study
	population file, using-only rows are study population patients without an
	asthma episode*/
	merge m:1 patid using "$datadir\cr_studypopulation_foraggregate_`database'_`linkage'.dta", keep(match) nogenerate

	/*restrict follow-up to the overlap between study follow-up and the episode*/
	replace startfup = max(evdate, startfup)
	/*a death after the end of the episode falls outside this record's follow-up*/
	replace died = 0 if _enddate < endfup /*HS added 20 Nov 2020*/
	replace endfup = min(endfup, _enddate)
	/*episodes that do not overlap follow-up contribute no person-time*/
	drop if startfup >= endfup 

	keep patid gender yob startfup endfup died
	distinct patid

	save "$datadir\cr_strata_denoms_asthma_`database'_`linkage'.dta", replace
}
		
* Close the log if one is open; capture suppresses the error raised when none is open
capture log close