* Close any log left open by an earlier run (no error if none is open)
cap log close

* Positional arguments: database, then linkage
args database linkage

* Plain-text log named after the two arguments; an existing log of the same name is overwritten
log using "$logdir\cr_strata_denoms_cancer_`database'_`linkage'.txt", text replace


/*
args:
database = gold or aurum
linkage = primary or linked
*/

/*******************************************************************************
CREATE DENOMINATOR FILES FOR CANCER (EVER) FOR EACH DATABASE, LINKED AND
UNLINKED
= PATIENT-LEVEL DATASET (ONE ROW PER PATIENT) WITH THE FOLLOWING VARIABLES:
patid gender yob startfup endfupever died cancer type haem evdate
where startfup is reset to the later of the original startfup and the date of
the first cancer record (evdate), so follow-up starts no earlier than the
diagnosis date
*******************************************************************************/
/*
GOLD: build the one-row-per-patient "cancer ever" denominator file.

Steps:
  1. Load primary care records flagged cancer == 1 and attach icdcode and
     malignancy from the cancer code list (matched records only).
  2. If linkage is "linked", append the HES diagnosis file and derive icdcode
     from icd10_code for the appended rows.
  3. Classify each record into a cancer type and a haematological flag.
  4. Keep each patient's earliest record, and only if it is malignancy == 3.
  5. Merge with the study population, move startfup forward to the cancer
     date, and save.
*/
if "`database'"=="gold" {

	use "$datadir\cr_importmedicalcodes_`database'_medcodes.dta", clear
	keep if cancer == 1

	* Attach ICD-10 category and malignancy status; unmatched medcodes are dropped
	merge m:1 medcode using "J:\EHR-Working\Krishnan\allcancercodelist\codelist\cr_allcancercodelist_JUL2019.dta", keepusing(icdcode malignancy)
	keep if _merge == 3
	drop _merge

	if "`linkage'" == "linked" {
		* Every primary care row must have a malignancy status, so that after the
		* append a missing malignancy identifies an appended row
		assert malignancy != .
		append using "$rawdatadir\\`database'_linked\\hes_diagnosis_epi_20_163R"

		* NOTE(review): this only retains appended rows if the appended file itself
		* carries cancer == 1 - confirm against that file
		keep if cancer == 1

		* Drop rows whose icd10_code has "D" as its 4th-from-last character.
		* NOTE(review): the -4 offsets here and below assume icd10_code is exactly
		* 4 characters long with no dot - confirm
		drop if substr(icd10_code,-4,1)=="D"

		* Rows with no code-list icdcode take the 3 characters starting at the
		* 4th-from-last position of icd10_code
		replace icdcode=substr(icd10_code,-4,3) if icdcode==""
		* Rows still missing malignancy are the appended ones: treat as malignant (3)
		replace malignancy = 3 if malignancy == .
	}

	keep patid medcode evdate cancer malignancy icdcode

	/*variable for 20 most common cancers*/
	gen type=""
	replace type="ora" if inlist(icdcode,"C00","C01","C02","C03","C04","C05","C06")
	replace type="oes" if icdcode=="C15"
	replace type="gas" if icdcode=="C16"
	replace type="col" if inlist(icdcode,"C18","C19","C20")
	replace type="liv" if icdcode=="C22"
	replace type="gal" if icdcode=="C23"
	replace type="pan" if icdcode=="C25"
	replace type="lun" if icdcode=="C34"
	replace type="mel" if icdcode=="C43"
	replace type="bre" if icdcode=="C50"
	replace type="cer" if icdcode=="C53"
	replace type="ute" if inlist(icdcode,"C54","C55")
	replace type="ova" if icdcode=="C56"
	replace type="pro" if icdcode=="C61"
	replace type="kid" if icdcode=="C64"
	replace type="bla" if icdcode=="C67"
	replace type="cns" if inlist(icdcode,"C71","C72")
	replace type="thy" if icdcode=="C73"
	replace type="nhl" if inlist(icdcode,"C82","C83","C84","C85")
	replace type="mye" if icdcode=="C90"
	replace type="leu" if inlist(icdcode,"C91","C92","C93","C94","C95")

	tab type malignancy, m
	keep if inlist(malignancy, 3, 6, 90) /*malignancy, secondary or history*/
	drop if icdcode =="C44" /*Other and unspecified malignant neoplasm of skin*/

	* Haematological flag: the typed haematological groups first, then further
	* ICD categories that have no type of their own
	gen haem = 0
	replace haem = 1 if inlist(type,"leu","mye","nhl")
	tab haem
	replace haem = 1 if inlist(icdcode,"C81","C86","C88","C96")
	tab haem, m

	* Drop patients for whom no record has an event date: the per-patient
	* maximum is missing only when every evdate for that patient is missing
	bysort patid (evdate): egen maxevdate = max(evdate)
	format maxevdate %dD/N/CY
	drop if maxevdate == .
	drop maxevdate

	* Keep each patient's earliest record, then keep the patient only if that
	* record is malignant (3), i.e. no earlier secondary or history record.
	* NOTE(review): records sharing the same evdate are ordered arbitrarily,
	* so the record kept among same-date ties is not deterministic
	bysort patid (evdate): keep if _n == 1
	keep if malignancy == 3 /*1st record of cancer == malignant*/
	replace type = "other" if type == ""

	merge m:1 patid using "$datadir\cr_studypopulation_foraggregate_`database'_`linkage'.dta"
	keep if _merge == 3

	* Follow-up cannot start before the first cancer record
	replace startfup = max(evdate, startfup)
	drop if startfup >= endfup
	rename endfup endfupever /*vary endfup in cr_weeklycounts for cancer in last 1 and 5 years*/

	* Name endfupever explicitly: endfup no longer exists after the rename and
	* was previously only resolved through variable-name abbreviation
	keep patid gender yob startfup endfupever died cancer type haem evdate

	distinct patid

	save "$datadir\cr_strata_denoms_cancer_`database'_`linkage'.dta", replace
}




/*
AURUM: build the one-row-per-patient "cancer ever" denominator file.

Same pipeline as the GOLD branch, except that the code list is matched on
medcodeid and the extract is read from a single file.

An earlier version looped over ten chunked extract files and accumulated the
results by appending each chunk to the output file. That loop is no longer
used, so the output is now built from the current input alone. Appending the
previous output file would fold stale rows from an earlier run into the result.
*/
if "`database'"=="aurum" {

	use "G:\cr_importmedicalcodes_aurum_medcodes.dta", clear
	keep if cancer == 1
	* Keep the merge key under its full name rather than relying on medcode
	* being resolved as an abbreviation of medcodeid
	keep patid medcodeid evdate cancer

	* Attach ICD-10 category and malignancy status; unmatched codes are dropped
	merge m:1 medcodeid using "$projectdir\codelists\othersource\allcancercodelist_AurumFINAL_202010_SHARE.dta", keepusing(icdcode malignancy)
	keep if _merge == 3
	drop _merge

	if "`linkage'" == "linked" {
		* Every primary care row must have a malignancy status, so that after the
		* append a missing malignancy identifies an appended row
		assert malignancy != .
		append using "$rawdatadir\\`database'_linked\\hes_diagnosis_epi_20_163R"

		* NOTE(review): this only retains appended rows if the appended file itself
		* carries cancer == 1 - confirm against that file
		keep if cancer == 1

		* Drop rows whose icd10_code has "D" as its 4th-from-last character.
		* NOTE(review): the -4 offsets here and below assume icd10_code is exactly
		* 4 characters long with no dot - confirm
		drop if substr(icd10_code,-4,1)=="D"

		* Rows with no code-list icdcode take the 3 characters starting at the
		* 4th-from-last position of icd10_code
		replace icdcode=substr(icd10_code,-4,3) if icdcode==""
		* Rows still missing malignancy are the appended ones: treat as malignant (3)
		replace malignancy = 3 if malignancy == .
	}

	/*variable for 20 most common cancers*/
	gen type=""
	replace type="ora" if inlist(icdcode,"C00","C01","C02","C03","C04","C05","C06")
	replace type="oes" if icdcode=="C15"
	replace type="gas" if icdcode=="C16"
	replace type="col" if inlist(icdcode,"C18","C19","C20")
	replace type="liv" if icdcode=="C22"
	replace type="gal" if icdcode=="C23"
	replace type="pan" if icdcode=="C25"
	replace type="lun" if icdcode=="C34"
	replace type="mel" if icdcode=="C43"
	replace type="bre" if icdcode=="C50"
	replace type="cer" if icdcode=="C53"
	replace type="ute" if inlist(icdcode,"C54","C55")
	replace type="ova" if icdcode=="C56"
	replace type="pro" if icdcode=="C61"
	replace type="kid" if icdcode=="C64"
	replace type="bla" if icdcode=="C67"
	replace type="cns" if inlist(icdcode,"C71","C72")
	replace type="thy" if icdcode=="C73"
	replace type="nhl" if inlist(icdcode,"C82","C83","C84","C85")
	replace type="mye" if icdcode=="C90"
	replace type="leu" if inlist(icdcode,"C91","C92","C93","C94","C95")

	tab type malignancy, m
	keep if inlist(malignancy, 3, 6, 90) /*malignancy, secondary or history*/
	drop if icdcode =="C44" /*Other and unspecified malignant neoplasm of skin*/

	* Haematological flag: the typed haematological groups first, then further
	* ICD categories that have no type of their own
	gen haem = 0
	replace haem = 1 if inlist(type,"leu","mye","nhl")
	tab haem
	replace haem = 1 if inlist(icdcode,"C81","C86","C88","C96")
	tab haem, m

	* Drop patients for whom no record has an event date: the per-patient
	* maximum is missing only when every evdate for that patient is missing
	bysort patid (evdate): egen maxevdate = max(evdate)
	format maxevdate %dD/N/CY
	drop if maxevdate == .
	drop maxevdate

	* Keep each patient's earliest record, then keep the patient only if that
	* record is malignant (3), i.e. no earlier secondary or history record.
	* NOTE(review): records sharing the same evdate are ordered arbitrarily,
	* so the record kept among same-date ties is not deterministic
	bysort patid (evdate): keep if _n == 1
	keep if malignancy == 3 /*1st record of cancer == malignant*/
	replace type = "other" if type == ""

	merge m:1 patid using "$datadir\cr_studypopulation_foraggregate_`database'_`linkage'.dta"
	keep if _merge == 3

	* Follow-up cannot start before the first cancer record
	replace startfup = max(evdate, startfup)
	drop if startfup >= endfup
	rename endfup endfupever /*vary endfup in cr_weeklycounts for cancer in last 1 and 5 years*/

	* Name endfupever explicitly: endfup no longer exists after the rename and
	* was previously only resolved through variable-name abbreviation
	keep patid gender yob startfup endfupever died cancer type haem evdate

	distinct patid

	* patid is already unique here (one record kept per patient, then an m:1
	* merge on patid), so no further de-duplication is needed before saving
	save "$datadir\cr_strata_denoms_cancer_`database'_`linkage'.dta", replace
}



* Close the log opened at the top of this do-file (no error if it is already closed)
cap log close