* Most analysis can be run on grouped binomial data;

proc import 
datafile="cr_chinatrial_miss10percent.csv"
out = work.china
dbms = csv
replace;
getnames = yes;
run;

proc sort data = work.china;
 BY arm countyid  ;
RUN;




************************************;
************************************;

* Cluster level analyses;

************************************;
************************************;



proc means data=work.china nway;
var patients totalmissed_cm10;
output out=work.tempmean(drop = _type_ _freq_) mean=/autoname;
run;

proc means data=work.china nway;
var patients totalmissed_cm10;
output out=work.tempsum(drop = _type_ _freq_) sum=/autoname;
run;

proc means data=work.china nway;
var logodds;
output out=work.temp2(drop = _type_ _freq_) var=/autoname;
run;

proc means data=work.temp2 nway;
var logodds_Var;
output out = work.temp3(drop = _type_ _freq_) mean= totalvar;
run;

DATA work.china;
set work.china;
	mergeby = 1;
run;
DATA work.tempmean;
set work.tempmean;
mergeby = 1;
run;
DATA work.tempsum;
set work.tempsum;
	mergeby = 1;
run;
DATA work.temp3;
set work.temp3;
	mergeby = 1;
run;

DATA work.china;
   MERGE  work.china work.tempmean ;
   BY mergeby;

   MERGE  work.china work.tempsum ;
	BY mergeby;
   MERGE  work.china work.temp3 ;
	BY mergeby;

	sigmab = totalvar - 1/(totalmissed_cm10_Sum/patients_Sum * (1 - totalmissed_cm10_Sum/patients_Sum) * patients_Mean); 

run;


DATA work.china;
   set work.china ;

   rho = sigmab /  (sigmab + 1/(totalmissed_cm10_Sum/patients_Sum * (1 - totalmissed_cm10_Sum/patients_Sum)));

   w = patients / (1 + (patients - 1) * rho) ;

RUN;



***********************************;
* Unweighted analysis;
proc reg data = work.china;

	model logodds = arm ;
run;

* Weighted analysis;
proc reg data = work.china;

	weight  w;
	model logodds = arm ;
run;

		


************************************;
************************************;

* Mixed effect model;

************************************;
************************************;



** PQL ;

proc glimmix data = work.china method = rspl;
	class countyid;
	model totalmissed_cm10/patients = arm / dist=binomial link=logit solution ddfm = bw;
	random int / subject = countyid;
run;

proc glimmix data = work.china method = rspl;
	class countyid;
	model totalmissed_cm10/patients = arm / dist=binomial link=logit solution ddfm = sat;
	random int / subject = countyid;
run;


proc glimmix data = work.china method = rspl;
	class countyid;
	model totalmissed_cm10/patients = arm / dist=binomial link=logit solution ddfm = kr2;
	random int / subject = countyid;
run;

** QUAD ;

proc glimmix data = work.china method = quad;
	class countyid;
	model totalmissed_cm10/patients = arm / dist=binomial link=logit solution ddfm = bw;
	random int / subject = countyid;
run;


************************************;
************************************;

* GEE Independence models;

************************************;
************************************;

* NOTE: specification in this way is using independence estimating equations;
* Since there is only one observation per cluster;


* fg;
proc glimmix data = work.china empirical = firoeeq;
	class countyid;
	model totalmissed_cm10/patients = arm / dist=binomial link=logit solution ddfm=bw;
	random _residual_ / subject = countyid type = vc;
run;

*kc;
proc glimmix data = work.china empirical = root;
	class countyid;
	model totalmissed_cm10/patients = arm / dist=binomial link=logit solution ddfm=bw;
	random _residual_ / subject = countyid type = vc;
run;




************************************;
************************************;

* Exchangeable GEE needs expanded data;

************************************;
************************************;

proc import 
datafile="Chinatrial.txt"
out = work.china
dbms = csv
replace;
getnames = yes;
run;

data work.china;
set work.china;
if arm = 1 | arm = 3;
if arm = 1 then arm = 0;
run; 

data work.china;
set work.china;
if arm = 3 then arm = 1;
run; 


* GEE Ex;

* fg;

proc glimmix data = work.china empirical = firoeeq;
	class countyid;
	model totalmissed_cm10 = arm / dist=binomial link=logit solution ddfm = bw;
	random _residual_ / subject = countyid type = cs;
run;

* kc;

proc glimmix data = work.china empirical = root;
	class countyid;
	model totalmissed_cm10 = arm / dist=binomial link=logit solution ddfm = bw;
	random _residual_ / subject = countyid type = cs;
run;