/*-----------------------------------------------------------------------------------------*/ /* */ /* TITLE: LCR */ /* ---------- */ /* A SAS Macro for Latent Class Regression using PROC IML. */ /* Requires SAS/IML */ /* */ /* DATE OF CODING */ /* -------------- */ /* 31 January 1997 */ /* Updated: January 3, 1999 */ /* */ /* SUPPORT */ /* -------- */ /* Diana L. Miglioretti */ /* Center for Health Studies, Group Health Cooperative */ /* 1730 Minor Ave., Suite 1600 */ /* Seattle, WA 98103 */ /* */ /* Please send any suggestions or corrections to miglioretti.d@ghc.org */ /* */ /* DESCRIPTION */ /* ----------- */ /* This program contains a macro for fitting LCA and LCR models and an example. */ /* To fit a standard latent class model, only include an intercept in the model. */ /* */ /* The macro uses the algorithm describe in */ /* Bandeen-Roche, K; Miglioretti, DL; Zeger, SL; Rathouz, P, */ /* "Latent variable regression for multiple discrete outcomes," */ /* JASA, In Press (1997) */ /* to fit latent class regression models. */ /* */ /* NO MISSING VALUES! This macro can NOT handle missing values. Therefore, it is necessary */ /* to delete any observations with missing values prior to running the macro. */ /* */ /* INITIAL ESTIMATES */ /* ----------------- */ /* The algorithm used requires initial parameter estimates for mean eta (mean class */ /* prevalences) and pi (response probabilities). If initial estimates are not entered as */ /* an option, they will be calculated. However, to improve performance for regression */ /* models, we recommend using the values estimated from the standard latent class model */ /* (without covariates). In addition, since it is not uncommon for the likelihood to have */ /* multiple local maxima, we recommended performing several parameter estimations using */ /* different initial values. */ /* */ /* Initial esimates for beta (regression parameters) may also be enterred as an option. */ /* For models with several covariates, it may be helpful to add the covariates one at */ /* a time, using the previous beta estimates as initial values for covariates in the */ /* previous model and using zero for the initial estimates for the new covariate. */ /* When using this method, covariate values should be centered. */ /* */ /* OUTPUT */ /* ------ */ /* In addition to printing results in the output window, this macro creates 4 data sets: */ /* eta=subject-specific prior class probabilities */ /* theta=subject-specific posterior class probabilities */ /* pi=response probabilities */ /* beta=regression paramater estimates */ /* To create permanent data sets, you may specify a permanent library as an option. */ /* */ /* SYNTAX (DEFAULT VALUES IN {}) */ /* ----------------------------- */ /* %macro lcr( */ /* dataset = lib.dataset containing response variables and covariates {work.dataset}, */ /* outlib = library in which output will be saved {work} , */ /* resp_var = response variables {y1 y2 y3 y4} , */ /* cov = covariates, must include an intercept for all subjects */ /* covariates may be categorical or continuous {int} , */ /* n_class = number of latent classes {2} , */ /* beta = initial beta (regression paramaters) estimates; 0=none {0} , */ /* eta = initial eta (class prevalence) estimates; 0=none {0} , */ /* pi = initial pi (response probability) estimates; 0=none {0} , */ /* crit = convergence criterion {.0000001} , */ /* maxiter = maximum number of iterations {2500} , */ /* ); */ /* */ /*-----------------------------------------------------------------------------------------*/ /*-----------------------------------------------------------------------------------------*/ /* MACRO */ /*-----------------------------------------------------------------------------------------*/ %macro lcr( dataset=work.dataset, outlib=work, resp_var=y1 y2 y3 y4, cov=int, n_class=2, beta=0, eta=0, pi=0, crit=.0000001, maxiter=2500); proc iml; use &dataset; read all var {&resp_var} into ldata [colname=varname]; read all var {&cov} into x [colname=covname]; close &dataset; if ((ldata=.)[+])>0 then print "ERROR: MISSING VALUES! DELETE MISSING VALUES FROM DATASET"; c=j(ncol(ldata),1); *** Observed likelihood Module ***; start obslik(x) global(scor,snum); score=x[,1]=1; do i=2 to ncol(x); score=score+x[,i]#(2##(i-1)); end; sscore=score; sscore[rank(score)]=score; snum=1; scor=sscore[1]; do i=1 to nrow(score)-1; if sscore[i]=sscore[i+1] then do; snum[nrow(snum)]=snum[nrow(snum)]+1; end; else do; scor=scor//sscore[i+1]; snum=snum//1; end; end; return(2#(snum#(log(snum)-log(nrow(x))))[+,]); finish obslik; *** Class prob|data (posterior probabilities) Module***; start robraise(ag,rs); agu=ag#rs + 1#(1-rs); return(agu##rs); finish robraise; start theta(y,pi) global(eta); n=nrow(y); bigj=ncol(eta); p1=repeat(pi[,1]`,n,1); *** bmat=((p1##y # (1-p1)##(1-y) ) [,#]) ***; bmat=((robraise(p1,y) # robraise(1-p1,1-y))[,#]); do i=2 to bigj; t_pi=repeat(pi[,i]`,n,1); *** bmat=bmat || ( (t_pi##y # (1-t_pi)##(1-y) )[,#])***; bmat=bmat || (( robraise(t_pi,y) # robraise(1-t_pi,1-y) )[,#]); end; return (bmat); finish theta; *** Calculate Initial Estimates ***; *** Note: It is better to get initial estimates from standard LCA model ***; *** Set cutpoints for initial esimates, stopping criterion, and max number of iterations ***; if ({&pi}[+])=0 | ({&eta}[+])=0 then do; q_cutpt=1/&n_class; cutpt=j(1,&n_class-1,.); do i=1 to (&n_class-1); cutpt[,i]=i*q_cutpt; end; t=ldata[,+] < (c[+]*cutpt)[1,1]; do i=2 to nrow(cutpt`); t=t||((ldata[,+] < (c[+]*cutpt)[1,i]))-((ldata[,+] < (c[+]*cutpt)[1,i-1])); end; t=t||(ldata[,+] >= (c[+]*cutpt)[1,nrow(cutpt`)]); st=t[+,]; eta=(st/nrow(ldata)); eta=repeat(eta,nrow(ldata),1); pi=j(ncol(ldata),ncol(eta),.); do i=1 to nrow(cutpt`)+1 ; pi[,i]=((t[,i]#ldata)[+,]/t[+,i])`; end ; if loc(pi=0) then pi[loc(pi=0)]=.01; if loc(pi=1) then pi[loc(pi=1)]=.99; end; else do; pi=shape({&pi},ncol(ldata),&n_class); eta=repeat({&eta},nrow(ldata),1); end; mattrib pi rowname=varname; *** Calculate initial posterior class prob|data ***; top=theta(ldata,pi); bottom=(eta#top)[,+]; h=eta#top#(bottom##-1); *** Calculate initial Betas ***; if ({&beta}[+])=0 then do; beta=log(eta[1,1:(ncol(eta)-1)]/eta[1,ncol(eta)]); do i=2 to ncol(x); beta=beta//j(1,ncol(eta)-1,0); end; beta=shape(beta`,0,1); end; else do; beta={&beta}`; end; print 'number of obs' (nrow(ldata)); print 'initial Estimates:'; print 'mean eta' (eta[:,]`); print pi; print beta; **** Iteration Loop ****; obslik=obslik(ldata); dfsat=2##nrow(pi)-1; diff=1; count=1; do count=1 to &maxiter until (count>3 & diff < &crit); piold=pi; etaold=eta; betaold=beta; beta2=shape(beta,ncol(eta)-1,ncol(x))`||j(ncol(x),1,0); eta=(exp(x*beta2))/repeat((exp(x*beta2)[,+]),1,ncol(eta)); pi=j(ncol(ldata),ncol(eta),.); do i=1 to ncol(eta); pi[,i]=(ldata#h[,i])[+,]`/(h[,i][+]); end; pi=fuzz(pi); diff=sqrt(ssq(((shape(beta,0,1)//shape(pi,0,1))-(shape(betaold,0,1)//shape(piold,0,1))))); *** Update posterior distribution ***; top=theta(ldata,pi); bottom=(eta#top)[,+]; h=eta#top#(bottom##-1); loglik=2#((log(bottom))[+,]); *** Update betas ***; sbeta=x`*(h-eta); sbeta=shape(sbeta[,1:ncol(eta)-1]`,0,1); dsdbeta=j(ncol(x)#(ncol(eta)-1),ncol(x)#(ncol(eta)-1),.); q=j(nrow(h),1,1); do i=1 to ncol(eta)-1; do j=1 to ncol(eta)-1; do k=1 to ncol(x); do l=1 to ncol(x); if i=j then dsdbeta[(i-1)#ncol(x)+k,(j-1)#ncol(x)+l]= (x[,k]#x[,l])`*(h[,i]-h[,i]#h[,i]-eta[,i]+eta[,i]#eta[,i]); if i^=j then dsdbeta[(i-1)#ncol(x)+k,(j-1)#ncol(x)+l]= (x[,k]#x[,l])`*(eta[ ,i]#eta[,j]-h[,i]#h[,j]); end; end; end; end; beta=betaold-inv(dsdbeta)*sbeta; if count=1 then iter=((count-1)||diff); else iter=iter//((count-1)||diff); end; if count > &maxiter then print 'WARNING: Did not converge within limit of iteration!'; print iter[ colname={'iteration' 'diff'}]; *** Variance Estimation ***; d2dpt=j(ncol(eta)#ncol(ldata),ncol(eta)#ncol(ldata),.); do i=1 to ncol(eta); do j=1 to ncol(eta); do k=1 to ncol(ldata); do l=1 to ncol(ldata); if i=j then dij=1; else dij=0; if k=l then dkl=1; else dkl=0; if pi[k,i]^=0 & pi[k,i]^=1 & pi[l,j]^=0 & pi[l,j]^=1 then do; d2dpt[(i-1)#ncol(ldata)+k,(j-1)#ncol(ldata)+l]= ((ldata[,k]-pi[k,i])#(ldata[,l]-pi[l,j])#h[,i]#(dij#(1-dkl)-h[,j]))[+,] /(pi[k,i]#(1-pi[k,i])#pi[l,j]#(1-pi[l,j])); end; * for if; end; * for l; end; * for k; end; * for j; end; * for i; d2dp=d2dpt[loc(vecdiag(d2dpt)^=.),loc(vecdiag(d2dpt)^=.)]; d2dpdbt=j(ncol(eta)#ncol(ldata),ncol(x)#(ncol(eta)-1),.); do i=1 to ncol(eta); do j=1 to ncol(eta)-1; do k=1 to ncol(ldata); do l=1 to ncol(x); if i=j then dij=1; else dij=0; if pi[k,i]^=0 & pi[k,i]^=1 then do; d2dpdbt[(i-1)#ncol(ldata)+k,(j-1)#ncol(x)+l]= ((x[,l]#(ldata[,k]-pi[k,i])#h[,j]#(dij-h[,i]))[+,])/(pi[k,i]#(1-pi[k,i])); end; * for if; end; * for l; end; * for k; end; * for j; end; * for l; d2dpdb=d2dpdbt[loc(d2dpdbt[,1]^=.),]; d2dbdp=d2dpdb`; inform=(dsdbeta||d2dbdp)//(d2dpdb||d2dp); var=inv(-inform); sebeta=sqrt(vecdiag(var)[1:nrow(beta)]); varbeta=vecdiag(var)[1:nrow(beta)]; piname='junk'; do i=1 to ncol(eta); do k=1 to ncol(ldata); if pi[k,i]^=0 then do; piname=piname||rowcatc(trim(left('p'))||trim(left(char(i)))||trim(left(char(k)))); end; end; end; piname=t(piname[2:ncol(piname)]); betaname='junk'; do j=1 to ncol(eta)-1; do l=1 to ncol(x); betaname=betaname||rowcatc(trim(left('b'))||trim(left(char(j)))||trim(left(char(l)))); end; end; betaname=t(betaname[2:ncol(betaname)]); parm=betaname||piname; /* thetanam='theta1'; etaname='eta1'; do j=2 to ncol(eta); etaname=etaname||rowcatc(trim(left('eta'))||trim(left(char(j)))); thetanam=thetanam||rowcatc(trim(left('theta'))||trim(left(char(j)))); end; */ thetanam='theta1':rowcatc(trim(left('theta'))||trim(left(char(ncol(eta))))); etaname='eta1':rowcatc(trim(left('eta'))||trim(left(char(ncol(eta))))); /* pi2name='pi1'; do j=2 to ncol(pi); pi2name=pi2name||rowcatc(trim(left('pi'))||trim(left(char(j)))); end; */ pi2name='pi1':rowcatc(trim(left('pi'))||trim(left(char(ncol(pi))))); p_yname='p_y_1':rowcatc(trim(left('p_y_'))||trim(left(char(nrow(pi))))); beta2=shape(beta,ncol(eta)-1,ncol(x))`||j(ncol(x),1,0); eta=(exp(x*beta2))/repeat((exp(x*beta2)[,+]),1,ncol(eta)); print 'number of obs' (nrow(ldata)); print 'mean eta' (eta[:,]`); print pi; eigenval=eigval(inform); if any(eigenval=0) then do; print 'WARNING: INFORMATION MATRIX IS SINGULAR'; print 'Covariates' (covname`) beta; end; else do; print 'Covariates' (covname`) beta sebeta; end; if any(vecdiag(var)<0) then do; print 'ERROR: SOME VARIANCES ARE LESS THAN ZERO! MODEL MAY NOT HAVE CONVERGED.'; print 'Covariates' (covname`) beta; end; q=(fuzz(pi)=1)[+,+]+(fuzz(pi)=0)[+,+]; dfm=nrow(pi)#ncol(pi)+nrow(beta)-q; df=dfsat-dfm; g2=obslik-loglik; print obslik 'df' dfsat; print loglik 'df' dfm; if ncol(x)=1 then do; pval=1-probchi((obslik-loglik),(dfsat-dfm)); print 'likelihood ratio test (G-Squared):' (obslik-loglik) df pval; end; p_y=j(nrow(h),nrow(pi),.); do m=1 to nrow(pi); p_y[,m]=eta*(pi[m,])`; end; /* print 'eigenvalues' eigenval; */ create &outlib..beta from beta [colname='value' rowname=betaname]; append from beta [rowname=betaname]; close &outlib..beta; create &outlib..eta from eta [colname=etaname]; append from eta; close &outlib..eta; create &outlib..theta from h [colname=thetanam]; append from h; close &outlib..theta; create &outlib..pi from pi [colname=pi2name rowname=varname]; append from pi [rowname=varname]; close &outlib..pi; create &outlib..var from var [colname=parm rowname=parm]; append from var [rowname=parm]; close &outlib..var; create &outlib..eigenval from eigenval [colname='eigenval']; append from eigenval; close &outlib..eigenval; create &outlib..p_y from p_y [colname=p_yname]; append from p_y; close &outlib..p_y; /* title2 'Eigen values'; proc univariate data=&outlib..eigenval plot; var eigenval; quit; title2; */ title; %mend lcr; /*-----------------------------------------------------------------------------------------*/ /* */ /* EXAMPLE */ /* */ /*-----------------------------------------------------------------------------------------*/ /*-----------------------------------------------------------------------------------------*/ /* */ /* STANDARD LATENT CLASS ANALYSIS WITHOUT COVARIATES */ /* */ /*-----------------------------------------------------------------------------------------*/ data a; input Y1 Y2 Y3 Y4 Y5 COV1 COV2; cards; 0 1 1 1 1 0 -25 1 1 1 1 1 0 -24 1 1 1 1 1 0 -23 1 1 1 1 1 0 -22 0 1 1 1 1 0 -21 1 1 1 1 1 0 -20 1 1 1 1 1 0 -19 1 1 1 1 1 0 -18 1 1 1 1 1 0 -17 0 1 1 1 1 0 -16 0 1 1 1 1 0 -15 0 1 1 1 1 0 -14 1 1 1 1 1 0 -13 1 1 1 1 1 0 -12 1 1 1 1 1 0 -11 1 1 0 1 1 0 -10 1 1 1 1 1 0 -9 1 0 1 1 1 0 -8 1 0 1 0 1 0 -7 1 0 1 0 0 0 -6 0 1 1 1 1 0 -5 1 1 1 1 1 0 -4 1 1 1 1 1 0 -3 1 1 1 1 1 0 -2 0 1 0 0 1 0 -1 1 1 1 1 1 0 0 0 0 1 1 1 0 1 1 1 1 1 1 0 2 0 0 0 0 0 0 3 1 1 1 1 0 0 4 0 1 0 0 0 0 5 0 1 0 0 0 0 6 0 0 0 0 0 0 7 1 0 0 1 1 0 8 0 1 0 0 0 0 9 0 0 0 0 0 0 10 0 1 1 0 1 0 11 0 1 0 1 0 0 12 0 0 0 0 0 0 13 0 0 0 0 0 0 14 0 0 1 0 0 0 15 0 0 0 0 0 0 16 0 0 1 0 0 0 17 0 0 0 0 0 0 18 0 0 1 1 0 0 19 0 0 0 0 1 0 20 0 1 0 1 0 0 21 0 0 0 0 1 0 22 0 0 0 0 0 0 23 0 0 0 0 0 0 24 1 1 1 1 1 1 -25 0 1 1 1 0 1 -24 1 1 0 1 1 1 -23 0 0 1 1 0 1 -22 1 1 0 1 1 1 -21 0 0 1 1 1 1 -20 1 0 1 1 1 1 -19 0 0 0 0 0 1 -18 0 0 0 0 0 1 -17 0 0 1 1 1 1 -16 0 1 0 1 0 1 -15 1 1 1 1 1 1 -14 1 1 0 1 1 1 -13 0 0 0 1 0 1 -12 0 0 0 0 0 1 -11 0 0 0 0 0 1 -10 0 0 0 0 0 1 -9 0 0 0 1 0 1 -8 0 0 0 0 0 1 -7 0 0 1 1 0 1 -6 0 1 0 0 0 1 -5 0 1 0 1 1 1 -4 0 0 0 0 0 1 -3 0 0 0 0 0 1 -2 0 0 0 0 1 1 -1 0 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 2 1 1 0 0 1 1 3 0 0 0 0 0 1 4 0 1 0 0 0 1 5 0 0 0 0 1 1 6 0 1 0 1 1 1 7 0 0 1 0 0 1 8 1 0 1 0 0 1 9 0 0 0 1 0 1 10 0 0 1 0 1 1 11 0 0 0 1 0 1 12 0 0 1 0 0 1 13 0 0 0 0 1 1 14 0 0 0 1 0 1 15 0 1 0 0 1 1 16 0 1 0 0 1 1 17 1 1 0 1 0 1 18 0 0 0 0 0 1 19 0 0 0 0 0 1 20 0 1 0 0 1 1 21 0 0 0 0 0 1 22 0 0 0 0 0 1 23 0 1 0 0 0 1 24 ; run; data dataset; set a; if y1=. | y2=. | y3=. | y4=. | y5=. then delete; int=1; run; /*-----------------------------------------------------------------------------------------*/ /* */ /* STANDARD LATENT CLASS ANALYSIS WITHOUT COVARIATES */ /* */ /*-----------------------------------------------------------------------------------------*/ title 'STANDARD LATENT CLASS ANALYSIS WITHOUT COVARIATES'; %lcr ( dataset=work.dataset, outlib=work, resp_var=y1 y2 y3 y4 y5, cov=int, n_class=2, beta=0, eta=0, pi=0, crit=.000001, maxiter=500 ); /************************ RESULTS ********************** STANDARD LATENT CLASS ANALYSIS WITHOUT COVARIATES number of obs 100 initial Estimates: #TEM1002 mean eta 0.58 0.42 PI Y1 0.0344828 0.6904762 Y2 0.2241379 0.8333333 Y3 0.1896552 0.7857143 Y4 0.1896552 0.9285714 Y5 0.1896552 0.9285714 BETA 0.3227734 ITER iteration diff 0 0.0305552 . . . 31 8.9057E-7 #TEM1001 number of obs 100 #TEM1002 mean eta 0.6059265 0.3940735 PI Y1 0.046648 0.7149294 Y2 0.2432197 0.8440728 Y3 0.215196 0.7856581 Y4 0.2119842 0.9428523 Y5 0.2205498 0.929682 #TEM1001 BETA SEBETA Covariates INT 0.4302225 0.2455965 OBSLIK DFSAT -525.7823 df 31 LOGLIK DFM -552.6407 df 11 #TEM1003 DF PVAL likelihood ratio test (G-Squared): 26.858421 20 0.1392944 ********************************************************/; /*-----------------------------------------------------------------------------------------*/ /* */ /* Add 1st covariate using LCR eta and pi values as initials */ /* */ /*-----------------------------------------------------------------------------------------*/ title 'LATENT CLASS REGRESSION WITH 1 COVARIATE'; %lcr ( dataset=work.dataset, outlib=work, resp_var=y1 y2 y3 y4 y5, cov=int cov1, n_class=2, eta=0.6059265 0.3940735, pi= 0.046648 0.7149294 0.2432197 0.8440728 0.215196 0.7856581 0.2119842 0.9428523 0.2205498 0.929682, crit=.0000001, maxiter=500 ); /************************ RESULTS ********************** LATENT CLASS REGRESSION WITH 1 COVARIATE number of obs 100 initial Estimates: #TEM1002 mean eta 0.6059265 0.3940735 PI Y1 0.046648 0.7149294 Y2 0.2432197 0.8440728 Y3 0.215196 0.7856581 Y4 0.2119842 0.9428523 Y5 0.2205498 0.929682 BETA 0.4302213 0 ITER iteration diff 0 6.4783E-7 . . . 44 9.2173E-8 #TEM1001 number of obs 100 #TEM1002 mean eta 0.6381066 0.3618934 PI Y1 0.0614156 0.7483151 Y2 0.2683055 0.8532691 Y3 0.2213267 0.8255744 Y4 0.2427196 0.9536482 Y5 0.2420123 0.9548953 #TEM1001 BETA SEBETA Covariates INT -0.248578 0.3123104 COV1 1.8922928 0.5594732 OBSLIK DFSAT -525.7823 df 31 LOGLIK DFM -538.3358 df 12 ********************************************************/; /*-----------------------------------------------------------------------------------------*/ /* */ /* Add 2nd covariate using LCR eta, pi, and beta values as initials */ /* */ /*-----------------------------------------------------------------------------------------*/ title 'LATENT CLASS REGRESSION WITH 2 COVARIATES'; %lcr ( dataset=work.dataset, outlib=work, resp_var=y1 y2 y3 y4, cov=int cov1 cov2, n_class=2, beta=-0.248578 1.8922928 0, eta=0.6381066 0.3618934, pi= 0.0614156 0.7483151 0.2683055 0.8532691 0.2213267 0.8255744 0.2427196 0.9536482 0.2420123 0.9548953, crit=.0000001, maxiter=500 ); /************************ RESULTS ********************** LATENT CLASS REGRESSION WITH 2 COVARIATES number of obs 100 initial Estimates: mean eta 0.6381066 0.3618934 PI Y1 0.0614156 0.7483151 Y2 0.2683055 0.8532691 Y3 0.2213267 0.8255744 Y4 0.2427196 0.9536482 BETA -0.248578 1.8922928 0 ITER iteration diff 0 0.0365817 . . . 17 6.2925E-8 number of obs 100 #TEM1002 mean eta 0.6250863 0.3749137 PI Y1 0.0661482 0.7165694 Y2 0.2867693 0.8021697 Y3 0.1713254 0.887956 Y4 0.2324933 0.9460088 #TEM1001 BETA SEBETA Covariates INT -1.126534 0.8349867 COV1 8.6064888 2.9452147 COV2 0.4695086 0.1475921 OBSLIK DFSAT -439.8981 df 15 LOGLIK DFM -374.7258 df 11 *********************************************************/; title;