--- imach/src/imach.c 2002/02/20 17:12:32 1.16 +++ imach/src/imach.c 2002/03/11 14:17:15 1.32 @@ -1,34 +1,42 @@ - -/*********************** Imach ************************************** - This program computes Healthy Life Expectancies from cross-longitudinal - data. Cross-longitudinal consist in a first survey ("cross") where - individuals from different ages are interviewed on their health status - or degree of disability. At least a second wave of interviews - ("longitudinal") should measure each new individual health status. - Health expectancies are computed from the transistions observed between - waves and are computed for each degree of severity of disability (number - of life states). More degrees you consider, more time is necessary to - reach the Maximum Likelihood of the parameters involved in the model. - The simplest model is the multinomial logistic model where pij is - the probabibility to be observed in state j at the second wave conditional - to be observed in state i at the first wave. Therefore the model is: - log(pij/pii)= aij + bij*age+ cij*sex + etc , where 'age' is age and 'sex' - is a covariate. If you want to have a more complex model than "constant and - age", you should modify the program where the markup - *Covariates have to be included here again* invites you to do it. - More covariates you add, less is the speed of the convergence. - - The advantage that this computer programme claims, comes from that if the - delay between waves is not identical for each individual, or if some - individual missed an interview, the information is not rounded or lost, but - taken into account using an interpolation or extrapolation. - hPijx is the probability to be - observed in state i at age x+h conditional to the observed state i at age - x. The delay 'h' can be split into an exact number (nh*stepm) of - unobserved intermediate states. This elementary transition (by month or - quarter trimester, semester or year) is model as a multinomial logistic. 
- The hPx matrix is simply the matrix product of nh*stepm elementary matrices - and the contribution of each individual to the likelihood is simply hPijx. +/* $Id: imach.c,v 1.32 2002/03/11 14:17:15 brouard Exp $ + Interpolated Markov Chain + + Short summary of the programme: + + This program computes Healthy Life Expectancies from + cross-longitudinal data. Cross-longitudinal data consist in: -1- a + first survey ("cross") where individuals from different ages are + interviewed on their health status or degree of disability (in the + case of a health survey which is our main interest) -2- at least a + second wave of interviews ("longitudinal") which measure each change + (if any) in individual health status. Health expectancies are + computed from the time spent in each health state according to a + model. The more health states you consider, the more time is necessary to reach the + Maximum Likelihood of the parameters involved in the model. The + simplest model is the multinomial logistic model where pij is the + probability to be observed in state j at the second wave + conditional to be observed in state i at the first wave. Therefore + the model is: log(pij/pii)= aij + bij*age+ cij*sex + etc , where + 'age' is age and 'sex' is a covariate. If you want to have a more + complex model than "constant and age", you should modify the program + where the markup *Covariates have to be included here again* invites + you to do it. The more covariates you add, the slower the + convergence. + + The advantage of this computer programme, compared to a simple + multinomial logistic model, is clear when the delay between waves is not + identical for each individual. Also, if an individual missed an + intermediate interview, the information is not lost, but taken into + account using an interpolation or extrapolation. + + hPijx is the probability to be observed in state j at age x+h + conditional to the observed state i at age x. 
The delay 'h' can be + split into an exact number (nh*stepm) of unobserved intermediate + states. This elementary transition (by month or quarter trimester, + semester or year) is model as a multinomial logistic. The hPx + matrix is simply the matrix product of nh*stepm elementary matrices + and the contribution of each individual to the likelihood is simply + hPijx. Also this programme outputs the covariance matrix of the parameters but also of the life expectancies. It also computes the prevalence limits. @@ -48,6 +56,7 @@ #include #define MAXLINE 256 +#define GNUPLOTPROGRAM "..\\gp37mgw\\wgnuplot" #define FILENAMELENGTH 80 /*#define DEBUG*/ #define windows @@ -67,6 +76,7 @@ #define AGEBASE 40 +int erreur; /* Error number */ int nvar; int cptcovn, cptcovage=0, cptcoveff=0,cptcov; int npar=NPARMAX; @@ -84,8 +94,8 @@ int **dh; /* dh[mi][i] is number of step double jmean; /* Mean space between 2 waves */ double **oldm, **newm, **savm; /* Working pointers to matrices */ double **oldms, **newms, **savms; /* Fixed working pointers to matrices */ -FILE *fic,*ficpar, *ficparo,*ficres, *ficrespl, *ficrespij, *ficrest,*ficresf; -FILE *ficgp, *fichtm,*ficresprob,*ficpop; +FILE *fic,*ficpar, *ficparo,*ficres, *ficrespl, *ficrespij, *ficrest,*ficresf,*ficrespop; +FILE *ficgp,*ficresprob,*ficpop; FILE *ficreseij; char filerese[FILENAMELENGTH]; FILE *ficresvij; @@ -113,7 +123,7 @@ FILE *ficreseij; static double maxarg1,maxarg2; #define FMAX(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1)>(maxarg2)? (maxarg1):(maxarg2)) #define FMIN(a,b) (maxarg1=(a),maxarg2=(b),(maxarg1)<(maxarg2)? (maxarg1):(maxarg2)) - + #define SIGN(a,b) ((b)>0.0 ? 
fabs(a) : -fabs(a)) #define rint(a) floor(a+0.5) @@ -129,6 +139,7 @@ int m,nb; int *num, firstpass=0, lastpass=4,*cod, *ncodemax, *Tage; double **agev,*moisnais, *annais, *moisdc, *andc,**mint, **anint; double **pmmij, ***probs, ***mobaverage; +double dateintmean=0; double *weight; int **s; /* Status */ @@ -139,14 +150,18 @@ double ftol=FTOL; /* Tolerance for compu double ftolhess; /* Tolerance for computing hessian */ /**************** split *************************/ -static int split( char *path, char *dirc, char *name ) +static int split( char *path, char *dirc, char *name, char *ext, char *finame ) { char *s; /* pointer */ int l1, l2; /* length counters */ l1 = strlen( path ); /* length of path */ if ( l1 == 0 ) return( GLOCK_ERROR_NOPATH ); +#ifdef windows s = strrchr( path, '\\' ); /* find last / */ +#else + s = strrchr( path, '/' ); /* find last / */ +#endif if ( s == NULL ) { /* no directory, so use current */ #if defined(__bsd__) /* get current working directory */ extern char *getwd( ); @@ -169,7 +184,18 @@ static int split( char *path, char *dirc dirc[l1-l2] = 0; /* add zero */ } l1 = strlen( dirc ); /* length of directory */ +#ifdef windows if ( dirc[l1-1] != '\\' ) { dirc[l1] = '\\'; dirc[l1+1] = 0; } +#else + if ( dirc[l1-1] != '/' ) { dirc[l1] = '/'; dirc[l1+1] = 0; } +#endif + s = strrchr( name, '.' 
); /* find last / */ + s++; + strcpy(ext,s); /* save extension */ + l1= strlen( name); + l2= strlen( s)+1; + strncpy( finame, name, l1-l2); + finame[l1-l2]= 0; return( 0 ); /* we're done */ } @@ -717,7 +743,7 @@ double **pmij(double **ps, double *cov, s2 += x[(i-1)*nlstate*ncovmodel+(j-2)*ncovmodel+nc+(i-1)*(ndeath-1)*ncovmodel]*cov[nc]; /*printf("Int j>i s1=%.17e, s2=%.17e %lx %lx\n",s1,s2,s1,s2);*/ } - ps[i][j]=(s2); + ps[i][j]=s2; } } /*ps[3][2]=1;*/ @@ -900,7 +926,7 @@ void mlikeli(FILE *ficres,double p[], in powell(p,xi,npar,ftol,&iter,&fret,func); printf("\n#Number of iterations = %d, -2 Log likelihood = %.12f\n",iter,func(p)); - fprintf(ficres,"#Number of iterations = %d, -2 Log likelihood = %.12f ",iter,func(p)); + fprintf(ficres,"#Number of iterations = %d, -2 Log likelihood = %.12f \n",iter,func(p)); } @@ -1150,18 +1176,18 @@ void lubksb(double **a, int n, int *indx } /************ Frequencies ********************/ -void freqsummary(char fileres[], int agemin, int agemax, int **s, double **agev, int nlstate, int imx, int *Tvar, int **nbcode, int *ncodemax, int fprev1,int lprev1) +void freqsummary(char fileres[], int agemin, int agemax, int **s, double **agev, int nlstate, int imx, int *Tvar, int **nbcode, int *ncodemax,double **mint,double **anint, double dateprev1,double dateprev2,double jprev1, double mprev1,double anprev1,double jprev2, double mprev2,double anprev2) { /* Some frequencies */ - int i, m, jk, k1, i1, j1, bool, z1,z2,j; + int i, m, jk, k1,i1, j1, bool, z1,z2,j; double ***freq; /* Frequencies */ double *pp; - double pos; + double pos, k2, dateintsum=0,k2cpt=0; FILE *ficresp; char fileresp[FILENAMELENGTH]; - + pp=vector(1,nlstate); - probs= ma3x(1,130 ,1,8, 1,8); + probs= ma3x(1,AGESUP,1,NCOVMAX, 1,NCOVMAX); strcpy(fileresp,"p"); strcat(fileresp,fileres); if((ficresp=fopen(fileresp,"w"))==NULL) { @@ -1183,7 +1209,9 @@ void freqsummary(char fileres[], int ag for (jk=-1; jk<=nlstate+ndeath; jk++) for(m=agemin; m <= agemax+3; m++) 
freq[i][jk][m]=0; - + + dateintsum=0; + k2cpt=0; for (i=1; i<=imx; i++) { bool=1; if (cptcovn>0) { @@ -1191,15 +1219,26 @@ void freqsummary(char fileres[], int ag if (covar[Tvaraff[z1]][i]!= nbcode[Tvaraff[z1]][codtab[j1][z1]]) bool=0; } - if (bool==1) { - for(m=fprev1; m<=lprev1; m++){ - if(agev[m][i]==0) agev[m][i]=agemax+1; - if(agev[m][i]==1) agev[m][i]=agemax+2; - freq[s[m][i]][s[m+1][i]][(int)agev[m][i]] += weight[i]; - freq[s[m][i]][s[m+1][i]][(int) agemax+3] += weight[i]; + if (bool==1) { + for(m=firstpass; m<=lastpass; m++){ + k2=anint[m][i]+(mint[m][i]/12.); + if ((k2>=dateprev1) && (k2<=dateprev2)) { + if(agev[m][i]==0) agev[m][i]=agemax+1; + if(agev[m][i]==1) agev[m][i]=agemax+2; + freq[s[m][i]][s[m+1][i]][(int)agev[m][i]] += weight[i]; + freq[s[m][i]][s[m+1][i]][(int) agemax+3] += weight[i]; + if ((agev[m][i]>1) && (agev[m][i]< (agemax+3))) { + dateintsum=dateintsum+k2; + k2cpt++; + } + + } } } } + + fprintf(ficresp, "#Count between %.lf/%.lf/%.lf and %.lf/%.lf/%.lf\n",jprev1, mprev1,anprev1,jprev2, mprev2,anprev2); + if (cptcovn>0) { fprintf(ficresp, "\n#********** Variable "); for (z1=1; z1<=cptcoveff; z1++) fprintf(ficresp, "V%d=%d ",Tvaraff[z1],nbcode[Tvaraff[z1]][codtab[j1][z1]]); @@ -1258,24 +1297,26 @@ void freqsummary(char fileres[], int ag } } } + dateintmean=dateintsum/k2cpt; fclose(ficresp); free_ma3x(freq,-1,nlstate+ndeath,-1,nlstate+ndeath,(int) agemin,(int) agemax+3); free_vector(pp,1,nlstate); -} /* End of Freq */ + /* End of Freq */ +} /************ Prevalence ********************/ -void prevalence(int agemin, int agemax, int **s, double **agev, int nlstate, int imx, int *Tvar, int **nbcode, int *ncodemax, int fprev1,int lprev1) +void prevalence(int agemin, float agemax, int **s, double **agev, int nlstate, int imx, int *Tvar, int **nbcode, int *ncodemax,double **mint,double **anint, double dateprev1,double dateprev2, double calagedate) { /* Some frequencies */ int i, m, jk, k1, i1, j1, bool, z1,z2,j; double ***freq; /* Frequencies */ 
double *pp; - double pos; + double pos, k2; pp=vector(1,nlstate); - probs= ma3x(1,130 ,1,8, 1,8); + probs= ma3x(1,AGESUP,1,NCOVMAX, 1,NCOVMAX); freq=ma3x(-1,nlstate+ndeath,-1,nlstate+ndeath,agemin,agemax+3); j1=0; @@ -1290,31 +1331,34 @@ void prevalence(int agemin, int agemax, for (i=-1; i<=nlstate+ndeath; i++) for (jk=-1; jk<=nlstate+ndeath; jk++) for(m=agemin; m <= agemax+3; m++) - freq[i][jk][m]=0; - + freq[i][jk][m]=0; + for (i=1; i<=imx; i++) { bool=1; if (cptcovn>0) { for (z1=1; z1<=cptcoveff; z1++) if (covar[Tvaraff[z1]][i]!= nbcode[Tvaraff[z1]][codtab[j1][z1]]) bool=0; - } - if (bool==1) { - for(m=fprev1; m<=lprev1; m++){ - if(agev[m][i]==0) agev[m][i]=agemax+1; - if(agev[m][i]==1) agev[m][i]=agemax+2; - freq[s[m][i]][s[m+1][i]][(int)agev[m][i]] += weight[i]; - freq[s[m][i]][s[m+1][i]][(int) agemax+3] += weight[i]; + } + if (bool==1) { + for(m=firstpass; m<=lastpass; m++){ + k2=anint[m][i]+(mint[m][i]/12.); + if ((k2>=dateprev1) && (k2<=dateprev2)) { + if(agev[m][i]==0) agev[m][i]=agemax+1; + if(agev[m][i]==1) agev[m][i]=agemax+2; + freq[s[m][i]][s[m+1][i]][(int)(agev[m][i]+1-((int)calagedate %12)/12.)] += weight[i]; + /* freq[s[m][i]][s[m+1][i]][(int)(agemax+3+1)] += weight[i]; */ + } } } } - for(i=(int)agemin; i <= (int)agemax+3; i++){ - for(jk=1; jk <=nlstate ; jk++){ - for(m=-1, pp[jk]=0; m <=nlstate+ndeath ; m++) - pp[jk] += freq[jk][m][i]; - } - for(jk=1; jk <=nlstate ; jk++){ - for(m=-1, pos=0; m <=0 ; m++) + for(i=(int)agemin; i <= (int)agemax+3; i++){ + for(jk=1; jk <=nlstate ; jk++){ + for(m=-1, pp[jk]=0; m <=nlstate+ndeath ; m++) + pp[jk] += freq[jk][m][i]; + } + for(jk=1; jk <=nlstate ; jk++){ + for(m=-1, pos=0; m <=0 ; m++) pos += freq[jk][m][i]; } @@ -1333,7 +1377,7 @@ void prevalence(int agemin, int agemax, } } - } + } } } @@ -1342,6 +1386,7 @@ void prevalence(int agemin, int agemax, free_vector(pp,1,nlstate); } /* End of Freq */ + /************* Waves Concatenation ***************/ void concatwav(int wav[], int **dh, int **mw, int **s, 
double *agedc, double **agev, int firstpass, int lastpass, int imx, int nlstate, int stepm) @@ -1398,7 +1443,7 @@ void concatwav(int wav[], int **dh, int if (j >= jmax) jmax=j; if (j <= jmin) jmin=j; sum=sum+j; - /* if (j<10) printf("j=%d num=%d ",j,i); */ + /*if (j<0) printf("j=%d num=%d \n",j,i); */ } } else{ @@ -1406,7 +1451,7 @@ void concatwav(int wav[], int **dh, int k=k+1; if (j >= jmax) jmax=j; else if (j <= jmin)jmin=j; - /* if (j<10) printf("j=%d jmin=%d num=%d ",j,jmin,i); */ + /* if (j<10) printf("j=%d jmin=%d num=%d ",j,jmin,i); */ sum=sum+j; } jk= j/stepm; @@ -1482,7 +1527,7 @@ void tricode(int *Tvar, int **nbcode, in void evsij(char fileres[], double ***eij, double x[], int nlstate, int stepm, int bage, int fage, double **oldm, double **savm, int ij) { /* Health expectancies */ - int i, j, nhstepm, hstepm, h; + int i, j, nhstepm, hstepm, h, nstepm, k; double age, agelim,hf; double ***p3mat; @@ -1493,34 +1538,44 @@ void evsij(char fileres[], double ***eij fprintf(ficreseij," %1d-%1d",i,j); fprintf(ficreseij,"\n"); - hstepm=1*YEARM; /* Every j years of age (in month) */ - hstepm=hstepm/stepm; /* Typically in stepm units, if j= 2 years, = 2/6 months = 4 */ + k=1; /* For example stepm=6 months */ + hstepm=k*YEARM; /* (a) Every k years of age (in months), for example every k=2 years 24 m */ + hstepm=stepm; /* or (b) We decided to compute the life expectancy with the smallest unit */ + /* hstepm beeing the number of stepms, if hstepm=1 the length of hstepm is stepm. + nhstepm is the number of hstepm from age to agelim + nstepm is the number of stepm from age to agelin. + Look at hpijx to understand the reason of that which relies in memory size + and note for a fixed period like k years */ + /* We decided (b) to get a life expectancy respecting the most precise curvature of the + survival function given by stepm (the optimization length). 
Unfortunately it + means that if the survival funtion is printed only each two years of age and if + you sum them up and add 1 year (area under the trapezoids) you won't get the same + results. So we changed our mind and took the option of the best precision. + */ + hstepm=hstepm/stepm; /* Typically in stepm units, if k= 2 years, = 2/6 months = 4 */ agelim=AGESUP; for (age=bage; age<=fage; age ++){ /* If stepm=6 months */ /* nhstepm age range expressed in number of stepm */ - nhstepm=(int) rint((agelim-age)*YEARM/stepm); - /* Typically if 20 years = 20*12/6=40 stepm */ + nstepm=(int) rint((agelim-age)*YEARM/stepm); + /* Typically if 20 years nstepm = 20*12/6=40 stepm */ if (stepm >= YEARM) hstepm=1; - nhstepm = nhstepm/hstepm;/* Expressed in hstepm, typically 40/4=10 */ + nhstepm = nstepm/hstepm;/* Expressed in hstepm, typically nhstepm=40/4=10 */ p3mat=ma3x(1,nlstate+ndeath,1, nlstate+ndeath, 0,nhstepm); /* Computed by stepm unit matrices, product of hstepm matrices, stored in an array of nhstepm length: nhstepm=10, hstepm=4, stepm=6 months */ hpxij(p3mat,nhstepm,age,hstepm,x,nlstate,stepm,oldm, savm, ij); - - + hf=hstepm*stepm/YEARM; /* Duration of hstepm expressed in year unit. 
*/ for(i=1; i<=nlstate;i++) for(j=1; j<=nlstate;j++) - for (h=0, eij[i][j][(int)age]=0; h<=nhstepm; h++){ - eij[i][j][(int)age] +=p3mat[i][j][h]; + for (h=0, eij[i][j][(int)age]=0; h<=nhstepm-1; h++){ + eij[i][j][(int)age] += (p3mat[i][j][h]+p3mat[i][j][h+1])/2.0*hf; + /* if((int)age==70)printf("i=%2d,j=%2d,h=%2d,age=%3d,%9.4f,%9.4f,%9.4f\n",i,j,h,(int)age,p3mat[i][j][h],hf,eij[i][j][(int)age]);*/ } - - hf=1; - if (stepm >= YEARM) hf=stepm/YEARM; - fprintf(ficreseij,"%.0f",age ); + fprintf(ficreseij,"%3.0f",age ); for(i=1; i<=nlstate;i++) for(j=1; j<=nlstate;j++){ - fprintf(ficreseij," %.4f", hf*eij[i][j][(int)age]); + fprintf(ficreseij," %9.4f", eij[i][j][(int)age]); } fprintf(ficreseij,"\n"); free_ma3x(p3mat,1,nlstate+ndeath,1, nlstate+ndeath, 0,nhstepm); @@ -1577,7 +1632,7 @@ void varevsij(char fileres[], double *** for(i=1; i<=nlstate;i++) prlim[i][i]=probs[(int)age][i][ij]; } - + for(j=1; j<= nlstate; j++){ for(h=0; h<=nhstepm; h++){ for(i=1, gp[h][j]=0.;i<=nlstate;i++) @@ -1589,7 +1644,7 @@ void varevsij(char fileres[], double *** xp[i] = x[i] - (i==theta ?delti[theta]:0); hpxij(p3mat,nhstepm,age,hstepm,xp,nlstate,stepm,oldm,savm, ij); prevalim(prlim,nlstate,xp,age,oldm,savm,ftolpl,ij); - + if (popbased==1) { for(i=1; i<=nlstate;i++) prlim[i][i]=probs[(int)age][i][ij]; @@ -1641,7 +1696,7 @@ void varevsij(char fileres[], double *** free_ma3x(trgradg,0,nhstepm,1,nlstate,1,npar); free_ma3x(p3mat,1,nlstate+ndeath,1, nlstate+ndeath, 0,nhstepm); } /* End age */ - + free_vector(xp,1,npar); free_matrix(doldm,1,nlstate,1,npar); free_matrix(dnewm,1,nlstate,1,nlstate); @@ -1833,145 +1888,648 @@ if (i== 4) fprintf(ficresprob,"%.3e %.3e } free_vector(xp,1,npar); fclose(ficresprob); - exit(0); -} - -/***********************************************/ -/**************** Main Program *****************/ -/***********************************************/ -/*int main(int argc, char *argv[])*/ -int main() -{ +} - int i,j, k, n=MAXN,iter,m,size,cptcode, cptcod; - double agedeb, 
agefin,hf; - double agemin=1.e20, agemax=-1.e20; +/******************* Printing html file ***********/ +void printinghtml(char fileres[], char title[], char datafile[], int firstpass, int lastpass, int stepm, int weightopt, char model[],int imx,int jmin, int jmax, double jmeanint,char optionfile[],char optionfilehtm[],char rfileres[] ){ + int jj1, k1, i1, cpt; + FILE *fichtm; + /*char optionfilehtm[FILENAMELENGTH];*/ - double fret; - double **xi,tmp,delta; + strcpy(optionfilehtm,optionfile); + strcat(optionfilehtm,".htm"); + if((fichtm=fopen(optionfilehtm,"w"))==NULL) { + printf("Problem with %s \n",optionfilehtm), exit(0); + } - double dum; /* Dummy variable */ - double ***p3mat; - int *indx; - char line[MAXLINE], linepar[MAXLINE]; - char title[MAXLINE]; - char optionfile[FILENAMELENGTH], datafile[FILENAMELENGTH], filerespl[FILENAMELENGTH], optionfilehtm[FILENAMELENGTH]; - char fileres[FILENAMELENGTH], filerespij[FILENAMELENGTH], filereso[FILENAMELENGTH], fileresf[FILENAMELENGTH]; - char filerest[FILENAMELENGTH]; - char fileregp[FILENAMELENGTH]; - char popfile[FILENAMELENGTH]; - char path[80],pathc[80],pathcd[80],pathtot[80],model[20]; - int firstobs=1, lastobs=10; - int sdeb, sfin; /* Status at beginning and end */ - int c, h , cpt,l; - int ju,jl, mi; - int i1,j1, k1,k2,k3,jk,aa,bb, stepsize, ij; - int jnais,jdc,jint4,jint1,jint2,jint3,**outcome,**adl,*tab; - int mobilav=0, fprev, lprev ,fprevfore=1, lprevfore=1,nforecast,popforecast=0; - int hstepm, nhstepm; - int *popage; + fprintf(fichtm,"