--- imach/src/imach.c 2003/05/16 16:49:47 1.77 +++ imach/src/imach.c 2003/06/17 13:12:43 1.85 @@ -1,4 +1,34 @@ -/* $Id: imach.c,v 1.77 2003/05/16 16:49:47 brouard Exp $ +/* $Id: imach.c,v 1.85 2003/06/17 13:12:43 brouard Exp $ + $State: Exp $ + $Log: imach.c,v $ + Revision 1.85 2003/06/17 13:12:43 brouard + * imach.c (Repository): Check when date of death was earlier than + current date of interview. It may happen when the death was just + prior to the interview. In this case, dh was negative and likelihood + was wrong (infinity). We still send an "Error" but patch by + assuming that the date of death was just one stepm after the + interview. + (Repository): Because some people have very long ID (first column) + we changed int to long in num[] and we added a new lvector for + memory allocation. But we also truncated to 8 characters (left + truncation) + (Repository): No more line truncation errors. + + Revision 1.84 2003/06/13 21:44:43 brouard + * imach.c (Repository): Move "freqsummary" to the correct + place. It differs from routine "prevalence" which may be called + many times. Probs is memory consuming and must be used with + parsimony. + Version 0.95a2 (should output exactly the same maximization as 0.8a2) + + Revision 1.83 2003/06/10 13:39:11 lievre + *** empty log message *** + + Revision 1.82 2003/06/05 15:57:20 brouard + Add log in imach.c and fullversion number is now printed. + +*/ +/* Interpolated Markov Chain Short summary of the programme: @@ -58,6 +88,7 @@ read parameterfile read datafile concatwav + freqsummary if (mle >= 1) mlikeli print results files @@ -94,9 +125,9 @@ #define MAXLINE 256 #define GNUPLOTPROGRAM "gnuplot" /*#define GNUPLOTPROGRAM "..\\gp37mgw\\wgnuplot"*/ -#define FILENAMELENGTH 80 +#define FILENAMELENGTH 132 /*#define DEBUG*/ -#define windows +/*#define windows*/ #define GLOCK_ERROR_NOPATH -1 /* empty path */ #define GLOCK_ERROR_GETCWD -2 /* cannot get cwd */ @@ -111,15 +142,19 @@ #define YEARM 12. 
/* Number of months per year */ #define AGESUP 130 #define AGEBASE 40 -#ifdef windows -#define DIRSEPARATOR '\\' -#define ODIRSEPARATOR '/' -#else +#ifdef unix #define DIRSEPARATOR '/' #define ODIRSEPARATOR '\\' +#else +#define DIRSEPARATOR '\\' +#define ODIRSEPARATOR '/' #endif -char version[80]="Imach version 0.95a, May 2003, INED-EUROREVES "; +/* $Id: imach.c,v 1.85 2003/06/17 13:12:43 brouard Exp $ */ +/* $State: Exp $ */ + +char version[]="Imach version 0.95a2, June 2003, INED-EUROREVES "; +char fullversion[]="$Revision: 1.85 $ $Date: 2003/06/17 13:12:43 $"; int erreur; /* Error number */ int nvar; int cptcovn=0, cptcovage=0, cptcoveff=0,cptcov; @@ -142,6 +177,12 @@ double **oldm, **newm, **savm; /* Workin double **oldms, **newms, **savms; /* Fixed working pointers to matrices */ FILE *fic,*ficpar, *ficparo,*ficres, *ficrespl, *ficrespij, *ficrest,*ficresf,*ficrespop; FILE *ficlog, *ficrespow; +int globpr; /* Global variable for printing or not */ +double fretone; /* Only one call to likelihood */ +long ipmx; /* Number of contributions */ +double sw; /* Sum of weights */ +char fileresilk[FILENAMELENGTH]; /* File of individual contributions to the likelihood */ +FILE *ficresilk; FILE *ficgp,*ficresprob,*ficpop, *ficresprobcov, *ficresprobcor; FILE *ficresprobmorprev; FILE *fichtm; /* Html File */ @@ -199,7 +240,8 @@ int estepm; /* Estepm, step in month to interpolate survival function in order to approximate Life Expectancy*/ int m,nb; -int *num, firstpass=0, lastpass=4,*cod, *ncodemax, *Tage; +long *num; +int firstpass=0, lastpass=4,*cod, *ncodemax, *Tage; double **agev,*moisnais, *annais, *moisdc, *andc,**mint, **anint; double **pmmij, ***probs; double dateintmean=0; @@ -239,11 +281,12 @@ static int split( char *path, char *dirc dirc[l1-l2] = 0; /* add zero */ } l1 = strlen( dirc ); /* length of directory */ -#ifdef windows + /*#ifdef windows if ( dirc[l1-1] != '\\' ) { dirc[l1] = '\\'; dirc[l1+1] = 0; } #else if ( dirc[l1-1] != '/' ) { dirc[l1] = '/'; 
dirc[l1+1] = 0; } #endif + */ ss = strrchr( name, '.' ); /* find last / */ ss++; strcpy(ext,ss); /* save extension */ @@ -327,31 +370,31 @@ void free_vector(double*v, int nl, int n } /************************ivector *******************************/ -char *cvector(long nl,long nh) +int *ivector(long nl,long nh) { - char *v; - v=(char *) malloc((size_t)((nh-nl+1+NR_END)*sizeof(char))); - if (!v) nrerror("allocation failure in cvector"); + int *v; + v=(int *) malloc((size_t)((nh-nl+1+NR_END)*sizeof(int))); + if (!v) nrerror("allocation failure in ivector"); return v-nl+NR_END; } /******************free ivector **************************/ -void free_cvector(char *v, long nl, long nh) +void free_ivector(int *v, long nl, long nh) { free((FREE_ARG)(v+nl-NR_END)); } -/************************ivector *******************************/ -int *ivector(long nl,long nh) +/************************lvector *******************************/ +long *lvector(long nl,long nh) { - int *v; - v=(int *) malloc((size_t)((nh-nl+1+NR_END)*sizeof(int))); + long *v; + v=(long *) malloc((size_t)((nh-nl+1+NR_END)*sizeof(long))); if (!v) nrerror("allocation failure in ivector"); return v-nl+NR_END; } -/******************free ivector **************************/ -void free_ivector(int *v, long nl, long nh) +/******************free lvector **************************/ +void free_lvector(long *v, long nl, long nh) { free((FREE_ARG)(v+nl-NR_END)); } @@ -410,7 +453,7 @@ double **matrix(long nrl, long nrh, long for (i=nrl+1; i<=nrh; i++) m[i]=m[i-1]+ncol; return m; - /* print *(*(m+1)+70) ou print m[1][70]; print m+1 or print &(m[1]) + /* print *(*(m+1)+70) or print m[1][70]; print m+1 or print &(m[1]) */ } @@ -1163,7 +1206,42 @@ double func( double *x) ll[s[mw[mi][i]][i]] += 2*weight[i]*lli; } /* end of wave */ } /* end of individual */ - }else{ /* ml=4 no inter-extrapolation */ + }else if (mle==4){ /* ml=4 no inter-extrapolation */ + for (i=1,ipmx=0, sw=0.; i<=imx; i++){ + for (k=1; k<=cptcovn;k++) 
cov[2+k]=covar[Tvar[k]][i]; + for(mi=1; mi<= wav[i]-1; mi++){ + for (ii=1;ii<=nlstate+ndeath;ii++) + for (j=1;j<=nlstate+ndeath;j++){ + oldm[ii][j]=(ii==j ? 1.0 : 0.0); + savm[ii][j]=(ii==j ? 1.0 : 0.0); + } + for(d=0; d nlstate){ + lli=log(out[s1][s2] - savm[s1][s2]); + }else{ + lli=log(out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]]); /* Original formula */ + } + ipmx +=1; + sw += weight[i]; + ll[s[mw[mi][i]][i]] += 2*weight[i]*lli; +/* printf("i=%6d s1=%1d s2=%1d mi=%1d mw=%1d dh=%3d prob=%10.6f w=%6.4f out=%10.6f sav=%10.6f\n",i,s1,s2,mi,mw[mi][i],dh[mi][i],exp(lli),weight[i],out[s1][s2],savm[s1][s2]); */ + } /* end of wave */ + } /* end of individual */ + }else{ /* ml=5 no inter-extrapolation no jackson =0.8a */ for (i=1,ipmx=0, sw=0.; i<=imx; i++){ for (k=1; k<=cptcovn;k++) cov[2+k]=covar[Tvar[k]][i]; for(mi=1; mi<= wav[i]-1; mi++){ @@ -1185,10 +1263,13 @@ double func( double *x) oldm=newm; } /* end mult */ + s1=s[mw[mi][i]][i]; + s2=s[mw[mi+1][i]][i]; lli=log(out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]]); /* Original formula */ ipmx +=1; sw += weight[i]; ll[s[mw[mi][i]][i]] += 2*weight[i]*lli; + /*printf("i=%6d s1=%1d s2=%1d mi=%1d mw=%1d dh=%3d prob=%10.6f w=%6.4f out=%10.6f sav=%10.6f\n",i,s1,s2,mi,mw[mi][i],dh[mi][i],exp(lli),weight[i],out[s1][s2],savm[s1][s2]);*/ } /* end of wave */ } /* end of individual */ } /* End of if */ @@ -1198,6 +1279,111 @@ double func( double *x) return -l; } +/*************** log-likelihood *************/ +double funcone( double *x) +{ + int i, ii, j, k, mi, d, kk; + double l, ll[NLSTATEMAX], cov[NCOVMAX]; + double **out; + double lli; /* Individual log likelihood */ + int s1, s2; + double bbh, survp; + /*extern weight */ + /* We are differentiating ll according to initial status */ + /* for (i=1;i<=npar;i++) printf("%f ", x[i]);*/ + /*for(i=1;i nlstate && (mle <5) ){ /* Jackson */ + lli=log(out[s1][s2] - savm[s1][s2]); + } else if (mle==1){ + lli= log((1.+bbh)*out[s1][s2]- bbh*savm[s1][s2]); /* linear interpolation */ + } else if(mle==2){ 
+ lli= (savm[s1][s2]>(double)1.e-8 ?log((1.+bbh)*out[s1][s2]- bbh*savm[s1][s2]):log((1.+bbh)*out[s1][s2])); /* linear interpolation */ + } else if(mle==3){ /* exponential inter-extrapolation */ + lli= (savm[s1][s2]>(double)1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.+bbh)*out[s1][s2])); /* exponential inter-extrapolation */ + } else if (mle==4){ /* mle=4 no inter-extrapolation */ + lli=log(out[s1][s2]); /* Original formula */ + } else{ /* ml>=5 no inter-extrapolation no jackson =0.8a */ + lli=log(out[s1][s2]); /* Original formula */ + } /* End of if */ + ipmx +=1; + sw += weight[i]; + ll[s[mw[mi][i]][i]] += 2*weight[i]*lli; +/* printf("i=%6d s1=%1d s2=%1d mi=%1d mw=%1d dh=%3d prob=%10.6f w=%6.4f out=%10.6f sav=%10.6f\n",i,s1,s2,mi,mw[mi][i],dh[mi][i],exp(lli),weight[i],out[s1][s2],savm[s1][s2]); */ + if(globpr){ + fprintf(ficresilk,"%6d %1d %1d %1d %1d %3d %10.6f %6.4f %10.6f %10.6f %10.6f ", \ + i,s1,s2,mi,mw[mi][i],dh[mi][i],exp(lli),weight[i],2*weight[i]*lli,out[s1][s2],savm[s1][s2]); + for(k=1,l=0.; k<=nlstate; k++) + fprintf(ficresilk," %10.6f",ll[k]); + fprintf(ficresilk,"\n"); + } + } /* end of wave */ + } /* end of individual */ + for(k=1,l=0.; k<=nlstate; k++) l += ll[k]; + /* printf("l1=%f l2=%f ",ll[1],ll[2]); */ + l= l*ipmx/sw; /* To get the same order of magnitude as if weight=1 for every body */ + return -l; +} + + +void likelione(FILE *ficres,double p[], int npar, int nlstate, int *globpr, long *ipmx, double *sw, double *fretone, double (*funcone)(double [])) +{ + /* This routine should help understanding what is done with the selection of individuals/waves and + to check the exact contribution to the likelihood. + Plotting could be done. 
+ */ + int k; + if(globpr !=0){ /* Just counts and sums no printings */ + strcpy(fileresilk,"ilk"); + strcat(fileresilk,fileres); + if((ficresilk=fopen(fileresilk,"w"))==NULL) { + printf("Problem with resultfile: %s\n", fileresilk); + fprintf(ficlog,"Problem with resultfile: %s\n", fileresilk); + } + fprintf(ficresilk, "# individual(line's record) s1 s2 wave# effective_wave# number_of_product_matrix pij weight 2ln(pij)*weight 0pij_x 0pij_(x-stepm) cumulating_loglikeli_by_health_state"); + fprintf(ficresilk, "# i s1 s2 mi mw dh likeli weight out sav "); + /* i,s1,s2,mi,mw[mi][i],dh[mi][i],exp(lli),weight[i],2*weight[i]*lli,out[s1][s2],savm[s1][s2]); */ + for(k=1; k<=nlstate; k++) + fprintf(ficresilk," ll[%d]",k); + fprintf(ficresilk,"\n"); + } + + *fretone=(*funcone)(p); + if(globpr !=0) + fclose(ficresilk); + return; +} /*********** Maximum Likelihood Estimation ***************/ @@ -1206,6 +1392,7 @@ void mlikeli(FILE *ficres,double p[], in int i,j, iter; double **xi; double fret; + double fretone; /* Only one call to likelihood */ char filerespow[FILENAMELENGTH]; xi=matrix(1,npar,1,npar); for (i=1;i<=npar;i++) @@ -1223,6 +1410,7 @@ void mlikeli(FILE *ficres,double p[], in for(j=1;j<=nlstate+ndeath;j++) if(j!=i)fprintf(ficrespow," p%1d%1d",i,j); fprintf(ficrespow,"\n"); + powell(p,xi,npar,ftol,&iter,&fret,func); fclose(ficrespow); @@ -1490,7 +1678,7 @@ void lubksb(double **a, int n, int *indx } /************ Frequencies ********************/ -void freqsummary(char fileres[], int iagemin, int iagemax, int **s, double **agev, int nlstate, int imx, int *Tvaraff, int **nbcode, int *ncodemax,double **mint,double **anint, double dateprev1,double dateprev2,double jprev1, double mprev1,double anprev1,double jprev2, double mprev2,double anprev2) +void freqsummary(char fileres[], int iagemin, int iagemax, int **s, double **agev, int nlstate, int imx, int *Tvaraff, int **nbcode, int *ncodemax,double **mint,double **anint) { /* Some frequencies */ int i, m, jk, k1,i1, j1, 
bool, z1,z2,j; @@ -1544,7 +1732,7 @@ void freqsummary(char fileres[], int ia if (bool==1){ for(m=firstpass; m<=lastpass; m++){ k2=anint[m][i]+(mint[m][i]/12.); - if ((k2>=dateprev1) && (k2<=dateprev2)) { + /*if ((k2>=dateprev1) && (k2<=dateprev2)) {*/ if(agev[m][i]==0) agev[m][i]=iagemax+1; if(agev[m][i]==1) agev[m][i]=iagemax+2; if (s[m][i]>0 && s[m][i]<=nlstate) prop[s[m][i]][(int)agev[m][i]] += weight[i]; @@ -1557,12 +1745,12 @@ void freqsummary(char fileres[], int ia dateintsum=dateintsum+k2; k2cpt++; } - } + /*}*/ } } } - fprintf(ficresp, "#Count between %.lf/%.lf/%.lf and %.lf/%.lf/%.lf\n",jprev1, mprev1,anprev1,jprev2, mprev2,anprev2); + /* fprintf(ficresp, "#Count between %.lf/%.lf/%.lf and %.lf/%.lf/%.lf\n",jprev1, mprev1,anprev1,jprev2, mprev2,anprev2);*/ if (cptcovn>0) { fprintf(ficresp, "\n#********** Variable "); @@ -1623,7 +1811,7 @@ void freqsummary(char fileres[], int ia if( i <= iagemax){ if(pos>=1.e-5){ fprintf(ficresp," %d %.5f %.0f %.0f",i,prop[jk][i]/posprop, prop[jk][i],posprop); - probs[i][jk][j1]= pp[jk]/pos; + /*probs[i][jk][j1]= pp[jk]/pos;*/ /*printf("\ni=%d jk=%d j1=%d %.5f %.0f %.0f %f",i,jk,j1,pp[jk]/pos, pp[jk],pos,probs[i][jk][j1]);*/ } else @@ -1656,7 +1844,7 @@ void freqsummary(char fileres[], int ia } /************ Prevalence ********************/ -void prevalence(double agemin, double agemax, int **s, double **agev, int nlstate, int imx, int *Tvar, int **nbcode, int *ncodemax,double **mint,double **anint, double dateprev1,double dateprev2, int firstpass, int lastpass) +void prevalence(double ***probs, double agemin, double agemax, int **s, double **agev, int nlstate, int imx, int *Tvar, int **nbcode, int *ncodemax,double **mint,double **anint, double dateprev1,double dateprev2, int firstpass, int lastpass) { /* Compute observed prevalence between dateprev1 and dateprev2 by counting the number of people in each health status at the date of interview (if between dateprev1 and dateprev2). 
@@ -1775,11 +1963,11 @@ void concatwav(int wav[], int **dh, int wav[i]=mi; if(mi==0){ if(first==0){ - printf("Warning! None valid information for:%d line=%d (skipped) and may be others, see log file\n",num[i],i); + printf("Warning! None valid information for:%ld line=%d (skipped) and may be others, see log file\n",num[i],i); first=1; } if(first==1){ - fprintf(ficlog,"Warning! None valid information for:%d line=%d (skipped)\n",num[i],i); + fprintf(ficlog,"Warning! None valid information for:%ld line=%d (skipped)\n",num[i],i); } } /* end mi==0 */ } /* End individuals */ @@ -1791,15 +1979,21 @@ void concatwav(int wav[], int **dh, int else{ if (s[mw[mi+1][i]][i] > nlstate) { /* A death */ if (agedc[i] < 2*AGESUP) { - j= rint(agedc[i]*12-agev[mw[mi][i]][i]*12); - if(j==0) j=1; /* Survives at least one month after exam */ - k=k+1; - if (j >= jmax) jmax=j; - if (j <= jmin) jmin=j; - sum=sum+j; - /*if (j<0) printf("j=%d num=%d \n",j,i);*/ - /* printf("%d %d %d %d\n", s[mw[mi][i]][i] ,s[mw[mi+1][i]][i],j,i);*/ - if(j<0)printf("Warning! Negative delay (%d) between waves %d and %d of individual at line %d who is aged %.1f with statuses %d %d\n ",j,mw[mi][i],mw[mi+1][i], i,agev[mw[mi][i]][i],s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]); + j= rint(agedc[i]*12-agev[mw[mi][i]][i]*12); + if(j==0) j=1; /* Survives at least one month after exam */ + else if(j<0){ + printf("Error! Negative delay (%d to death) between waves %d and %d of individual %ld at line %d who is aged %.1f with statuses from %d to %d\n ",j,mw[mi][i],mw[mi+1][i],num[i], i,agev[mw[mi][i]][i],s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]); + j=1; /* Careful Patch */ + printf(" We assumed that the date of interview was correct (and not the date of death) and postponed the death %d month(s) (one stepm) after the interview.\n You MUST fixe the contradiction between dates.\n",stepm); + printf("Error! 
Negative delay (%d to death) between waves %d and %d of individual %ld at line %d who is aged %.1f with statuses from %d to %d\n ",j,mw[mi][i],mw[mi+1][i],num[i], i,agev[mw[mi][i]][i],s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]); + fprintf(ficlog," We assumed that the date of interview was correct (and not the date of death) and postponed the death %d month(s) (one stepm) after the interview.\n You MUST fix the contradiction between dates.\n",stepm); + } + k=k+1; + if (j >= jmax) jmax=j; + if (j <= jmin) jmin=j; + sum=sum+j; + /*if (j<0) printf("j=%d num=%d \n",j,i);*/ + /* printf("%d %d %d %d\n", s[mw[mi][i]][i] ,s[mw[mi+1][i]][i],j,i);*/ } } else{ @@ -1810,13 +2004,16 @@ void concatwav(int wav[], int **dh, int else if (j <= jmin)jmin=j; /* if (j<10) printf("j=%d jmin=%d num=%d ",j,jmin,i); */ /*printf("%d %lf %d %d %d\n", i,agev[mw[mi][i]][i],j,s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]);*/ - if(j<0)printf("Warning! Negative delay (%d to death) between waves %d and %d of individual at line %d who is aged %.1f with statuses %d %d\n ",j,mw[mi][i],mw[mi+1][i], i,agev[mw[mi][i]][i],s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]); + if(j<0){ + printf("Error! Negative delay (%d) between waves %d and %d of individual %ld at line %d who is aged %.1f with statuses from %d to %d\n ",j,mw[mi][i],mw[mi+1][i],num[i], i,agev[mw[mi][i]][i],s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]); + fprintf(ficlog,"Error! 
Negative delay (%d) between waves %d and %d of individual %ld at line %d who is aged %.1f with statuses from %d to %d\n ",j,mw[mi][i],mw[mi+1][i],num[i], i,agev[mw[mi][i]][i],s[mw[mi][i]][i] ,s[mw[mi+1][i]][i]); + } sum=sum+j; } jk= j/stepm; jl= j -jk*stepm; ju= j -(jk+1)*stepm; - if(mle <=1){ + if(mle <=1){ /* only if we use the linear-interpolation pseudo-likelihood */ if(jl==0){ dh[mi][i]=jk; bh[mi][i]=0; @@ -1841,8 +2038,8 @@ void concatwav(int wav[], int **dh, int bh[mi][i]=ju; /* At least one step */ /* printf(" bh=%d ju=%d jl=%d dh=%d jk=%d stepm=%d %d\n",bh[mi][i],ju,jl,dh[mi][i],jk,stepm,i);*/ } - } - } /* end if mle */ + } /* end if mle */ + } } /* end wave */ } jmean=sum/k; @@ -2390,7 +2587,7 @@ void varevsij(char optionfilefiname[], d fclose(ficresprobmorprev); fclose(ficgp); fclose(fichtm); -} +} /* end varevsij */ /************ Variance of prevlim ******************/ void varprevlim(char fileres[], double **varpl, double **matcov, double x[], double delti[], int nlstate, int stepm, double bage, double fage, double **oldm, double **savm, double **prlim, double ftolpl, int ij) @@ -2802,11 +2999,11 @@ void printinghtml(char fileres[], char t fprintf(ficlog,"Problem with %s \n",optionfilehtm), exit(0); } - fprintf(fichtm,"