--- imach/src/imach.c	2002/11/20 17:35:59	1.63
+++ imach/src/imach.c	2003/01/28 17:23:35	1.66
@@ -1,4 +1,4 @@
-/* $Id: imach.c,v 1.63 2002/11/20 17:35:59 lievre Exp $
+/* $Id: imach.c,v 1.66 2003/01/28 17:23:35 brouard Exp $
    Interpolated Markov Chain
 
   Short summary of the programme:
@@ -32,8 +32,8 @@
   hPijx is the probability to be observed in state i at age x+h
   conditional to the observed state i at age x. The delay 'h' can be
   split into an exact number (nh*stepm) of unobserved intermediate
-  states. This elementary transition (by month or quarter trimester,
-  semester or year) is model as a multinomial logistic.  The hPx
+  states. This elementary transition (by month, quarter,
+  semester or year) is modelled as a multinomial logistic.  The hPx
   matrix is simply the matrix product of nh*stepm elementary matrices
   and the contribution of each individual to the likelihood is simply
   hPijx.
@@ -83,7 +83,7 @@
 #define ODIRSEPARATOR '\\'
 #endif
 
-char version[80]="Imach version 0.9, November 2002, INED-EUROREVES ";
+char version[80]="Imach version 0.91, November 2002, INED-EUROREVES ";
 int erreur; /* Error number */
 int nvar;
 int cptcovn=0, cptcovage=0, cptcoveff=0,cptcov;
@@ -856,11 +856,13 @@ double **matprod2(double **out, double *
 
 double ***hpxij(double ***po, int nhstepm, double age, int hstepm, double *x, int nlstate, int stepm, double **oldm, double **savm, int ij )
 {
-  /* Computes the transition matrix starting at age 'age' over 'nhstepm*hstepm*stepm' month 
-     duration (i.e. until
-     age (in years)  age+nhstepm*stepm/12) by multiplying nhstepm*hstepm matrices. 
+  /* Computes the transition matrix starting at age 'age' over 
+     'nhstepm*hstepm*stepm' months (i.e. until
+     age (in years)  age+nhstepm*hstepm*stepm/12) by multiplying 
+     nhstepm*hstepm matrices. 
      Output is stored in matrix po[i][j][h] for h every 'hstepm' step 
-     (typically every 2 years instead of every month which is too big).
+     (typically every 2 years instead of every month which is too big 
+     for the memory).
      Model is determined by parameters x and covariates have to be 
      included manually here. 
 
@@ -944,13 +946,10 @@ double func( double *x)
 	  for (kk=1; kk<=cptcovage;kk++) {
 	    cov[Tage[kk]+2]=covar[Tvar[Tage[kk]]][i]*cov[2];
 	  }
-	
 	  out=matprod2(newm,oldm,1,nlstate+ndeath,1,nlstate+ndeath,
 		       1,nlstate+ndeath,pmij(pmmij,cov,ncovmodel,x,nlstate));
 	  savm=oldm;
 	  oldm=newm;
-	
-	
 	} /* end mult */
       
 	/*lli=log(out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]]);*/ /* Original formula */
@@ -968,13 +967,113 @@ double func( double *x)
 	 */
 	s1=s[mw[mi][i]][i];
 	s2=s[mw[mi+1][i]][i];
-	bbh=(double)bh[mi][i]/(double)stepm;
-
-	/*	lli= (savm[s1][s2]>(double)1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.+bbh)*out[s1][s2]));*/
-
+	bbh=(double)bh[mi][i]/(double)stepm; 
+	/* bias is positive if real duration
+	 * is higher than the multiple of stepm and negative otherwise.
+	 */
+	/* lli= (savm[s1][s2]>1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.+bbh)*out[s1][s2]));*/
+	lli= (savm[s1][s2]>(double)1.e-8 ?log((1.+bbh)*out[s1][s2]- bbh*(savm[s1][s2])):log((1.+bbh)*out[s1][s2]));  /* linear interpolation */
+	/*lli=(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]);*/
+	/*if(lli ==000.0)*/
+	/*printf("bbh= %f lli=%f savm=%f out=%f %d\n",bbh,lli,savm[s1][s2], out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]],i); */
+	ipmx +=1;
+	sw += weight[i];
+	ll[s[mw[mi][i]][i]] += 2*weight[i]*lli;
+      } /* end of wave */
+    } /* end of individual */
+  }  else if(mle==2){
+    for (i=1,ipmx=0, sw=0.; i<=imx; i++){
+      for (k=1; k<=cptcovn;k++) cov[2+k]=covar[Tvar[k]][i];
+      for(mi=1; mi<= wav[i]-1; mi++){
+	for (ii=1;ii<=nlstate+ndeath;ii++)
+	  for (j=1;j<=nlstate+ndeath;j++){
+	    oldm[ii][j]=(ii==j ? 1.0 : 0.0);
+	    savm[ii][j]=(ii==j ? 1.0 : 0.0);
+	  }
+	for(d=0; d<=dh[mi][i]; d++){
+	  newm=savm;
+	  cov[2]=agev[mw[mi][i]][i]+d*stepm/YEARM;
+	  for (kk=1; kk<=cptcovage;kk++) {
+	    cov[Tage[kk]+2]=covar[Tvar[Tage[kk]]][i]*cov[2];
+	  }
+	  out=matprod2(newm,oldm,1,nlstate+ndeath,1,nlstate+ndeath,
+		       1,nlstate+ndeath,pmij(pmmij,cov,ncovmodel,x,nlstate));
+	  savm=oldm;
+	  oldm=newm;
+	} /* end mult */
+      
+	/*lli=log(out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]]);*/ /* Original formula */
+	/* But now since version 0.9 we anticipate for bias and large stepm.
+	 * If stepm is larger than one month (smallest stepm) and if the exact delay 
+	 * (in months) between two waves is not a multiple of stepm, we rounded to 
+	 * the nearest (and in case of equal distance, to the lowest) interval but now
+	 * we keep into memory the bias bh[mi][i] and also the previous matrix product
+	 * (i.e. to dh[mi][i]-1) saved in 'savm'. Then we inter(extra)polate the
+	 * probability in order to take into account the bias as a fraction of the way
+	 * from savm to out if bh is negative or even beyond if bh is positive. bh varies
+	 * -stepm/2 to stepm/2 .
+	 * For stepm=1 the results are the same as for previous versions of Imach.
+	 * For stepm > 1 the results are less biased than in previous versions. 
+	 */
+	s1=s[mw[mi][i]][i];
+	s2=s[mw[mi+1][i]][i];
+	bbh=(double)bh[mi][i]/(double)stepm; 
+	/* bias is positive if real duration
+	 * is higher than the multiple of stepm and negative otherwise.
+	 */
 	lli= (savm[s1][s2]>(double)1.e-8 ?log((1.+bbh)*out[s1][s2]- bbh*(savm[s1][s2])):log((1.+bbh)*out[s1][s2])); /* linear interpolation */
-
-	/*lli= (savm[s1][s2]>1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.-bbh)*out[s1][s2]));*/
+	/* lli= (savm[s1][s2]>1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.+bbh)*out[s1][s2]));*/
+	/*lli= (savm[s1][s2]>1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.-+bh)*out[s1][s2])); */ /* exponential interpolation */
+	/*lli=(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]);*/
+	/*if(lli ==000.0)*/
+	/*printf("bbh= %f lli=%f savm=%f out=%f %d\n",bbh,lli,savm[s1][s2], out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]],i); */
+	ipmx +=1;
+	sw += weight[i];
+	ll[s[mw[mi][i]][i]] += 2*weight[i]*lli;
+      } /* end of wave */
+    } /* end of individual */
+  }  else if(mle==3){  /* exponential inter-extrapolation */
+    for (i=1,ipmx=0, sw=0.; i<=imx; i++){
+      for (k=1; k<=cptcovn;k++) cov[2+k]=covar[Tvar[k]][i];
+      for(mi=1; mi<= wav[i]-1; mi++){
+	for (ii=1;ii<=nlstate+ndeath;ii++)
+	  for (j=1;j<=nlstate+ndeath;j++){
+	    oldm[ii][j]=(ii==j ? 1.0 : 0.0);
+	    savm[ii][j]=(ii==j ? 1.0 : 0.0);
+	  }
+	for(d=0; d<dh[mi][i]; d++){
+	  newm=savm;
+	  cov[2]=agev[mw[mi][i]][i]+d*stepm/YEARM;
+	  for (kk=1; kk<=cptcovage;kk++) {
+	    cov[Tage[kk]+2]=covar[Tvar[Tage[kk]]][i]*cov[2];
+	  }
+	  out=matprod2(newm,oldm,1,nlstate+ndeath,1,nlstate+ndeath,
+		       1,nlstate+ndeath,pmij(pmmij,cov,ncovmodel,x,nlstate));
+	  savm=oldm;
+	  oldm=newm;
+	} /* end mult */
+      
+	/*lli=log(out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]]);*/ /* Original formula */
+	/* But now since version 0.9 we anticipate for bias and large stepm.
+	 * If stepm is larger than one month (smallest stepm) and if the exact delay 
+	 * (in months) between two waves is not a multiple of stepm, we rounded to 
+	 * the nearest (and in case of equal distance, to the lowest) interval but now
+	 * we keep into memory the bias bh[mi][i] and also the previous matrix product
+	 * (i.e. to dh[mi][i]-1) saved in 'savm'. Then we inter(extra)polate the
+	 * probability in order to take into account the bias as a fraction of the way
+	 * from savm to out if bh is negative or even beyond if bh is positive. bh varies
+	 * -stepm/2 to stepm/2 .
+	 * For stepm=1 the results are the same as for previous versions of Imach.
+	 * For stepm > 1 the results are less biased than in previous versions. 
+	 */
+	s1=s[mw[mi][i]][i];
+	s2=s[mw[mi+1][i]][i];
+	bbh=(double)bh[mi][i]/(double)stepm; 
+	/* bias is positive if real duration
+	 * is higher than the multiple of stepm and negative otherwise.
+	 */
+	/* lli= (savm[s1][s2]>(double)1.e-8 ?log((1.+bbh)*out[s1][s2]- bbh*(savm[s1][s2])):log((1.+bbh)*out[s1][s2])); */ /* linear interpolation */
+	lli= (savm[s1][s2]>1.e-8 ?(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]):log((1.+bbh)*out[s1][s2])); /* exponential inter-extrapolation */
 	/*lli=(1.+bbh)*log(out[s1][s2])- bbh*log(savm[s1][s2]);*/
 	/*if(lli ==000.0)*/
 	/*printf("bbh= %f lli=%f savm=%f out=%f %d\n",bbh,lli,savm[s1][s2], out[s[mw[mi][i]][i]][s[mw[mi+1][i]][i]],i); */
@@ -983,7 +1082,7 @@ double func( double *x)
 	ll[s[mw[mi][i]][i]] += 2*weight[i]*lli;
       } /* end of wave */
     } /* end of individual */
-  }  else{ 
+  }else{  /* ml=4 no inter-extrapolation */
     for (i=1,ipmx=0, sw=0.; i<=imx; i++){
       for (k=1; k<=cptcovn;k++) cov[2+k]=covar[Tvar[k]][i];
       for(mi=1; mi<= wav[i]-1; mi++){
@@ -1034,7 +1133,7 @@ void mlikeli(FILE *ficres,double p[], in
   powell(p,xi,npar,ftol,&iter,&fret,func);
 
    printf("\n#Number of iterations = %d, -2 Log likelihood = %.12f\n",iter,func(p));
-  fprintf(ficlog,"#Number of iterations = %d, -2 Log likelihood = %.12f \n",iter,func(p));
+  fprintf(ficlog,"\n#Number of iterations = %d, -2 Log likelihood = %.12f \n",iter,func(p));
   fprintf(ficres,"#Number of iterations = %d, -2 Log likelihood = %.12f \n",iter,func(p));
 
 }
@@ -1618,22 +1717,35 @@ void  concatwav(int wav[], int **dh, int
 	jk= j/stepm;
 	jl= j -jk*stepm;
 	ju= j -(jk+1)*stepm;
-	if(jl <= -ju){
-	  dh[mi][i]=jk;
-	  bh[mi][i]=jl;
-	}
-	else{
-	  dh[mi][i]=jk+1;
-	  bh[mi][i]=ju;
-	}
-	if(dh[mi][i]==0){
-	  dh[mi][i]=1; /* At least one step */
-	  bh[mi][i]=ju; /* At least one step */
-	  printf(" bh=%d ju=%d jl=%d dh=%d jk=%d stepm=%d %d\n",bh[mi][i],ju,jl,dh[mi][i],jk,stepm,i);
+	if(mle <=1){ 
+	  if(jl==0){
+	    dh[mi][i]=jk;
+	    bh[mi][i]=0;
+	  }else{ /* We want a negative bias in order to only have interpolation ie
+		  * at the price of an extra matrix product in likelihood */
+	    dh[mi][i]=jk+1;
+	    bh[mi][i]=ju;
+	  }
+	}else{
+	  if(jl <= -ju){
+	    dh[mi][i]=jk;
+	    bh[mi][i]=jl;	/* bias is positive if real duration
+				 * is higher than the multiple of stepm and negative otherwise.
+				 */
+	  }
+	  else{
+	    dh[mi][i]=jk+1;
+	    bh[mi][i]=ju;
+	  }
+	  if(dh[mi][i]==0){
+	    dh[mi][i]=1; /* At least one step */
+	    bh[mi][i]=ju; /* At least one step */
+	    printf(" bh=%d ju=%d jl=%d dh=%d jk=%d stepm=%d %d\n",bh[mi][i],ju,jl,dh[mi][i],jk,stepm,i);
+	  }
+	  if(i==298 || i==287 || i==763 ||i==1061)printf(" bh=%d ju=%d jl=%d dh=%d jk=%d stepm=%d",bh[mi][i],ju,jl,dh[mi][i],jk,stepm);
 	}
-	if(i==298 || i==287 || i==763 ||i==1061)printf(" bh=%d ju=%d jl=%d dh=%d jk=%d stepm=%d",bh[mi][i],ju,jl,dh[mi][i],jk,stepm);
-      }
-    }
+      } /* end if mle */
+    } /* end wave */
   }
   jmean=sum/k;
   printf("Delay (in months) between two waves Min=%d Max=%d Mean=%f\n\n ",jmin, jmax,jmean);
@@ -1734,7 +1846,7 @@ void evsij(char fileres[], double ***eij
    * This is mainly to measure the difference between two models: for example
    * if stepm=24 months pijx are given only every 2 years and by summing them
    * we are calculating an estimate of the Life Expectancy assuming a linear 
-   * progression inbetween and thus overestimating or underestimating according
+   * progression in between and thus overestimating or underestimating according
    * to the curvature of the survival function. If, for the same date, we 
    * estimate the model with stepm=1 month, we can keep estepm to 24 months
    * to compare the new estimate of Life expectancy with the same linear 
@@ -1924,7 +2036,7 @@ void varevsij(char optionfilefiname[], d
   }
   printf("Computing total mortality p.j=w1*p1j+w2*p2j+..: result on file '%s' \n",fileresprobmorprev);
   fprintf(ficlog,"Computing total mortality p.j=w1*p1j+w2*p2j+..: result on file '%s' \n",fileresprobmorprev);
-  fprintf(ficresprobmorprev,"# probabilities of dying during a year and weighted mean w1*p1j+w2*p2j+... stand dev in()\n");
+  fprintf(ficresprobmorprev,"# probabilities of dying before estepm=%d months for people of exact age and weighted probabilities w1*p1j+w2*p2j+... stand dev in()\n",estepm);
   fprintf(ficresprobmorprev,"# Age cov=%-d",ij);
   for(j=nlstate+1; j<=(nlstate+ndeath);j++){
     fprintf(ficresprobmorprev," p.%-d SE",j);
@@ -1981,7 +2093,7 @@ void varevsij(char optionfilefiname[], d
      and note for a fixed period like k years */
   /* We decided (b) to get a life expectancy respecting the most precise curvature of the
      survival function given by stepm (the optimization length). Unfortunately it
-     means that if the survival funtion is printed only each two years of age and if
+     means that if the survival function is printed every two years of age and if
      you sum them up and add 1 year (area under the trapezoids) you won't get the same 
      results. So we changed our mind and took the option of the best precision.
   */
@@ -1997,7 +2109,7 @@ void varevsij(char optionfilefiname[], d
 
 
     for(theta=1; theta <=npar; theta++){
-      for(i=1; i<=npar; i++){ /* Computes gradient */
+      for(i=1; i<=npar; i++){ /* Computes gradient x + delta*/
 	xp[i] = x[i] + (i==theta ?delti[theta]:0);
       }
       hpxij(p3mat,nhstepm,age,hstepm,xp,nlstate,stepm,oldm,savm, ij);  
@@ -2019,14 +2131,17 @@ void varevsij(char optionfilefiname[], d
 	    gp[h][j] += prlim[i][i]*p3mat[i][j][h];
 	}
       }
-      /* This for computing forces of mortality (h=1)as a weighted average */
+      /* This is for computing the probability of death (h=1 means
+         computed over a product of hstepm matrices = hstepm*stepm months)
+         as an average weighted by prlim.
+      */
       for(j=nlstate+1,gpp[j]=0.;j<=nlstate+ndeath;j++){
 	for(i=1; i<= nlstate; i++)
 	  gpp[j] += prlim[i][i]*p3mat[i][j][1];
       }    
-      /* end force of mortality */
+      /* end probability of death */
 
-      for(i=1; i<=npar; i++) /* Computes gradient */
+      for(i=1; i<=npar; i++) /* Computes gradient x - delta */
 	xp[i] = x[i] - (i==theta ?delti[theta]:0);
       hpxij(p3mat,nhstepm,age,hstepm,xp,nlstate,stepm,oldm,savm, ij);  
       prevalim(prlim,nlstate,xp,age,oldm,savm,ftolpl,ij);
@@ -2047,12 +2162,15 @@ void varevsij(char optionfilefiname[], d
 	    gm[h][j] += prlim[i][i]*p3mat[i][j][h];
 	}
       }
-      /* This for computing force of mortality (h=1)as a weighted average */
+      /* This is for computing the probability of death (h=1 means
+         computed over a product of hstepm matrices = hstepm*stepm months)
+         as an average weighted by prlim.
+      */
       for(j=nlstate+1,gmp[j]=0.;j<=nlstate+ndeath;j++){
 	for(i=1; i<= nlstate; i++)
 	  gmp[j] += prlim[i][i]*p3mat[i][j][1];
       }    
-      /* end force of mortality */
+      /* end probability of death */
 
       for(j=1; j<= nlstate; j++) /* vareij */
 	for(h=0; h<=nhstepm; h++){
@@ -2097,6 +2215,7 @@ void varevsij(char optionfilefiname[], d
       for(i=nlstate+1;i<=nlstate+ndeath;i++)
 	varppt[j][i]=doldmp[j][i];
     /* end ppptj */
+    /*  x centered again */
     hpxij(p3mat,nhstepm,age,hstepm,x,nlstate,stepm,oldm,savm, ij);  
     prevalim(prlim,nlstate,x,age,oldm,savm,ftolpl,ij);
  
@@ -2110,12 +2229,15 @@ void varevsij(char optionfilefiname[], d
       }
     }
     
-    /* This for computing force of mortality (h=1)as a weighted average */
+    /* This is for computing the probability of death (h=1 means
+       computed over a product of hstepm (estepm) matrices = hstepm*stepm months)
+       as an average weighted by prlim.
+    */
     for(j=nlstate+1,gmp[j]=0.;j<=nlstate+ndeath;j++){
       for(i=1; i<= nlstate; i++)
 	gmp[j] += prlim[i][i]*p3mat[i][j][1]; 
     }    
-    /* end force of mortality */
+    /* end probability of death */
 
     fprintf(ficresprobmorprev,"%3d %d ",(int) age, ij);
     for(j=nlstate+1; j<=(nlstate+ndeath);j++){
@@ -2149,7 +2271,7 @@ void varevsij(char optionfilefiname[], d
   fprintf(ficgp,"\n replot \"%s\"  u 1:(($3+1.96*$4)*%6.3f) t \"95\%% interval\" w l 2 ",fileresprobmorprev,YEARM/estepm);
   fprintf(ficgp,"\n replot \"%s\"  u 1:(($3-1.96*$4)*%6.3f) not w l 2 ",fileresprobmorprev,YEARM/estepm);
   fprintf(fichtm,"\n<br> File (multiple files are possible if covariates are present): <A href=\"%s\">%s</a>\n",fileresprobmorprev,fileresprobmorprev);
-  fprintf(fichtm,"\n<br> Probability is computed over estepm=%d months. <br> <img src=\"varmuptjgr%s%s.png\"> <br>\n", stepm,digitp,digit);
+  fprintf(fichtm,"\n<br> Probability is computed over estepm=%d months. <br> <img src=\"varmuptjgr%s%s.png\"> <br>\n", estepm,digitp,digit);
   /*  fprintf(fichtm,"\n<br> Probability is computed over estepm=%d months and then divided by estepm and multiplied by %.0f in order to have the probability to die over a year <br> <img src=\"varmuptjgr%s%s.png\"> <br>\n", stepm,YEARM,digitp,digit);
 */
   fprintf(ficgp,"\nset out \"varmuptjgr%s%s.png\";replot;",digitp,digit);
@@ -3900,7 +4022,7 @@ Interval (in months) between two waves:
   free_imatrix(mw,1,lastpass-firstpass+1,1,imx);   
   free_ivector(num,1,n);
   free_vector(agedc,1,n);
-  free_matrix(covar,0,NCOVMAX,1,n);
+  /*free_matrix(covar,0,NCOVMAX,1,n);*/
   /*free_matrix(covar,1,NCOVMAX,1,n);*/
   fclose(ficparo);
   fclose(ficres);
@@ -4183,7 +4305,8 @@ Interval (in months) between two waves:
   free_matrix(oldms, 1,nlstate+ndeath,1,nlstate+ndeath);
   free_matrix(newms, 1,nlstate+ndeath,1,nlstate+ndeath);
   free_matrix(savms, 1,nlstate+ndeath,1,nlstate+ndeath);
- 
+  
+  free_matrix(covar,0,NCOVMAX,1,n);
   free_matrix(matcov,1,npar,1,npar);
   free_vector(delti,1,npar);
   free_matrix(agev,1,maxwav,1,imx);