--- imach/src/imach.c 2016/08/30 15:01:20 1.242 +++ imach/src/imach.c 2016/09/08 16:07:27 1.250 @@ -1,6 +1,30 @@ -/* $Id: imach.c,v 1.242 2016/08/30 15:01:20 brouard Exp $ +/* $Id: imach.c,v 1.250 2016/09/08 16:07:27 brouard Exp $ $State: Exp $ $Log: imach.c,v $ + Revision 1.250 2016/09/08 16:07:27 brouard + Summary: continue + + Revision 1.249 2016/09/07 17:14:18 brouard + Summary: Starting values from frequencies + + Revision 1.248 2016/09/07 14:10:18 brouard + *** empty log message *** + + Revision 1.247 2016/09/02 11:11:21 brouard + *** empty log message *** + + Revision 1.246 2016/09/02 08:49:22 brouard + *** empty log message *** + + Revision 1.245 2016/09/02 07:25:01 brouard + *** empty log message *** + + Revision 1.244 2016/09/02 07:17:34 brouard + *** empty log message *** + + Revision 1.243 2016/09/02 06:45:35 brouard + *** empty log message *** + Revision 1.242 2016/08/30 15:01:20 brouard Summary: Fixing a lots @@ -928,12 +952,12 @@ typedef struct { #define ODIRSEPARATOR '\\' #endif -/* $Id: imach.c,v 1.242 2016/08/30 15:01:20 brouard Exp $ */ +/* $Id: imach.c,v 1.250 2016/09/08 16:07:27 brouard Exp $ */ /* $State: Exp $ */ #include "version.h" char version[]=__IMACH_VERSION__; char copyright[]="February 2016,INED-EUROREVES-Institut de longevite-Japan Society for the Promotion of Science (Grant-in-Aid for Scientific Research 25293121), Intel Software 2015-2018"; -char fullversion[]="$Revision: 1.242 $ $Date: 2016/08/30 15:01:20 $"; +char fullversion[]="$Revision: 1.250 $ $Date: 2016/09/08 16:07:27 $"; char strstart[80]; char optionfilext[10], optionfilefiname[FILENAMELENGTH]; int erreur=0, nberr=0, nbwarn=0; /* Error number, number of errors number of warnings */ @@ -2222,7 +2246,8 @@ void powell(double p[], double **xi, int /* printf("\n"); */ /* fprintf(ficlog,"\n"); */ } - if (2.0*fabs(fp-(*fret)) <= ftol*(fabs(fp)+fabs(*fret))) { /* Did we reach enough precision? */ + /* if (2.0*fabs(fp-(*fret)) <= ftol*(fabs(fp)+fabs(*fret))) { /\* Did we reach enough precision? *\/ */ + if (2.0*fabs(fp-(*fret)) <= ftol) { /* Did we reach enough precision? */ /* We could compare with a chi^2. chisquare(0.95,ddl=1)=3.84 */ /* By adding age*age in a model, the new -2LL should be lower and the difference follows a */ /* a chisquare statistics with 1 degree. To be significant at the 95% level, it should have */ @@ -2571,6 +2596,7 @@ Earliest age to start was %d-%d=%d, ncvl /* If we start from prlim again, prlim tends to a constant matrix */ int i, ii,j,k; + int first=0; double *min, *max, *meandiff, maxmax,sumnew=0.; /* double **matprod2(); */ /* test */ double **out, cov[NCOVMAX+1], **bmij(); @@ -2696,7 +2722,12 @@ Earliest age to start was %d-%d=%d, ncvl } } /* age loop */ /* After some age loop it doesn't converge */ - printf("Warning: the back stable prevalence at age %d did not converge with the required precision (%g > ftolpl=%g) within %.0f years. Try to lower 'ftolpl'. \n\ + if(first){ + first=1; + printf("Warning: the back stable prevalence at age %d did not converge with the required precision (%g > ftolpl=%g) within %.0f years. Try to lower 'ftolpl'. Others in log file only...\n\ +Oldest age to start was %d-%d=%d, ncvloop=%d, ncvyear=%d\n", (int)age, maxmax, ftolpl, delaymax, (int)age, (int)delaymax, (int)agefin, ncvloop, *ncvyear); + } + fprintf(ficlog,"Warning: the back stable prevalence at age %d did not converge with the required precision (%g > ftolpl=%g) within %.0f years. Try to lower 'ftolpl'. \n\ Oldest age to start was %d-%d=%d, ncvloop=%d, ncvyear=%d\n", (int)age, maxmax, ftolpl, delaymax, (int)age, (int)delaymax, (int)agefin, ncvloop, *ncvyear); /* Try to lower 'ftol', for example from 1.e-8 to 6.e-9.\n", ftolpl, (int)age, (int)delaymax, (int)agefin, ncvloop, (int)age-(int)agefin); */ free_vector(min,1,nlstate); @@ -3213,7 +3244,7 @@ double func( double *x) Then computes with function pmij which return a matrix p[i][j] giving the elementary probability to be observed in j being in i according to the model. */ - ioffset=2+nagesqr+cptcovage; + ioffset=2+nagesqr ; /* Fixed */ for (k=1; k<=ncovf;k++){ /* Simple and product fixed covariates without age* products */ cov[ioffset+TvarFind[k]]=covar[Tvar[TvarFind[k]]][i];/* V5+V4+V3+V4*V3+V5*age+V2+V1*V2+V1*age+V1, only V1 is fixed (k=6)*/ @@ -3532,7 +3563,8 @@ double funcone( double *x) for(k=1; k<=nlstate; k++) ll[k]=0.; ioffset=0; for (i=1,ipmx=0, sw=0.; i<=imx; i++){ - ioffset=2+nagesqr+cptcovage; + /* ioffset=2+nagesqr+cptcovage; */ + ioffset=2+nagesqr; /* Fixed */ /* for (k=1; k<=cptcovn;k++) cov[2+nagesqr+k]=covar[Tvar[k]][i]; */ /* for (k=1; k<=ncoveff;k++){ /\* Simple and product fixed Dummy covariates without age* products *\/ */ @@ -3582,10 +3614,11 @@ double funcone( double *x) agebegin=agev[mw[mi][i]][i]; /* Age at beginning of effective wave */ ageend=agev[mw[mi][i]][i] + (dh[mi][i])*stepm/YEARM; /* Age at end of effective wave and at the end of transition */ for(d=0; d=10 || firstime ==1){ - printf("Warning: directions %d-%d, you are not estimating the Hessian at the exact maximum likelihood; you may increase ftol=%.2e\n",thetai,thetaj, ftol); - fprintf(ficlog,"Warning: directions %d-%d, you are not estimating the Hessian at the exact maximum likelihood; you may increase ftol=%.2e\n",thetai,thetaj, ftol); + printf("Warning: directions %d-%d, you are not estimating the Hessian at the exact maximum likelihood; you could increase ftol=%.2e\n",thetai,thetaj, ftol); + fprintf(ficlog,"Warning: directions %d-%d, you are not estimating the Hessian at the exact maximum likelihood; you could increase ftol=%.2e\n",thetai,thetaj, ftol); printf("%d %d k=%d, k1=%.12e k2=%.12e k3=%.12e k4=%.12e delti*k=%.12e deltj*k=%.12e, xi-de*k=%.12e xj-de*k=%.12e res=%.12e k1234=%.12e,k1-2=%.12e,k3-4=%.12e\n",thetai,thetaj,k,k1,k2,k3,k4,delti[thetai]/k,delti[thetaj]/k,x[thetai]-delti[thetai]/k,x[thetaj]-delti[thetaj]/k, res,k1-k2-k3+k4,k1-k2,k3-k4); fprintf(ficlog,"%d %d k=%d, k1=%.12e k2=%.12e k3=%.12e k4=%.12e delti*k=%.12e deltj*k=%.12e, xi-de*k=%.12e xj-de*k=%.12e res=%.12e k1234=%.12e,k1-2=%.12e,k3-4=%.12e\n",thetai,thetaj,k,k1,k2,k3,k4,delti[thetai]/k,delti[thetaj]/k,x[thetai]-delti[thetai]/k,x[thetaj]-delti[thetaj]/k, res,k1-k2-k3+k4,k1-k2,k3-k4); } @@ -4153,12 +4186,12 @@ void pstamp(FILE *fichier) } /************ Frequencies ********************/ -void freqsummary(char fileres[], int iagemin, int iagemax, int **s, double **agev, int nlstate, int imx, \ +void freqsummary(char fileres[], double p[], int iagemin, int iagemax, int **s, double **agev, int nlstate, int imx, \ int *Tvaraff, int *invalidvarcomb, int **nbcode, int *ncodemax,double **mint,double **anint, char strstart[], \ int firstpass, int lastpass, int stepm, int weightopt, char model[]) -{ /* Some frequencies */ +{ /* Some frequencies as well as proposing some starting values */ - int i, m, jk, j1, bool, z1,j, k, iv; + int i, m, jk, j1, bool, z1,j, k, iv, jj=0; int iind=0, iage=0; int mi; /* Effective wave */ int first; @@ -4222,7 +4255,6 @@ Title=%s
Datafile=%s Firstpass=%d La j=cptcoveff; /* Only dummy covariates of the model */ if (cptcovn<1) {j=1;ncodemax[1]=1;} - first=1; /* Detects if a combination j1 is empty: for a multinomial variable like 3 education levels: reference=low_education V1=0,V2=0 @@ -4230,7 +4262,11 @@ Title=%s
Datafile=%s Firstpass=%d La high_educ V1=0 V2=1 Then V1=1 and V2=1 is a noisy combination that we want to exclude for the list 2**cptcoveff */ - + dateintsum=0; + k2cpt=0; + + for (j = 0; j <= cptcoveff; j+=cptcoveff){ + first=1; for (j1 = 1; j1 <= (int) pow(2,j); j1++){ /* Loop on covariates combination in order of model, excluding quantitatives V4=0, V3=0 for example, fixed or varying covariates */ posproptt=0.; /*printf("cptcoveff=%d Tvaraff=%d", cptcoveff,Tvaraff[1]); @@ -4253,25 +4289,27 @@ Title=%s
Datafile=%s Firstpass=%d La /* } */ /* } */ - dateintsum=0; - k2cpt=0; + /* dateintsum=0; */ + /* k2cpt=0; */ + /* For that combination of covariate j1, we count and print the frequencies in one pass */ for (iind=1; iind<=imx; iind++) { /* For each individual iind */ bool=1; + if(j !=0){ if(anyvaryingduminmodel==0){ /* If All fixed covariates */ if (cptcoveff >0) { /* Filter is here: Must be looked at for model=V1+V2+V3+V4 */ /* for (z1=1; z1<= nqfveff; z1++) { */ /* meanq[z1]+=coqvar[Tvar[z1]][iind]; /\* Computes mean of quantitative with selected filter *\/ */ /* } */ - for (z1=1; z1<=cptcoveff; z1++) { + for (z1=1; z1<=cptcoveff; z1++) { /* loops on covariates in the model */ /* if(Tvaraff[z1] ==-20){ */ /* /\* sumnew+=cotvar[mw[mi][iind]][z1][iind]; *\/ */ /* }else if(Tvaraff[z1] ==-10){ */ /* /\* sumnew+=coqvar[z1][iind]; *\/ */ /* }else */ - if (covar[Tvaraff[z1]][iind]!= nbcode[Tvaraff[z1]][codtabm(j1,z1)]){ - /* Tests if this individual iind responded to j1 (V4=1 V3=0) */ - bool=0; + if (covar[Tvaraff[z1]][iind]!= nbcode[Tvaraff[z1]][codtabm(j1,z1)]){ /* for combination j1 of covariates */ + /* Tests if this individual iind responded to combination j1 (V4=1 V3=0) */ + bool=0; /* bool should be equal to 1 to be selected, one covariate value failed */ /* printf("bool=%d i=%d, z1=%d, Tvaraff[%d]=%d, covar[Tvarff][%d]=%2f, codtabm(%d,%d)=%d, nbcode[Tvaraff][codtabm(%d,%d)=%d, j1=%d\n", bool,i,z1, z1, Tvaraff[z1],i,covar[Tvaraff[z1]][i],j1,z1,codtabm(j1,z1), j1,z1,nbcode[Tvaraff[z1]][codtabm(j1,z1)],j1);*/ @@ -4280,16 +4318,20 @@ Title=%s
Datafile=%s Firstpass=%d La } /* end z1 */ } /* cptcovn > 0 */ } /* end any */ + }/* end j==0 */ if (bool==1){ /* We selected an individual iind satisfying combination j1 or all fixed */ /* for(m=firstpass; m<=lastpass; m++){ */ for(mi=1; miDatafile=%s Firstpass=%d La } } }/* Some are varying covariates, we tried to speed up if all fixed covariates in the model, avoiding waves loop */ + } /* end j==0 */ /* bool =0 we keep that guy which corresponds to the combination of dummy values */ if(bool==1){ /* dh[m][iind] or dh[mw[mi][iind]][iind] is the delay between two effective (mi) waves m=mw[mi][iind] @@ -4316,16 +4359,20 @@ Title=%s
Datafile=%s Firstpass=%d La if(s[m][iind]==-1) printf(" num=%ld m=%d, iind=%d s1=%d s2=%d agev at m=%d agebegin=%.2f ageend=%.2f, agemed=%d\n", num[iind], m, iind,s[m][iind],s[m+1][iind], (int)agev[m][iind],agebegin, ageend, (int)((agebegin+ageend)/2.)); freq[s[m][iind]][s[m+1][iind]][(int)agev[m][iind]] += weight[iind]; /* At age of beginning of transition, where status is known */ + /* if((int)agev[m][iind] == 55) */ + /* printf("j=%d, j1=%d Age %d, iind=%d, num=%09ld m=%d\n",j,j1,(int)agev[m][iind],iind, num[iind],m); */ /* freq[s[m][iind]][s[m+1][iind]][(int)((agebegin+ageend)/2.)] += weight[iind]; */ freq[s[m][iind]][s[m+1][iind]][iagemax+3] += weight[iind]; /* Total is in iagemax+3 *//* At age of beginning of transition, where status is known */ } } /* end if between passes */ - if ((agev[m][iind]>1) && (agev[m][iind]< (iagemax+3)) && (anint[m][iind]!=9999) && (mint[m][iind]!=99)) { - dateintsum=dateintsum+k2; + if ((agev[m][iind]>1) && (agev[m][iind]< (iagemax+3)) && (anint[m][iind]!=9999) && (mint[m][iind]!=99) && (j==0)) { + dateintsum=dateintsum+k2; /* on all covariates ?*/ k2cpt++; /* printf("iind=%ld dateintmean = %lf dateintsum=%lf k2cpt=%lf k2=%lf\n",iind, dateintsum/k2cpt, dateintsum,k2cpt, k2); */ } - } /* end bool 2 */ + }else{ + bool=1; + }/* end bool 2 */ } /* end m */ } /* end bool */ } /* end iind = 1 to imx */ @@ -4335,7 +4382,7 @@ Title=%s
Datafile=%s Firstpass=%d La /* fprintf(ficresp, "#Count between %.lf/%.lf/%.lf and %.lf/%.lf/%.lf\n",jprev1, mprev1,anprev1,jprev2, mprev2,anprev2);*/ pstamp(ficresp); - if (cptcoveff>0){ + if (cptcoveff>0 && j!=0){ fprintf(ficresp, "\n#********** Variable "); fprintf(ficresphtm, "\n

********** Variable "); fprintf(ficresphtmfr, "\n

********** Variable "); @@ -4501,6 +4548,45 @@ Title=%s
Datafile=%s Firstpass=%d La } fprintf(ficresphtmfr,"\n"); } /* end selected combination of covariate j1 */ + if(j==0){ /* We can estimate starting values from the occurences in each case */ + printf("#Freqsummary\n"); + fprintf(ficlog,"\n"); + for(i=1,jk=1; i <=nlstate; i++){ + for(k=1; k <=(nlstate+ndeath); k++){ + if (k != i) { + printf("%d%d ",i,k); + fprintf(ficlog,"%d%d ",i,k); + for(jj=1; jj <=ncovmodel; jj++){ + if(jj==1){ + printf("%12.7f ln(%12.1f/%12.1f)= %12.7f ",p[jk],freq[i][k][iagemax+3],freq[i][i][iagemax+3], log(freq[i][k][iagemax+3]/freq[i][i][iagemax+3])); + fprintf(ficlog,"%12.7f ln(%12.1f/%12.1f)= %12.7f ",p[jk],freq[i][k][iagemax+3],freq[i][i][iagemax+3], log(freq[i][k][iagemax+3]/freq[i][i][iagemax+3])); + } + /* printf("%12.7f )", param[i][jj][k]); */ + /* fprintf(ficlog,"%12.7f )", param[i][jj][k]); */ + jk++; + } + printf("\n"); + fprintf(ficlog,"\n"); + } + } + } + printf("#Freqsummary\n"); + fprintf(ficlog,"\n"); + for(jk=-1; jk <=nlstate+ndeath; jk++){ + for(m=-1; m <=nlstate+ndeath; m++){ + /* param[i]|j][k]= freq[jk][m][iagemax+3] */ + printf(" %d%d=%.0f",jk,m,freq[jk][m][iagemax+3]); + fprintf(ficlog," %d%d=%.0f",jk,m,freq[jk][m][iagemax+3]); + /* if(freq[jk][m][iage] !=0 ) { /\* minimizing output *\/ */ + /* printf(" %d%d=%.0f",jk,m,freq[jk][m][iagemax+3]); */ + /* fprintf(ficlog," %d%d=%.0f",jk,m,freq[jk][m][iagemax+3]); */ + /* } */ + } + } /* end loop jk */ + printf("\n"); + fprintf(ficlog,"\n"); + } /* if j=0 */ + } /* end j */ dateintmean=dateintsum/k2cpt; fclose(ficresp); @@ -6390,7 +6476,7 @@ void printinggnuplot(char fileresu[], ch if(TKresult[nres]!= k1) continue; /* We are interested in selected combination by the resultline */ - printf("\n# 1st: Period (stable) prevalence with CI: 'VPL_' files and live state =%d ", cpt); + /* printf("\n# 1st: Period (stable) prevalence with CI: 'VPL_' files and live state =%d ", cpt); */ fprintf(ficgp,"\n# 1st: Period (stable) prevalence with CI: 'VPL_' files and live state =%d ", cpt); for (k=1; k<=cptcoveff; k++){ /* For each covariate k get corresponding value lv for combination k1 */ lv= decodtabm(k1,k,cptcoveff); /* Should be the value of the covariate corresponding to k1 combination */ @@ -6399,14 +6485,14 @@ void printinggnuplot(char fileresu[], ch /* decodtabm(13,3,4)= 2 because h=13 k= 1 1 (2) 2 */ vlv= nbcode[Tvaraff[k]][lv]; /* vlv is the value of the covariate lv, 0 or 1 */ /* For each combination of covariate k1 (V1=1, V3=0), we printed the current covariate k and its value vlv */ - printf(" V%d=%d ",Tvaraff[k],vlv); + /* printf(" V%d=%d ",Tvaraff[k],vlv); */ fprintf(ficgp," V%d=%d ",Tvaraff[k],vlv); } for (k4=1; k4<= nsq; k4++){ /* For each selected (single) quantitative value */ - printf(" V%d=%f ",Tvqresult[nres][k4],Tqresult[nres][k4]); + /* printf(" V%d=%f ",Tvqresult[nres][k4],Tqresult[nres][k4]); */ fprintf(ficgp," V%d=%f ",Tvqresult[nres][k4],Tqresult[nres][k4]); } - printf("\n#\n"); + /* printf("\n#\n"); */ fprintf(ficgp,"\n#\n"); if(invalidvarcomb[k1]){ fprintf(ficgp,"#Combination (%d) ignored because no cases \n",k1); @@ -6436,7 +6522,7 @@ void printinggnuplot(char fileresu[], ch /* fprintf(ficgp,",\"%s\" every :::%d::%d u 1:($%d) t\"Backward stable prevalence\" w l lt 3",subdirf2(fileresu,"PLB_"),k1-1,k1-1,1+cpt); */ fprintf(ficgp,",\"%s\" u 1:((",subdirf2(fileresu,"PLB_")); /* Age is in 1, nres in 2 to be fixed */ if(cptcoveff ==0){ - fprintf(ficgp,"$%d)) t 'Backward prevalence in state %d' with line ", 2+(cpt-1), cpt ); + fprintf(ficgp,"$%d)) t 'Backward prevalence in state %d' with line lt 3", 2+(cpt-1), cpt ); }else{ kl=0; for (k=1; k<=cptcoveff; k++){ /* For each combination of covariate */ @@ -6451,7 +6537,7 @@ void printinggnuplot(char fileresu[], ch /*6+1+(i-1)+(nlstate+1)*nlstate; 6+1+(1-1) +(2+1)*2=13 */ /* '' u 6:(($1==1 && $2==0 && $3==2 && $4==0)? $9/(1.-$15) : 1/0):($5==2000? 3:2) t 'p.1' with line lc variable*/ if(k==cptcoveff){ - fprintf(ficgp,"$%d==%d && $%d==%d)? $%d : 1/0) t 'Backward prevalence in state %d' ",kl+1, Tvaraff[k],kl+1+1,nbcode[Tvaraff[k]][lv], \ + fprintf(ficgp,"$%d==%d && $%d==%d)? $%d : 1/0) t 'Backward prevalence in state %d' w l lt 3",kl+1, Tvaraff[k],kl+1+1,nbcode[Tvaraff[k]][lv], \ 2+cptcoveff*2+(cpt-1), cpt ); /* 4 or 6 ?*/ }else{ fprintf(ficgp,"$%d==%d && $%d==%d && ",kl+1, Tvaraff[k],kl+1+1,nbcode[Tvaraff[k]][lv]); @@ -6521,7 +6607,7 @@ void printinggnuplot(char fileresu[], ch else fprintf(ficgp,"\" t\"\" w l lt 0,\\\n"); } /* state */ } /* vpopbased */ - fprintf(ficgp,"\nset out;set out \"%s_%d.svg\"; replot; set out; \n",subdirf2(optionfilefiname,"E_"),k1); /* Buggy gnuplot */ + fprintf(ficgp,"\nset out;set out \"%s_%d-%d.svg\"; replot; set out; \n",subdirf2(optionfilefiname,"E_"),k1,nres); /* Buggy gnuplot */ } /* end nres */ } /* k1 end 2 eme*/ @@ -8819,7 +8905,7 @@ Dummy[k] 0=dummy (0 1), 1 quantitative ( } int calandcheckages(int imx, int maxwav, double *agemin, double *agemax, int *nberr, int *nbwarn ) -{ +{/* Check ages at death */ int i, m; int firstone=0; @@ -9863,6 +9949,12 @@ int main(int argc, char *argv[]) delti=delti3[1][1]; /*delti=vector(1,npar); *//* Scale of each paramater (output from hesscov)*/ if(mle==-1){ /* Print a wizard for help writing covariance matrix */ +/* We could also provide initial parameters values giving by simple logistic regression + * only one way, that is without matrix product. We will have nlstate maximizations */ + /* for(i=1;iDatafile=%s Firstpass=%d La /* Calculates basic frequencies. Computes observed prevalence at single age and for any valid combination of covariates and prints on file fileres'p'. */ - freqsummary(fileres, agemin, agemax, s, agev, nlstate, imx, Tvaraff, invalidvarcomb, nbcode, ncodemax,mint,anint,strstart, \ + freqsummary(fileres, p, agemin, agemax, s, agev, nlstate, imx, Tvaraff, invalidvarcomb, nbcode, ncodemax,mint,anint,strstart, \ firstpass, lastpass, stepm, weightopt, model); fprintf(fichtm,"\n"); @@ -10397,9 +10494,9 @@ Interval (in months) between two waves: /* For mortality only */ if (mle==-3){ ximort=matrix(1,NDIM,1,NDIM); - for(i=1;i<=NDIM;i++) - for(j=1;j<=NDIM;j++) - ximort[i][j]=0.; + for(i=1;i<=NDIM;i++) + for(j=1;j<=NDIM;j++) + ximort[i][j]=0.; /* ximort=gsl_matrix_alloc(1,NDIM,1,NDIM); */ cens=ivector(1,n); ageexmed=vector(1,n); @@ -10635,6 +10732,10 @@ Please run with mle=-1 to get a correct printf("\n"); if(mle>=1){ /* Could be 1 or 2, Real Maximization */ /* mlikeli uses func not funcone */ + /* for(i=1;i