--- imach/src/imach.c 2010/04/26 20:30:53 1.136 +++ imach/src/imach.c 2010/04/29 18:11:38 1.137 @@ -1,6 +1,10 @@ -/* $Id: imach.c,v 1.136 2010/04/26 20:30:53 brouard Exp $ +/* $Id: imach.c,v 1.137 2010/04/29 18:11:38 brouard Exp $ $State: Exp $ $Log: imach.c,v $ + Revision 1.137 2010/04/29 18:11:38 brouard + (Module): Checking covariates for more complex models + than V1+V2. A lot of change to be done. Unstable. + Revision 1.136 2010/04/26 20:30:53 brouard (Module): merging some libgsl code. Fixing computation of likelione (using inter/intrapolation if mle = 0) in order to @@ -420,11 +424,11 @@ extern int errno; #define ODIRSEPARATOR '/' #endif -/* $Id: imach.c,v 1.136 2010/04/26 20:30:53 brouard Exp $ */ +/* $Id: imach.c,v 1.137 2010/04/29 18:11:38 brouard Exp $ */ /* $State: Exp $ */ char version[]="Imach version 0.98m, April 2010, INED-EUROREVES-Institut de longevite "; -char fullversion[]="$Revision: 1.136 $ $Date: 2010/04/26 20:30:53 $"; +char fullversion[]="$Revision: 1.137 $ $Date: 2010/04/29 18:11:38 $"; char strstart[80]; char optionfilext[10], optionfilefiname[FILENAMELENGTH]; int erreur=0, nberr=0, nbwarn=0; /* Error number, number of errors number of warnings */ @@ -622,11 +626,11 @@ void replace_back_to_slash(char *s, char } char *trimbb(char *out, char *in) -{ /* Trim multiple blanks in line */ +{ /* Trim multiple blanks in line but keeps first blanks if line starts with blanks */ char *s; s=out; while (*in != '\0'){ - while( *in == ' ' && *(in+1) == ' ' && *(in+1) != '\0'){ + while( *in == ' ' && *(in+1) == ' '){ /* && *(in+1) != '\0'){*/ in++; } *out++ = *in++; @@ -635,6 +639,35 @@ char *trimbb(char *out, char *in) return s; } +char *cutv(char *blocc, char *alocc, char *in, char occ) +{ + /* cuts string in into blocc and alocc where blocc ends before last occurence of char 'occ' + and alocc starts after last occurence of char 'occ' : ex cutv(blocc,alocc,"abcdef2ghi2j",'2') + gives blocc="abcdef2ghi" and alocc="j". + If occ is not found blocc is null and alocc is equal to in. Returns alocc + */ + char *s, *t; + t=in;s=in; + while (*in != '\0'){ + while( *in == occ){ + *blocc++ = *in++; + s=in; + } + *blocc++ = *in++; + } + if (s == t) /* occ not found */ + *(blocc-(in-s))='\0'; + else + *(blocc-(in-s)-1)='\0'; + in=s; + while ( *in != '\0'){ + *alocc++ = *in++; + } + + *alocc='\0'; + return s; +} + int nbocc(char *s, char occ) { int i,j=0; @@ -647,27 +680,27 @@ int nbocc(char *s, char occ) return j; } -void cutv(char *u,char *v, char*t, char occ) -{ - /* cuts string t into u and v where u ends before first occurence of char 'occ' - and v starts after first occurence of char 'occ' : ex cutv(u,v,"abcdef2ghi2j",'2') - gives u="abcedf" and v="ghi2j" */ - int i,lg,j,p=0; - i=0; - for(j=0; j<=strlen(t)-1; j++) { - if((t[j]!= occ) && (t[j+1]== occ)) p=j+1; - } +/* void cutv(char *u,char *v, char*t, char occ) */ +/* { */ +/* /\* cuts string t into u and v where u ends before last occurence of char 'occ' */ +/* and v starts after last occurence of char 'occ' : ex cutv(u,v,"abcdef2ghi2j",'2') */ +/* gives u="abcdef2ghi" and v="j" *\/ */ +/* int i,lg,j,p=0; */ +/* i=0; */ +/* lg=strlen(t); */ +/* for(j=0; j<=lg-1; j++) { */ +/* if((t[j]!= occ) && (t[j+1]== occ)) p=j+1; */ +/* } */ - lg=strlen(t); - for(j=0; j=(p+1))(v[j-p-1] = t[j]); - } -} +/* for(j=0; j<= lg; j++) { */ +/* if (j>=(p+1))(v[j-p-1] = t[j]); */ +/* } */ +/* } */ /********************** nrerror ********************/ @@ -1434,6 +1467,9 @@ double func( double *x) if(mle==1){ for (i=1,ipmx=0, sw=0.; i<=imx; i++){ for (k=1; k<=cptcovn;k++) cov[2+k]=covar[Tvar[k]][i]; + /* In model V2+V1*V4+age*V3+V3*V2 Tvar[1] is V2, Tvar[2=V1*V4] + is 6, Tvar[3=age*V3] should not been computed because of age Tvar[4=V3*V2] + has been calculated etc */ for(mi=1; mi<= wav[i]-1; mi++){ for (ii=1;ii<=nlstate+ndeath;ii++) for (j=1;j<=nlstate+ndeath;j++){ @@ -1444,7 +1480,7 @@ double func( double *x) newm=savm; cov[2]=agev[mw[mi][i]][i]+d*stepm/YEARM; for (kk=1; kk<=cptcovage;kk++) { - cov[Tage[kk]+2]=covar[Tvar[Tage[kk]]][i]*cov[2]; + cov[Tage[kk]+2]=covar[Tvar[Tage[kk]]][i]*cov[2]; /* Tage[kk] gives the data-covariate associated with age */ } out=matprod2(newm,oldm,1,nlstate+ndeath,1,nlstate+ndeath, 1,nlstate+ndeath,pmij(pmmij,cov,ncovmodel,x,nlstate)); @@ -2503,8 +2539,8 @@ void tricode(int *Tvar, int **nbcode, in (should be 0 or 1 now) Tvar[j]. If V=sex and male is coded 0 and female is 1, then modmaxcovj=1.*/ } - - for (i=0; i<=modmaxcovj; i++) { /* i=-1 ? 0 and 1*/ + /* Ndum[0] = frequency of 0 for model-covariate j, Ndum[1] frequency of 1 etc. */ + for (i=0; i<=modmaxcovj; i++) { /* i=-1 ? 0 and 1*//* For each modality of model-cov j */ if( Ndum[i] != 0 ) ncodemax[j]++; /* Number of modalities of the j th covariate. In fact @@ -2515,7 +2551,7 @@ void tricode(int *Tvar, int **nbcode, in /* j is a covariate, n=Tvar[j] of Vn; Fills nbcode */ ij=1; for (i=1; i<=ncodemax[j]; i++) { /* i= 1 to 2 for dichotomous */ - for (k=0; k<= maxncov; k++) { /* k=-1 ? NCOVMAX*/ + for (k=0; k<= modmaxcovj; k++) { /* k=-1 ? NCOVMAX*//* maxncov or modmaxcovj */ if (Ndum[k] != 0) { /* If at least one individual responded to this modality k */ nbcode[Tvar[j]][ij]=k; /* stores the modality in an array nbcode. k is a modality. If we have model=V1+V1*sex @@ -2523,20 +2559,20 @@ void tricode(int *Tvar, int **nbcode, in ij++; } if (ij > ncodemax[j]) break; - } - } - } - - for (k=0; k< maxncov; k++) Ndum[k]=0; - - for (i=1; i<=ncovmodel-2; i++) { /* -2, cste and age */ + } /* end of loop on */ + } /* end of loop on modality */ + } /* end of loop on model-covariate j. nbcode[Tvarj][1]=0 and nbcode[Tvarj][2]=1 sets the value of covariate j*/ + + for (k=0; k< maxncov; k++) Ndum[k]=0; + + for (i=1; i<=ncovmodel-2; i++) { /* -2, cste and age */ /* Listing of all covariables in statement model to see if some covariates appear twice. For example, V1 appears twice in V1+V1*V2.*/ ij=Tvar[i]; /* Tvar might be -1 if status was unknown */ Ndum[ij]++; } ij=1; - for (i=1; i<= maxncov; i++) { + for (i=1; i<= maxncov; i++) { /* modmaxcovj is unknown here. Only Ndum[2(V2),3(age*V3), 5(V3*V2) 6(V1*V4) */ if((Ndum[i]!=0) && (i<=ncovcol)){ Tvaraff[ij]=i; /*For printing */ ij++; @@ -4533,7 +4569,7 @@ int readdata(char datafile[], int firsto for (j=maxwav;j>=1;j--){ - cutv(stra, strb,line,' '); + cutv(stra, strb, line, ' '); if(strb[0]=='.') { /* Missing status */ lval=-1; }else{ @@ -4693,71 +4729,76 @@ int decodemodel ( char model[], int last cptcovprod=j1; /*Number of products V1*V2 +v3*age = 2 */ strcpy(modelsav,model); - if ((strcmp(model,"age")==0) || (strcmp(model,"age*age")==0)){ - printf("Error. Non available option model=%s ",model); - fprintf(ficlog,"Error. Non available option model=%s ",model);fflush(ficlog); + if (strstr(model,"AGE") !=0){ + printf("Error. AGE must be in lower case 'age' model=%s ",model); + fprintf(ficlog,"Error. AGE must be in lower case model=%s ",model);fflush(ficlog); return 1; } /* This loop fills the array Tvar from the string 'model'.*/ /* j is the number of + signs in the model V1+V2+V3 j=2 i=3 to 1 */ - /* modelsav=V3*age+V2+V1+V4 strb=V3*age stra=V2+V1+V4 - i=1 Tvar[1]=3 Tage[1]=1 - i=2 Tvar[2]=2 - i=3 Tvar[3]=1 - i=4 Tvar[4]= 4 - i=5 Tvar[5] - for (k=1; k<=cptcovn;k++) { - cov[2+k]=nbcode[Tvar[k]][codtab[ij][Tvar[k]]]; - */ - for(k=1; k<=(j+1);k++){ - cutv(strb,stra,modelsav,'+'); /* keeps in strb after the first '+' - modelsav=V3*age+V2+V1+V4 strb=V3*age stra=V2+V1+V4 + /* modelsav=V2+V1+V4+age*V3 strb=age*V3 stra=V2+V1+V4 */ + /* k=4 (age*V3) Tvar[k=4]= 3 (from V3) Tage[cptcovage=1]=4 */ + /* k=3 V4 Tvar[k=3]= 4 (from V4) */ + /* k=2 V1 Tvar[k=2]= 1 (from V1) */ + /* k=1 Tvar[1]=2 (from V2) */ + /* k=5 Tvar[5] */ + /* for (k=1; k<=cptcovn;k++) { */ + /* cov[2+k]=nbcode[Tvar[k]][codtab[ij][Tvar[k]]]; */ + /* } */ + /* for (k=1; k<=cptcovage;k++) cov[2+Tage[k]]=cov[2+Tage[k]]*cov[2]; */ + for(k=cptcovn; k>=1;k--){ + cutv(stra,strb,modelsav,'+'); /* keeps in strb after the first '+' + modelsav==V2+V1+V4+V3*age strb=V3*age stra=V2+V1+V4 */ - /* if (nbocc(modelsav,'+')==0) strcpy(strb,modelsav);*/ /* and analyzes it */ + if (nbocc(modelsav,'+')==0) strcpy(strb,modelsav); /* and analyzes it */ /* printf("i=%d a=%s b=%s sav=%s\n",i, stra,strb,modelsav);*/ /*scanf("%d",i);*/ - if (strchr(strb,'*')) { /* Model includes a product V3*age+V2+V1+V4 strb=V3*age */ + if (strchr(strb,'*')) { /* Model includes a product V2+V1+V4+V3*age strb=V3*age */ cutv(strd,strc,strb,'*'); /* strd*strc Vm*Vn: strb=V3*age strc=age strd=V3 ; V3*V2 strc=V2, strd=V3 */ if (strcmp(strc,"age")==0) { /* Vn*age */ cptcovprod--; cutv(strb,stre,strd,'V'); /* stre="V3" */ - Tvar[k]=atoi(stre); /* V1+V3*age+V2 Tvar[2]=3, and Tvar[3]=2 */ + Tvar[k]=atoi(stre); /* V2+V1+V4+V3*age Tvar[4]=2 ; V1+V2*age Tvar[2]=2 */ cptcovage++; /* Sums the number of covariates which include age as a product */ - Tage[cptcovage]=k; /* Tage[1] =2 */ + Tage[cptcovage]=k; /* Tage[1] = 4 */ /*printf("stre=%s ", stre);*/ - } - else if (strcmp(strd,"age")==0) { /* or age*Vn */ + } else if (strcmp(strd,"age")==0) { /* or age*Vn */ cptcovprod--; cutv(strb,stre,strc,'V'); Tvar[k]=atoi(stre); cptcovage++; Tage[cptcovage]=k; - } - else { /* Age is not in the model V1+V3*V2+V2 strb=V3*V2*/ + } else { /* Age is not in the model product V2+V1+V1*V4+V3*age+V3*V2 strb=V3*V2*/ + /* loops on k1=1 (V3*V2) and k1=2 V4*V3 */ cutv(strb,stre,strc,'V'); /* strc= Vn, stre is n; strb=V3*V2 stre=3 strc=*/ - Tvar[k]=ncovcol+k1; /* find 'n' in Vn and stores in Tvar. - If already ncovcol=2 and model=V2*V1 Tvar[1]=2+1 and Tvar[2]=2+2 etc */ + Tvar[k]=ncovcol+k1; /* For model-covariate k tells which data-covariate to use but + because this model-covariate is a construction we invent a new column + ncovcol + k1 + If already ncovcol=4 and model=V2+V1+V1*V4+age*V3+V3*V2 + Tvar[3=V1*V4]=4+1 Tvar[5=V3*V2]=4 + 2= 6, etc */ cutv(strb,strc,strd,'V'); /* strd was Vm, strc is m */ - Tprod[k1]=k; /* Tprod[1] */ - Tvard[k1][1]=atoi(strc); /* m*/ - Tvard[k1][2]=atoi(stre); /* n */ - Tvar[cptcovn+k2]=Tvard[k1][1]; - Tvar[cptcovn+k2+1]=Tvard[k1][2]; - for (i=1; i<=lastobs;i++) /* Computes the new covariate which is a product of covar[n][i]* covar[m][i] - and is stored at ncovol+k1 */ + Tprod[k1]=k; /* Tprod[1]=3(=V1*V4) for V2+V1+V1*V4+age*V3+V3*V2 */ + Tvard[k1][1]=atoi(strc); /* m 1 for V1*/ + Tvard[k1][2]=atoi(stre); /* n 4 for V4*/ + Tvar[cptcovn+k2]=Tvard[k1][1]; /* Tvar[(cptcovn=4+k2=1)=5]= 1 (V1) */ + Tvar[cptcovn+k2+1]=Tvard[k1][2]; /* Tvar[(cptcovn=4+(k2=1)+1)=6]= 4 (V4) */ + for (i=1; i<=lastobs;i++){ + /* Computes the new covariate which is a product of + covar[n][i]* covar[m][i] and stores it at ncovol+k1 */ covar[ncovcol+k1][i]=covar[atoi(stre)][i]*covar[atoi(strc)][i]; + } k1++; k2=k2+2; - } - } + } /* End age is not in the model */ + } /* End if model includes a product */ else { /* no more sum */ /*printf("d=%s c=%s b=%s\n", strd,strc,strb);*/ /* scanf("%d",i);*/ cutv(strd,strc,strb,'V'); - Tvar[i]=atoi(strc); + Tvar[k]=atoi(strc); } - strcpy(modelsav,stra); /* modelsav=V2+V3*age+V1+V4 strb=V3*age+V1+V4 */ + strcpy(modelsav,stra); /* modelsav=V2+V1+V4 stra=V2+V1+V4 */ /*printf("a=%s b=%s sav=%s\n", stra,strb,modelsav); scanf("%d",i);*/ } /* end of loop + */ @@ -4773,7 +4814,7 @@ int decodemodel ( char model[], int last scanf("%d ",i);*/ - return (0); + return (0); /* with covar[new additional covariate if product] and Tage if age */ endread: printf("Exiting decodemodel: "); return (1); @@ -5317,25 +5358,51 @@ run imach with mle=-1 to get a correct t anint=matrix(1,maxwav,1,n); s=imatrix(1,maxwav+1,1,n); /* s[i][j] health state for wave i and individual j */ tab=ivector(1,NCOVMAX); - ncodemax=ivector(1,8); + ncodemax=ivector(1,8); /* hard coded ? */ /* Reads data from file datafile */ if (readdata(datafile, firstobs, lastobs, &imx)==1) goto end; /* Calculation of the number of parameters from char model */ - Tvar=ivector(1,NCOVMAX); /* Was 15 changed to NCOVMAX. Stores the number n of the covariates in Vm+Vn at 1 and m at 2 */ + /* modelsav=V2+V1+V4+age*V3 strb=age*V3 stra=V2+V1+V4 + k=4 (age*V3) Tvar[k=4]= 3 (from V3) Tag[cptcovage=1]=4 + k=3 V4 Tvar[k=3]= 4 (from V4) + k=2 V1 Tvar[k=2]= 1 (from V1) + k=1 Tvar[1]=2 (from V2) + */ + Tvar=ivector(1,NCOVMAX); /* Was 15 changed to NCOVMAX. */ + /* V2+V1+V4+age*V3 is a model with 4 covariates (3 plus signs). + For each model-covariate stores the data-covariate id. Tvar[1]=2, Tvar[2]=1, Tvar[3]=4, + Tvar[4=age*V3] is 3 and 'age' is recorded in Tage. + */ + /* For model-covariate k tells which data-covariate to use but + because this model-covariate is a construction we invent a new column + ncovcol + k1 + If already ncovcol=4 and model=V2+V1+V1*V4+age*V3 + Tvar[3=V1*V4]=4+1 etc */ Tprod=ivector(1,15); + /* Tprod[k1=1]=3(=V1*V4) for V2+V1+V1*V4+age*V3 + if V2+V1+V1*V4+age*V3+V3*V2 TProd[k1=2]=5 (V3*V2) + */ Tvaraff=ivector(1,15); - Tvard=imatrix(1,15,1,2); - Tage=ivector(1,15); + Tvard=imatrix(1,15,1,2); /* For V3*V2 Tvard[k1=2][1]=3 (V3) Tvard[k1=2][2]=2(V2) */ + Tage=ivector(1,15); /* Gives the covariate id of covariates associated with age: V2 + V1 + age*V4 + V3*age + 4 covariates (3 plus signs) + Tage[1=V3*age]= 4; Tage[2=age*V4] = 3 + */ if(decodemodel(model, lastobs) == 1) goto end; + if((double)(lastobs-imx)/(double)imx > 1.10){ + nbwarn++; + printf("Warning: The value of parameter lastobs=%d is big compared to the \n effective number of cases imx=%d, please adjust, \n otherwise you are allocating more memory than necessary.\n",lastobs, imx); + fprintf(ficlog,"Warning: The value of parameter lastobs=%d is big compared to the \n effective number of cases imx=%d, please adjust, \n otherwise you are allocating more memory than necessary.\n",lastobs, imx); + } /* if(mle==1){*/ - if (weightopt != 1) { /* Maximisation without weights*/ - for(i=1;i<=n;i++) weight[i]=1.0; + if (weightopt != 1) { /* Maximisation without weights. We can have weights different from 1 but want no weight*/ + for(i=1;i<=imx;i++) weight[i]=1.0; /* changed to imx */ } /*-calculation of age at interview from date of interview and age at death -*/