Annotation of imach/src/simulation.R, revision 1.2

1.2     ! brouard     1: # $Id: simulation.r,v 1.358 2023/06/14 14:57:02 brouard Exp $
        !             2: #  $State: Exp $
        !             3: #  
1.1       brouard     4: # Simulating data for IMaCh tests
                      5: # Feinuo Sun and Nicolas Brouard (INED, July 2023)
                      6: # 
                      7: install.packages("dplyr")
                      8: if(!require("tidyverse")){
                      9:     install.packages("tidyverse",repos="http://cran.r-project.org")
                     10: #    install.packages("tidyverse")
                     11:     require("tidyverse")
                     12:   }
                     13: # LIBRARIES
                     14: library(haven)
1.2     ! brouard    15: #samplesize<- 10000
        !            16: samplesize<- 50000
1.1       brouard    17: 
                     18: # Which Life table?
                     19: # Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year)
                     20: # and a_m, modal age at death. From whose we can compute the life expectancy e_65.
                     21: 
                     22: install.packages("expint",repos="http://cran.r-project.org")
                     23: library(expint)
                     24: am <- 85
                     25: mum <- 9/100
                     26: expint(exp(mum*(65-am)))
                     27: am <- 84.8506
                     28: exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
                     29: # e(65) = 18.10904 years
                     30: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
                     31: 
                     32: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
1.2     ! brouard    33: e65
        !            34:                                         # Life table from 0
1.1       brouard    35: l <- function(a,am,mum){
                     36:     exp(-exp(mum*(a-am)))/exp(-exp(-mum*am))
                     37: }
                     38: l65 <- function(a,am,mum){
                     39:     exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am)))
                     40: }
                     41: l65(65,am,mum)
                     42: #l65<-l(65,am,mum) # 0.84
                     43: l65(100,am,mum)
                     44: 
                     45: #curve(l65(x,am,mum), from = 65, to = 100)
                     46: # Add a line:
                     47: #curve(1 - l65(x,am,mum), add = TRUE, col = "red")
                     48: 
                     49: # inverse function from l(a) find a.
                     50: linv <- function(la,am,mum){
                     51:     am+log(exp(-mum*am) -log(la))/mum
                     52: }
                     53: linv(0.001,am,mum)
                     54: linv(1,am,mum)
                     55: 
                     56: l65inv <- function(la,am,mum){
                     57:     am+log(exp(mum*(65-am)) -log(la))/mum
                     58: }
                     59: l65inv(0.001,am,mum)
                     60: l65inv(1,am,mum)
                     61: 
                     62: am
                     63: mum
                     64: l65bisinv <- function(la){
                     65:     84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09
                     66: }
1.2     ! brouard    67: l65bisinv(0.0001)
1.1       brouard    68: l65bisinv(1)
                     69: 
                     70: set.seed(128)
1.2     ! brouard    71: zeroone<-runif(samplesize,min=0.00000001,max=1)
1.1       brouard    72: zeroone<-runif(samplesize,min=0.001,max=1)
                     73: lifelength <- lapply(zeroone,l65bisinv)
1.2     ! brouard    74: #slife<- sort(unlist(lifelength),decreasing=TRUE)
        !            75: #?sort
        !            76: #head(slife)
        !            77: #mean(unlist(lifelength)-65) #17.99482
        !            78: #head(lifelength,10)
        !            79: #class(lifelength)
1.1       brouard    80: 
                     81: #lifelength <- rnorm(n=samplesize, mean=85, sd=16)
                     82: 
                     83: set.seed(124)
                     84: # First interview 4th semester
1.2     ! brouard    85: monthinterview <- runif(samplesize, min=10,max=13)
1.1       brouard    86: st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1
                     87: # state 2
                     88: st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1
                     89: # date of birth (simulating population in 1998 age 65+), uniformly?
                     90: popage65110in1998<- runif(samplesize, min=65, max=110)
                     91: #gender
                     92: ragender <- rbinom(n=samplesize, size=1, prob=0.56)
                     93: 
1.2     ! brouard    94: r98iwmid <- 1998 + monthinterview/12
1.1       brouard    95: yrinterview1 <- floor(r98iwmid)
1.2     ! brouard    96: #monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1
        !            97: monthinw1 <- floor(monthinterview)
        !            98: 
1.1       brouard    99: 
                    100: int_98 <- paste0(monthinw1,"/",yrinterview1)
                    101: 
                    102: rabdate <- r98iwmid - popage65110in1998 
                    103: birthyr <- floor(rabdate)
                    104: monthdb <- floor((rabdate - floor(rabdate))*12)+1
                    105: brt <- paste0(monthdb,"/",birthyr)
                    106: # date of death for dropping cases
1.2     ! brouard   107: #raddate <- rabdate + lifelength
        !           108: raddate <- rabdate + unlist(lifelength)
1.1       brouard   109: dateinterview2 <- 1998 + 2 + monthinterview/12
                    110: #head(cbind(r98iwmid,dateinterview2,raddate),70)
                    111: # people whose death occured before the interview will be dropped (radnate)
                    112: radndate <- if_else(raddate < r98iwmid,  NA, raddate )
1.2     ! brouard   113: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),n=5299)
1.1       brouard   114: #head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70)
                    115: # in order to avoid date of death known after last wave and potential bias
                    116: # people whose death will occur after last interview will have an unkwonn (99/9999) date of death
                    117: lastinterview<- dateinterview2
1.2     ! brouard   118: radldate<- if_else(radndate > dateinterview2+1/12,  9999, radndate )
        !           119: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),n=5299)
        !           120: radldate<- if_else(dateinterview2+1/12 > radndate & radndate > dateinterview2, radndate , radldate )
        !           121: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),n=5299)
1.1       brouard   122: ddtyr <- if_else((!is.na(radldate) & radldate ==9999),  9999, floor(radldate))
                    123: monthdd <- if_else((!is.na(radldate) & radldate ==9999),  99,floor((radldate - floor(radldate))*12)+1)
                    124: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70)
                    125: ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA)
                    126: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70)
                    127: weight <- rep(1, samplesize)
                    128: # state 1 st_98
                    129: ageatinterview1 <- r98iwmid - rabdate
                    130: # interview 2 st 2000
                    131: # same month of interview
                    132: yrinterview2 <- floor(dateinterview2)
                    133: monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1
                    134: int_00 <- paste0(monthinw2,"/",yrinterview2)
1.2     ! brouard   135: #head(cbind(r98iwmid,dateinterview2,int_00,raddate,radndate,lifelength,radldate),70)
1.1       brouard   136: ageatinterview2 <- dateinterview2 - rabdate
                    137: # state 2
                    138: st_00 <- if_else(raddate < dateinterview2, 3, st_00)
                    139: hhidpn <- seq(1,samplesize)
                    140: HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00)
                    141: head(HRSSIMULdata,70)                             
                    142: 
                    143: HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt))
                    144: HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )]
                    145: head(HRSSIMULdata,70)                             
                    146: #### export to txt file for IMaCh
                    147: write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F)
                    148: 
                    149: 
                    150: #HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")]
                    151: 
                    152: ## # VARIABLE SELECTIONS
                    153: ## dt1<-dat[,c( "hhidpn",   # ID number of the respondent
                    154: ##             "ragender",  # respondent's gender 1M or 2F, mean 1.56
                    155: ##             "rabdate",   # Respondent's birth date (1890.0  to    1995.0)
                    156: ##             "raddate",   # Respondent's death date (1917.0  to   2019.0)
                    157: ##             "rahispan",  # Mexican-American and Other Hispanic are recoded to "1." 
                    158: ##             "raracem",   # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing . 
                    159: ##             "raeduc",    # Education: Years of
                    160: ##             "r11mstat",  # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1
                    161: ##             "r12mstat",    #  Married spouse absent 2
                    162: ##             "r13mstat",    #  Partnered  3 Separated 4 Divorced  5
                    163: ##             "r14mstat",    #  Widowed 7, Never married 8
                    164: ##             "r11iwmid",    #  Date of interview at wave 11
                    165: ##             "r12iwmid",    #
                    166: ##             "r13iwmid",    #
                    167: ##             "r14iwmid",    #
                    168: ##             "s11ddate",    # Date of death (from wave 11) 1669
                    169: ##             "s12ddate",    # 967
                    170: ##             "s13ddate",    # 381
                    171: ##             "s14ddate",    #   8
                    172: ##             "r11cesd",    # CESD score at 11, 19400, mean 1.54
                    173: ##             "r12cesd",     #
                    174: ##             "r13cesd",     #
                    175: ##             "r14cesd",     #
                    176: ##             "r11agey_m",  # Age at mid wave 11 66.85
                    177: ##             "r11adla",    # Sum of ADLs at wave 11, 0.41 
                    178: ##             "r12adla",     # 0.43
                    179: ##             "r13adla",     # 0.40
                    180: ##             "r14adla",     # 0.39
                    181: ##             "r11wtresp"   # Weight
                    182: ##             )]
                    183: 
                    184: # INDIVIDUAL SELECTIONS:
                    185: # Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012
                    186: #dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis)
                    187: #write.csv2(dt2,file="hrs12xSAS_noM.csv")

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>