File:  [Local Repository] / imach / src / simulation.R
Revision 1.3: download - view: text, annotated - select for diffs
Thu Jul 20 08:40:31 2023 UTC (10 months, 4 weeks ago) by brouard
Branches: MAIN
CVS tags: HEAD
*** empty log message ***

    1: # $Id: simulation.R,v 1.3 2023/07/20 08:40:31 brouard Exp $
    2: #  $State: Exp $
    3: #  
    4: # Simulating data for IMaCh tests
    5: # Feinuo Sun and Nicolas Brouard (INED, July 2023)
    6: # 
    7: install.packages("dplyr")
    8: if(!require("tidyverse")){
    9:     install.packages("tidyverse",repos="http://cran.r-project.org")
   10: #    install.packages("tidyverse")
   11:     require("tidyverse")
   12:   }
   13: # LIBRARIES
   14: library(haven)
   15: #samplesize<- 10000
   16: samplesize<- 50000
   17: 
   18: # Which Life table?
   19: # Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year)
   20: # and a_m, modal age at death. From whose we can compute the life expectancy e_65.
   21: 
   22: install.packages("expint",repos="http://cran.r-project.org")
   23: library(expint)
   24: am <- 85
   25: mum <- 9/100
   26: expint(exp(mum*(65-am)))
   27: am <- 84.8506
   28: exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
   29: # e(65) = 18.10904 years
   30: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
   31: 
   32: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
   33: e65
   34:                                         # Life table from 0
   35: l <- function(a,am,mum){
   36:     exp(-exp(mum*(a-am)))/exp(-exp(-mum*am))
   37: }
   38: l65 <- function(a,am,mum){
   39:     exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am)))
   40: }
   41: l65(65,am,mum)
   42: #l65<-l(65,am,mum) # 0.84
   43: l65(100,am,mum)
   44: 
   45: #curve(l65(x,am,mum), from = 65, to = 100)
   46: # Add a line:
   47: #curve(1 - l65(x,am,mum), add = TRUE, col = "red")
   48: 
   49: # inverse function from l(a) find a.
   50: linv <- function(la,am,mum){
   51:     am+log(exp(-mum*am) -log(la))/mum
   52: }
   53: linv(0.001,am,mum)
   54: linv(1,am,mum)
   55: 
   56: l65inv <- function(la,am,mum){
   57:     am+log(exp(mum*(65-am)) -log(la))/mum
   58: }
   59: l65inv(0.001,am,mum)
   60: l65inv(1,am,mum)
   61: 
   62: am
   63: mum
   64: l65bisinv <- function(la){
   65:     84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09
   66: }
   67: l65bisinv(0.0001)
   68: l65bisinv(1)
   69: 
   70: set.seed(128)
   71: zeroone<-runif(samplesize,min=0.00000001,max=1)
   72: zeroone<-runif(samplesize,min=0.001,max=1)
   73: lifelength <- lapply(zeroone,l65bisinv)
   74: #slife<- sort(unlist(lifelength),decreasing=TRUE)
   75: #?sort
   76: #head(slife)
   77: #mean(unlist(lifelength)-65) #17.99482
   78: #head(lifelength,10)
   79: #class(lifelength)
   80: 
   81: #lifelength <- rnorm(n=samplesize, mean=85, sd=16)
   82: 
   83: set.seed(124)
   84: # First interview 4th semester
   85: monthinterview <- runif(samplesize, min=10,max=13)
   86: st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1
   87: # state 2
   88: st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1
   89: # date of birth (simulating population in 1998 age 65+), uniformly?
   90: popage65110in1998<- runif(samplesize, min=65, max=110)
   91: #gender
   92: ragender <- rbinom(n=samplesize, size=1, prob=0.56)
   93: 
   94: r98iwmid <- 1998 + (monthinterview-1)/12
   95: yrinterview1 <- floor(r98iwmid)
   96: #monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1
   97: monthinw1 <- floor(monthinterview)
   98: 
   99: 
  100: int_98 <- paste0(monthinw1,"/",yrinterview1)
  101: 
  102: rabdate <- r98iwmid - popage65110in1998 
  103: birthyr <- floor(rabdate)
  104: monthdb <- floor((rabdate - floor(rabdate))*12)+1
  105: brt <- paste0(monthdb,"/",birthyr)
  106: # date of death for dropping cases
  107: #raddate <- rabdate + lifelength
  108: raddate <- rabdate + unlist(lifelength)
  109: dateinterview2 <- 1998 + 2 + monthinterview/12
  110: #head(cbind(r98iwmid,dateinterview2,raddate),70)
  111: # people whose death occured before the interview will be dropped (radnate)
  112: radndate <- if_else(raddate < r98iwmid,  NA, raddate )
  113: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),n=5299)
  114: #head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70)
  115: # in order to avoid date of death known after last wave and potential bias
  116: # people whose death will occur after last interview will have an unkwonn (99/9999) date of death
  117: lastinterview<- dateinterview2
  118: radldate<- if_else(radndate > dateinterview2+1/12,  9999, radndate )
  119: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),n=5299)
  120: radldate<- if_else(dateinterview2+1/12 > radndate & radndate > dateinterview2, radndate , radldate )
  121: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),n=5299)
  122: ddtyr <- if_else((!is.na(radldate) & radldate ==9999),  9999, floor(radldate))
  123: monthdd <- if_else((!is.na(radldate) & radldate ==9999),  99,floor((radldate - floor(radldate))*12)+1)
  124: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70)
  125: ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA)
  126: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70)
  127: weight <- rep(1, samplesize)
  128: # state 1 st_98
  129: ageatinterview1 <- r98iwmid - rabdate
  130: # interview 2 st 2000
  131: # same month of interview
  132: yrinterview2 <- floor(dateinterview2)
  133: monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1
  134: int_00 <- paste0(monthinw2,"/",yrinterview2)
  135: #head(cbind(r98iwmid,dateinterview2,int_00,raddate,radndate,lifelength,radldate),70)
  136: ageatinterview2 <- dateinterview2 - rabdate
  137: # state 2
  138: st_00 <- if_else(raddate < dateinterview2, 3, st_00)
  139: hhidpn <- seq(1,samplesize)
  140: HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00)
  141: head(HRSSIMULdata,70)                             
  142: 
  143: HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt))
  144: HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )]
  145: head(HRSSIMULdata,70)                             
  146: #### export to txt file for IMaCh
  147: write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F)
  148: 
  149: 
  150: #HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")]
  151: 
  152: ## # VARIABLE SELECTIONS
  153: ## dt1<-dat[,c( "hhidpn",   # ID number of the respondent
  154: ##             "ragender",  # respondent's gender 1M or 2F, mean 1.56
  155: ##             "rabdate",   # Respondent's birth date (1890.0  to    1995.0)
  156: ##             "raddate",   # Respondent's death date (1917.0  to   2019.0)
  157: ##             "rahispan",  # Mexican-American and Other Hispanic are recoded to "1." 
  158: ##             "raracem",   # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing . 
  159: ##             "raeduc",    # Education: Years of
  160: ##             "r11mstat",  # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1
  161: ##             "r12mstat",    #  Married spouse absent 2
  162: ##             "r13mstat",    #  Partnered  3 Separated 4 Divorced  5
  163: ##             "r14mstat",    #  Widowed 7, Never married 8
  164: ##             "r11iwmid",    #  Date of interview at wave 11
  165: ##             "r12iwmid",    #
  166: ##             "r13iwmid",    #
  167: ##             "r14iwmid",    #
  168: ##             "s11ddate",    # Date of death (from wave 11) 1669
  169: ##             "s12ddate",    # 967
  170: ##             "s13ddate",    # 381
  171: ##             "s14ddate",    #   8
  172: ##             "r11cesd",    # CESD score at 11, 19400, mean 1.54
  173: ##             "r12cesd",     #
  174: ##             "r13cesd",     #
  175: ##             "r14cesd",     #
  176: ##             "r11agey_m",  # Age at mid wave 11 66.85
  177: ##             "r11adla",    # Sum of ADLs at wave 11, 0.41 
  178: ##             "r12adla",     # 0.43
  179: ##             "r13adla",     # 0.40
  180: ##             "r14adla",     # 0.39
  181: ##             "r11wtresp"   # Weight
  182: ##             )]
  183: 
  184: # INDIVIDUAL SELECTIONS:
  185: # Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012
  186: #dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis)
  187: #write.csv2(dt2,file="hrs12xSAS_noM.csv")

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>