File:  [Local Repository] / imach / src / simulation.R
Revision 1.1: download - view: text, annotated - select for diffs
Fri Jul 14 11:25:16 2023 UTC (10 months, 2 weeks ago) by brouard
Branches: MAIN
CVS tags: HEAD
Summary: R program to provie simulated data for IMaCh

    1: # Simulating data for IMaCh tests
    2: # Feinuo Sun and Nicolas Brouard (INED, July 2023)
    3: # 
    4: install.packages("dplyr")
    5: if(!require("tidyverse")){
    6:     install.packages("tidyverse",repos="http://cran.r-project.org")
    7: #    install.packages("tidyverse")
    8:     require("tidyverse")
    9:   }
   10: # LIBRARIES
   11: library(haven)
   12: samplesize<- 10000
   13: 
   14: # Which Life table?
   15: # Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year)
   16: # and a_m, modal age at death. From whose we can compute the life expectancy e_65.
   17: 
   18: install.packages("expint",repos="http://cran.r-project.org")
   19: library(expint)
   20: am <- 85
   21: mum <- 9/100
   22: expint(exp(mum*(65-am)))
   23: am <- 84.8506
   24: exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
   25: # e(65) = 18.10904 years
   26: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
   27: 
   28: e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
   29: # Life table from 0
   30: l <- function(a,am,mum){
   31:     exp(-exp(mum*(a-am)))/exp(-exp(-mum*am))
   32: }
   33: l65 <- function(a,am,mum){
   34:     exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am)))
   35: }
   36: l65(65,am,mum)
   37: #l65<-l(65,am,mum) # 0.84
   38: l65(100,am,mum)
   39: 
   40: #curve(l65(x,am,mum), from = 65, to = 100)
   41: # Add a line:
   42: #curve(1 - l65(x,am,mum), add = TRUE, col = "red")
   43: 
   44: # inverse function from l(a) find a.
   45: linv <- function(la,am,mum){
   46:     am+log(exp(-mum*am) -log(la))/mum
   47: }
   48: linv(0.001,am,mum)
   49: linv(1,am,mum)
   50: 
   51: l65inv <- function(la,am,mum){
   52:     am+log(exp(mum*(65-am)) -log(la))/mum
   53: }
   54: l65inv(0.001,am,mum)
   55: l65inv(1,am,mum)
   56: 
   57: am
   58: mum
   59: l65bisinv <- function(la){
   60:     84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09
   61: }
   62: l65bisinv(0.001)
   63: l65bisinv(1)
   64: 
   65: set.seed(128)
   66: zeroone<-runif(samplesize,min=0.001,max=1)
   67: lifelength <- lapply(zeroone,l65bisinv)
   68: 
   69: #lifelength <- rnorm(n=samplesize, mean=85, sd=16)
   70: 
   71: set.seed(124)
   72: # First interview 4th semester
   73: monthinterview <- runif(samplesize, min=9,max=12)
   74: st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1
   75: # state 2
   76: st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1
   77: # date of birth (simulating population in 1998 age 65+), uniformly?
   78: popage65110in1998<- runif(samplesize, min=65, max=110)
   79: #gender
   80: ragender <- rbinom(n=samplesize, size=1, prob=0.56)
   81: 
   82: yrinterview1 <- floor(r98iwmid)
   83: monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1
   84: 
   85: int_98 <- paste0(monthinw1,"/",yrinterview1)
   86: 
   87: r98iwmid <- 1998 + monthinterview/12
   88: rabdate <- r98iwmid - popage65110in1998 
   89: birthyr <- floor(rabdate)
   90: monthdb <- floor((rabdate - floor(rabdate))*12)+1
   91: brt <- paste0(monthdb,"/",birthyr)
   92: # date of death for dropping cases
   93: raddate <- rabdate + lifelength
   94: dateinterview2 <- 1998 + 2 + monthinterview/12
   95: #head(cbind(r98iwmid,dateinterview2,raddate),70)
   96: # people whose death occured before the interview will be dropped (radnate)
   97: radndate <- if_else(raddate < r98iwmid,  NA, raddate )
   98: #head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70)
   99: # in order to avoid date of death known after last wave and potential bias
  100: # people whose death will occur after last interview will have an unkwonn (99/9999) date of death
  101: lastinterview<- dateinterview2
  102: radldate<- if_else(radndate > dateinterview2,  9999, radndate )
  103: head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),70)
  104: ddtyr <- if_else((!is.na(radldate) & radldate ==9999),  9999, floor(radldate))
  105: monthdd <- if_else((!is.na(radldate) & radldate ==9999),  99,floor((radldate - floor(radldate))*12)+1)
  106: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70)
  107: ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA)
  108: #head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70)
  109: weight <- rep(1, samplesize)
  110: # state 1 st_98
  111: ageatinterview1 <- r98iwmid - rabdate
  112: # interview 2 st 2000
  113: # same month of interview
  114: yrinterview2 <- floor(dateinterview2)
  115: monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1
  116: int_00 <- paste0(monthinw2,"/",yrinterview2)
  117: ageatinterview2 <- dateinterview2 - rabdate
  118: # state 2
  119: st_00 <- if_else(raddate < dateinterview2, 3, st_00)
  120: hhidpn <- seq(1,samplesize)
  121: HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00)
  122: head(HRSSIMULdata,70)                             
  123: 
  124: HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt))
  125: HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )]
  126: head(HRSSIMULdata,70)                             
  127: #### export to txt file for IMaCh
  128: write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F)
  129: 
  130: 
  131: #HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")]
  132: 
  133: ## # VARIABLE SELECTIONS
  134: ## dt1<-dat[,c( "hhidpn",   # ID number of the respondent
  135: ##             "ragender",  # respondent's gender 1M or 2F, mean 1.56
  136: ##             "rabdate",   # Respondent's birth date (1890.0  to    1995.0)
  137: ##             "raddate",   # Respondent's death date (1917.0  to   2019.0)
  138: ##             "rahispan",  # Mexican-American and Other Hispanic are recoded to "1." 
  139: ##             "raracem",   # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing . 
  140: ##             "raeduc",    # Education: Years of
  141: ##             "r11mstat",  # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1
  142: ##             "r12mstat",    #  Married spouse absent 2
  143: ##             "r13mstat",    #  Partnered  3 Separated 4 Divorced  5
  144: ##             "r14mstat",    #  Widowed 7, Never married 8
  145: ##             "r11iwmid",    #  Date of interview at wave 11
  146: ##             "r12iwmid",    #
  147: ##             "r13iwmid",    #
  148: ##             "r14iwmid",    #
  149: ##             "s11ddate",    # Date of death (from wave 11) 1669
  150: ##             "s12ddate",    # 967
  151: ##             "s13ddate",    # 381
  152: ##             "s14ddate",    #   8
  153: ##             "r11cesd",    # CESD score at 11, 19400, mean 1.54
  154: ##             "r12cesd",     #
  155: ##             "r13cesd",     #
  156: ##             "r14cesd",     #
  157: ##             "r11agey_m",  # Age at mid wave 11 66.85
  158: ##             "r11adla",    # Sum of ADLs at wave 11, 0.41 
  159: ##             "r12adla",     # 0.43
  160: ##             "r13adla",     # 0.40
  161: ##             "r14adla",     # 0.39
  162: ##             "r11wtresp"   # Weight
  163: ##             )]
  164: 
  165: # INDIVIDUAL SELECTIONS:
  166: # Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012
  167: #dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis)
  168: #write.csv2(dt2,file="hrs12xSAS_noM.csv")

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>