--- /dev/null
+# Simulating data for IMaCh tests
+# Feinuo Sun and Nicolas Brouard (INED, July 2023)
+#
+install.packages("dplyr")
+if(!require("tidyverse")){
+ install.packages("tidyverse",repos="http://cran.r-project.org")
+# install.packages("tidyverse")
+ require("tidyverse")
+ }
+# LIBRARIES
+library(haven)
+samplesize<- 10000
+
+# Which Life table?
+# Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year)
+# and a_m, modal age at death. From whose we can compute the life expectancy e_65.
+
+install.packages("expint",repos="http://cran.r-project.org")
+library(expint)
+am <- 85
+mum <- 9/100
+expint(exp(mum*(65-am)))
+am <- 84.8506
+exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
+# e(65) = 18.10904 years
+e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
+
+e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am)))
+# Life table from 0
+l <- function(a,am,mum){
+ exp(-exp(mum*(a-am)))/exp(-exp(-mum*am))
+}
+l65 <- function(a,am,mum){
+ exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am)))
+}
+l65(65,am,mum)
+#l65<-l(65,am,mum) # 0.84
+l65(100,am,mum)
+
+#curve(l65(x,am,mum), from = 65, to = 100)
+# Add a line:
+#curve(1 - l65(x,am,mum), add = TRUE, col = "red")
+
+# inverse function from l(a) find a.
+linv <- function(la,am,mum){
+ am+log(exp(-mum*am) -log(la))/mum
+}
+linv(0.001,am,mum)
+linv(1,am,mum)
+
+l65inv <- function(la,am,mum){
+ am+log(exp(mum*(65-am)) -log(la))/mum
+}
+l65inv(0.001,am,mum)
+l65inv(1,am,mum)
+
+am
+mum
+l65bisinv <- function(la){
+ 84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09
+}
+l65bisinv(0.001)
+l65bisinv(1)
+
+set.seed(128)
+zeroone<-runif(samplesize,min=0.001,max=1)
+lifelength <- lapply(zeroone,l65bisinv)
+
+#lifelength <- rnorm(n=samplesize, mean=85, sd=16)
+
+set.seed(124)
+# First interview 4th semester
+monthinterview <- runif(samplesize, min=9,max=12)
+st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1
+# state 2
+st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1
+# date of birth (simulating population in 1998 age 65+), uniformly?
+popage65110in1998<- runif(samplesize, min=65, max=110)
+#gender
+ragender <- rbinom(n=samplesize, size=1, prob=0.56)
+
+yrinterview1 <- floor(r98iwmid)
+monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1
+
+int_98 <- paste0(monthinw1,"/",yrinterview1)
+
+r98iwmid <- 1998 + monthinterview/12
+rabdate <- r98iwmid - popage65110in1998
+birthyr <- floor(rabdate)
+monthdb <- floor((rabdate - floor(rabdate))*12)+1
+brt <- paste0(monthdb,"/",birthyr)
+# date of death for dropping cases
+raddate <- rabdate + lifelength
+dateinterview2 <- 1998 + 2 + monthinterview/12
+#head(cbind(r98iwmid,dateinterview2,raddate),70)
+# people whose death occured before the interview will be dropped (radnate)
+radndate <- if_else(raddate < r98iwmid, NA, raddate )
+#head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70)
+# in order to avoid date of death known after last wave and potential bias
+# people whose death will occur after last interview will have an unkwonn (99/9999) date of death
+lastinterview<- dateinterview2
+radldate<- if_else(radndate > dateinterview2, 9999, radndate )
+head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),70)
+ddtyr <- if_else((!is.na(radldate) & radldate ==9999), 9999, floor(radldate))
+monthdd <- if_else((!is.na(radldate) & radldate ==9999), 99,floor((radldate - floor(radldate))*12)+1)
+#head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70)
+ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA)
+#head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70)
+weight <- rep(1, samplesize)
+# state 1 st_98
+ageatinterview1 <- r98iwmid - rabdate
+# interview 2 st 2000
+# same month of interview
+yrinterview2 <- floor(dateinterview2)
+monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1
+int_00 <- paste0(monthinw2,"/",yrinterview2)
+ageatinterview2 <- dateinterview2 - rabdate
+# state 2
+st_00 <- if_else(raddate < dateinterview2, 3, st_00)
+hhidpn <- seq(1,samplesize)
+HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00)
+head(HRSSIMULdata,70)
+
+HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt))
+HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )]
+head(HRSSIMULdata,70)
+#### export to txt file for IMaCh
+write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F)
+
+
+#HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")]
+
+## # VARIABLE SELECTIONS
+## dt1<-dat[,c( "hhidpn", # ID number of the respondent
+## "ragender", # respondent's gender 1M or 2F, mean 1.56
+## "rabdate", # Respondent's birth date (1890.0 to 1995.0)
+## "raddate", # Respondent's death date (1917.0 to 2019.0)
+## "rahispan", # Mexican-American and Other Hispanic are recoded to "1."
+## "raracem", # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing .
+## "raeduc", # Education: Years of
+## "r11mstat", # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1
+## "r12mstat", # Married spouse absent 2
+## "r13mstat", # Partnered 3 Separated 4 Divorced 5
+## "r14mstat", # Widowed 7, Never married 8
+## "r11iwmid", # Date of interview at wave 11
+## "r12iwmid", #
+## "r13iwmid", #
+## "r14iwmid", #
+## "s11ddate", # Date of death (from wave 11) 1669
+## "s12ddate", # 967
+## "s13ddate", # 381
+## "s14ddate", # 8
+## "r11cesd", # CESD score at 11, 19400, mean 1.54
+## "r12cesd", #
+## "r13cesd", #
+## "r14cesd", #
+## "r11agey_m", # Age at mid wave 11 66.85
+## "r11adla", # Sum of ADLs at wave 11, 0.41
+## "r12adla", # 0.43
+## "r13adla", # 0.40
+## "r14adla", # 0.39
+## "r11wtresp" # Weight
+## )]
+
+# INDIVIDUAL SELECTIONS:
+# Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012
+#dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis)
+#write.csv2(dt2,file="hrs12xSAS_noM.csv")