From ac9c89d14b66d5c1b9307670369869cf465ab05e Mon Sep 17 00:00:00 2001 From: "N. Brouard" Date: Fri, 14 Jul 2023 11:25:16 +0000 Subject: [PATCH] Summary: R program to provie simulated data for IMaCh --- src/simulation.R | 168 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 src/simulation.R diff --git a/src/simulation.R b/src/simulation.R new file mode 100644 index 0000000..4da38d7 --- /dev/null +++ b/src/simulation.R @@ -0,0 +1,168 @@ +# Simulating data for IMaCh tests +# Feinuo Sun and Nicolas Brouard (INED, July 2023) +# +install.packages("dplyr") +if(!require("tidyverse")){ + install.packages("tidyverse",repos="http://cran.r-project.org") +# install.packages("tidyverse") + require("tidyverse") + } +# LIBRARIES +library(haven) +samplesize<- 10000 + +# Which Life table? +# Simulating a gompertz law of mortality: slope of line mu_m (about 9% increase per year) +# and a_m, modal age at death. From whose we can compute the life expectancy e_65. + +install.packages("expint",repos="http://cran.r-project.org") +library(expint) +am <- 85 +mum <- 9/100 +expint(exp(mum*(65-am))) +am <- 84.8506 +exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am))) +# e(65) = 18.10904 years +e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am))) + +e65 <- exp(exp(mum*(65-am)))/mum*expint(exp(mum*(65-am))) +# Life table from 0 +l <- function(a,am,mum){ + exp(-exp(mum*(a-am)))/exp(-exp(-mum*am)) +} +l65 <- function(a,am,mum){ + exp(-exp(mum*(a-am)))/exp(-exp(mum*(65-am))) +} +l65(65,am,mum) +#l65<-l(65,am,mum) # 0.84 +l65(100,am,mum) + +#curve(l65(x,am,mum), from = 65, to = 100) +# Add a line: +#curve(1 - l65(x,am,mum), add = TRUE, col = "red") + +# inverse function from l(a) find a. +linv <- function(la,am,mum){ + am+log(exp(-mum*am) -log(la))/mum +} +linv(0.001,am,mum) +linv(1,am,mum) + +l65inv <- function(la,am,mum){ + am+log(exp(mum*(65-am)) -log(la))/mum +} +l65inv(0.001,am,mum) +l65inv(1,am,mum) + +am +mum +l65bisinv <- function(la){ + 84.8506+log(exp(0.09*(65-84.8506)) -log(la))/0.09 +} +l65bisinv(0.001) +l65bisinv(1) + +set.seed(128) +zeroone<-runif(samplesize,min=0.001,max=1) +lifelength <- lapply(zeroone,l65bisinv) + +#lifelength <- rnorm(n=samplesize, mean=85, sd=16) + +set.seed(124) +# First interview 4th semester +monthinterview <- runif(samplesize, min=9,max=12) +st_98 <- rbinom(n=samplesize, size=1, prob=0.5)+1 +# state 2 +st_00 <- rbinom(n=samplesize, size=1, prob=0.2)+1 +# date of birth (simulating population in 1998 age 65+), uniformly? +popage65110in1998<- runif(samplesize, min=65, max=110) +#gender +ragender <- rbinom(n=samplesize, size=1, prob=0.56) + +yrinterview1 <- floor(r98iwmid) +monthinw1 <- floor((monthinterview - floor(monthinterview))*12)+1 + +int_98 <- paste0(monthinw1,"/",yrinterview1) + +r98iwmid <- 1998 + monthinterview/12 +rabdate <- r98iwmid - popage65110in1998 +birthyr <- floor(rabdate) +monthdb <- floor((rabdate - floor(rabdate))*12)+1 +brt <- paste0(monthdb,"/",birthyr) +# date of death for dropping cases +raddate <- rabdate + lifelength +dateinterview2 <- 1998 + 2 + monthinterview/12 +#head(cbind(r98iwmid,dateinterview2,raddate),70) +# people whose death occured before the interview will be dropped (radnate) +radndate <- if_else(raddate < r98iwmid, NA, raddate ) +#head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength),70) +# in order to avoid date of death known after last wave and potential bias +# people whose death will occur after last interview will have an unkwonn (99/9999) date of death +lastinterview<- dateinterview2 +radldate<- if_else(radndate > dateinterview2, 9999, radndate ) +head(cbind(r98iwmid,dateinterview2,raddate,radndate,lifelength,radldate),70) +ddtyr <- if_else((!is.na(radldate) & radldate ==9999), 9999, floor(radldate)) +monthdd <- if_else((!is.na(radldate) & radldate ==9999), 99,floor((radldate - floor(radldate))*12)+1) +#head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddtyr,monthdd),70) +ddt <- if_else(!is.na(radldate),paste0(monthdd,"/",ddtyr), NA) +#head(cbind(r98iwmid,dateinterview2,raddate,lifelength,radldate,ddt),70) +weight <- rep(1, samplesize) +# state 1 st_98 +ageatinterview1 <- r98iwmid - rabdate +# interview 2 st 2000 +# same month of interview +yrinterview2 <- floor(dateinterview2) +monthinw2 <- floor((dateinterview2 - floor(dateinterview2))*12)+1 +int_00 <- paste0(monthinw2,"/",yrinterview2) +ageatinterview2 <- dateinterview2 - rabdate +# state 2 +st_00 <- if_else(raddate < dateinterview2, 3, st_00) +hhidpn <- seq(1,samplesize) +HRSSIMULdata <- data.frame(hhidpn,ragender, weight, brt, ddt, int_98, st_98, int_00, st_00) +head(HRSSIMULdata,70) + +HRSSIMULdata <- HRSSIMULdata %>% filter(!is.na(ddt)) +HRSSIMULdata <- HRSSIMULdata[,c("hhidpn","ragender", "weight", "brt", "ddt", "int_98", "st_98", "int_00", "st_00" )] +head(HRSSIMULdata,70) +#### export to txt file for IMaCh +write.table(HRSSIMULdata,file="HRSSIMUL.txt",col.names=F,row.names=F,quote=F) + + +#HRSSIMULdata<-HRSSIMULdata[,c("hhidpn","female","nhwhites","schlyrs","weight","brt","ddt","int_10","st_10","marpar_10","smoker_10","srh_10","int_12","st_12","marpar_12","smoker_12","srh_12","int_14","st_14","marpar_14","smoker_14","srh_14")] + +## # VARIABLE SELECTIONS +## dt1<-dat[,c( "hhidpn", # ID number of the respondent +## "ragender", # respondent's gender 1M or 2F, mean 1.56 +## "rabdate", # Respondent's birth date (1890.0 to 1995.0) +## "raddate", # Respondent's death date (1917.0 to 2019.0) +## "rahispan", # Mexican-American and Other Hispanic are recoded to "1." +## "raracem", # Race-masked: White/Caucasian 1, Black/African American 2, Other 3, Missing . +## "raeduc", # Education: Years of +## "r11mstat", # Marital status at wave 11: .J Webonterview missing, .M Other missing. Married 1 +## "r12mstat", # Married spouse absent 2 +## "r13mstat", # Partnered 3 Separated 4 Divorced 5 +## "r14mstat", # Widowed 7, Never married 8 +## "r11iwmid", # Date of interview at wave 11 +## "r12iwmid", # +## "r13iwmid", # +## "r14iwmid", # +## "s11ddate", # Date of death (from wave 11) 1669 +## "s12ddate", # 967 +## "s13ddate", # 381 +## "s14ddate", # 8 +## "r11cesd", # CESD score at 11, 19400, mean 1.54 +## "r12cesd", # +## "r13cesd", # +## "r14cesd", # +## "r11agey_m", # Age at mid wave 11 66.85 +## "r11adla", # Sum of ADLs at wave 11, 0.41 +## "r12adla", # 0.43 +## "r13adla", # 0.40 +## "r14adla", # 0.39 +## "r11wtresp" # Weight +## )] + +# INDIVIDUAL SELECTIONS: +# Respondents in 2012, aged 50 and older, with no missing information on marital status in 2012 +#dt2<-dt1 %>% filter(!is.na(r11mstat) & r11agey_m>=50);nrow(dm_bis) +#write.csv2(dt2,file="hrs12xSAS_noM.csv") -- 2.43.0