Title: | Random Forest with Multivariate Longitudinal Predictors |
---|---|
Description: | Based on the random forest principle, 'DynForest' is able to include multiple longitudinal predictors to provide individual predictions. Longitudinal predictors are modeled through the random forest. The methodology is fully described for a survival outcome in: Devaux, Helmer, Genuer & Proust-Lima (2023) <doi: 10.1177/09622802231206477>. |
Authors: | Anthony Devaux [aut, cre] , Robin Genuer [aut] , Cécile Proust-Lima [aut] , Louis Capitaine [aut] |
Maintainer: | Anthony Devaux <[email protected]> |
License: | LGPL (>= 3) |
Version: | 1.2.1 |
Built: | 2024-10-31 01:16:59 UTC |
Source: | https://github.com/anthonydevaux/dynforest |
Compute the grouped importance of variables (gVIMP) statistic
compute_gvimp( dynforest_obj, IBS.min = 0, IBS.max = NULL, group = NULL, ncores = NULL, seed = 1234 )
compute_gvimp( dynforest_obj, IBS.min = 0, IBS.max = NULL, group = NULL, ncores = NULL, seed = 1234 )
dynforest_obj |
An object of class dynforest, as returned by dynforest() |
IBS.min |
(Only with survival outcome) Minimal time to compute the Integrated Brier Score. Default value is set to 0. |
IBS.max |
(Only with survival outcome) Maximal time to compute the Integrated Brier Score. Default value is set to the maximal time-to-event found. |
group |
A list of groups with the name of the predictors assigned in each group |
ncores |
Number of cores used to grow trees in parallel. Default value is the number of cores of the computer-1. |
seed |
Seed to replicate results |
compute_gvimp()
function returns a list with the following elements:
Inputs |
A list of 3 elements: Longitudinal , Numeric and Factor . Each element contains the names of the predictors |
group |
A list of each group defined in group argument |
gVIMP |
A numeric vector containing the gVIMP for each group defined in group argument |
tree_oob_err |
A numeric vector containing the OOB error for each tree needed to compute the gVIMP statistic |
IBS.range |
A vector containing the IBS min and max |
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute gVIMP statistic res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn, group = list(group1 = c("serBilir","SGOT"), group2 = c("albumin","alkaline")), ncores = 2, seed = 1234)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute gVIMP statistic res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn, group = list(group1 = c("serBilir","SGOT"), group2 = c("albumin","alkaline")), ncores = 2, seed = 1234)
Compute the Out-Of-Bag error (OOB error)
compute_ooberror(dynforest_obj, IBS.min = 0, IBS.max = NULL, ncores = NULL)
compute_ooberror(dynforest_obj, IBS.min = 0, IBS.max = NULL, ncores = NULL)
dynforest_obj |
An object of class dynforest, as returned by dynforest() |
IBS.min |
(Only with survival outcome) Minimal time to compute the Integrated Brier Score. Default value is set to 0. |
IBS.max |
(Only with survival outcome) Maximal time to compute the Integrated Brier Score. Default value is set to the maximal time-to-event found. |
ncores |
Number of cores used to grow trees in parallel. Default value is the number of cores of the computer-1. |
compute_ooberror()
function returns a list with the following elements:
data |
A list containing the data used to grow the trees |
rf |
A table with each tree in column. Provide multiple characteristics about the tree building |
type |
Outcome type |
times |
A numeric vector containing the time-to-event for all subjects |
cause |
Indicating the cause of interest |
causes |
A numeric vector containing the causes indicator |
Inputs |
A list of 3 elements: Longitudinal , Numeric and Factor . Each element contains the names of the predictors |
Longitudinal.model |
A list of longitudinal markers containing the formula used for modeling in the random forest |
param |
A list containing the hyperparameters |
oob.err |
A numeric vector containing the OOB error for each subject |
oob.pred |
Outcome prediction for all subjects |
IBS.range |
A vector containing the IBS min and max |
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute OOB error res_dyn_OOB <- compute_ooberror(dynforest_obj = res_dyn, ncores = 2)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute OOB error res_dyn_OOB <- compute_ooberror(dynforest_obj = res_dyn, ncores = 2)
Extract characteristics from the trees building process
compute_vardepth(dynforest_obj)
compute_vardepth(dynforest_obj)
dynforest_obj |
An object of class dynforest, as returned by dynforest() |
compute_vardepth() function returns a list with the following elements:
min_depth |
A table providing for each feature in row: the average depth and the rank |
var_node_depth |
A table providing for each tree in column the minimal depth for each feature in row. NA indicates that the feature was not used for the corresponding tree |
var_count |
A table providing, for each tree (in column), the number of times each feature (in row) is used. A value of 0 indicates that the feature was not used for the corresponding tree |
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Run compute_vardepth function res_varDepth <- compute_vardepth(res_dyn)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Run compute_vardepth function res_varDepth <- compute_vardepth(res_dyn)
Compute the importance of variables (VIMP) statistic
compute_vimp( dynforest_obj, IBS.min = 0, IBS.max = NULL, ncores = NULL, seed = 1234 )
compute_vimp( dynforest_obj, IBS.min = 0, IBS.max = NULL, ncores = NULL, seed = 1234 )
dynforest_obj |
An object of class dynforest, as returned by dynforest() |
IBS.min |
(Only with survival outcome) Minimal time to compute the Integrated Brier Score. Default value is set to 0. |
IBS.max |
(Only with survival outcome) Maximal time to compute the Integrated Brier Score. Default value is set to the maximal time-to-event found. |
ncores |
Number of cores used to grow trees in parallel. Default value is the number of cores of the computer-1. |
seed |
Seed to replicate results |
compute_vimp()
function returns a list with the following elements:
Inputs |
A list of 3 elements: Longitudinal , Numeric and Factor . Each element contains the names of the predictors |
Importance |
A list of 3 elements: Longitudinal , Numeric and Factor . Each element contains a numeric vector of the VIMP statistic for the predictors listed in the corresponding element of Inputs |
tree_oob_err |
A numeric vector containing the OOB error for each tree needed to compute the VIMP statistic |
IBS.range |
A vector containing the IBS min and max |
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute VIMP statistic res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, ncores = 2, seed = 1234)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute VIMP statistic res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, ncores = 2, seed = 1234)
Simulated dataset 1 with continuous outcome
Longitudinal dataset with 1200 rows and 13 columns for 200 subjects
Subject identifier
Time measurement
Continuous time-fixed predictor 1
Continuous time-fixed predictor 2
Binary time-fixed predictor 1
Binary time-fixed predictor 2
Continuous time-dependent predictor 1
Continuous time-dependent predictor 2
Continuous time-dependent predictor 3
Continuous time-dependent predictor 4
Continuous time-dependent predictor 5
Continuous time-dependent predictor 6
Continuous outcome
data(data_simu1)
data(data_simu1)
Simulated dataset 2 with continuous outcome
Longitudinal dataset with 1200 rows and 13 columns for 200 subjects
Subject identifier
Time measurement
Continuous time-fixed predictor 1
Continuous time-fixed predictor 2
Binary time-fixed predictor 1
Binary time-fixed predictor 2
Continuous time-dependent predictor 1
Continuous time-dependent predictor 2
Continuous time-dependent predictor 3
Continuous time-dependent predictor 4
Continuous time-dependent predictor 5
Continuous time-dependent predictor 6
Continuous outcome
data(data_simu2)
data(data_simu2)
Build a random forest using multivariate longitudinal endogenous covariates
dynforest( timeData = NULL, fixedData = NULL, idVar = NULL, timeVar = NULL, timeVarModel = NULL, Y = NULL, ntree = 200, mtry = NULL, nodesize = 1, minsplit = 2, cause = 1, nsplit_option = "quantile", ncores = NULL, seed = 1234, verbose = TRUE )
dynforest( timeData = NULL, fixedData = NULL, idVar = NULL, timeVar = NULL, timeVarModel = NULL, Y = NULL, ntree = 200, mtry = NULL, nodesize = 1, minsplit = 2, cause = 1, nsplit_option = "quantile", ncores = NULL, seed = 1234, verbose = TRUE )
timeData |
A data.frame containing the id and time measurements variables and the time-dependent predictors. |
fixedData |
A data.frame containing the id variable and the time-fixed predictors. Categorical variables should be characterized as factor. |
idVar |
A character indicating the name of variable to identify the subjects |
timeVar |
A character indicating the name of time variable |
timeVarModel |
A list with one element per time-dependent predictor, each containing a list of formulas for the fixed and random parts of the mixed model |
Y |
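A list of output which should contain: type , the nature of the outcome (e.g. "surv" for a survival outcome), and Y , a data.frame containing the id variable and the outcome variable(s) |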
ntree |
Number of trees to grow. Default value set to 200. |
mtry |
Number of candidate variables randomly drawn at each node of the trees. This parameter should be tuned by minimizing the OOB error. Default is defined as the square root of the number of predictors. |
nodesize |
Minimal number of subjects required in both child nodes to split. Cannot be smaller than 1. |
minsplit |
(Only with survival outcome) Minimal number of events required to split the node. Cannot be smaller than 2. |
cause |
(Only with competing events) Number indicating the event of interest. |
nsplit_option |
A character indicating how the values are chosen to build the two groups for the splitting rule (only for continuous predictors). Values are chosen using deciles ( nsplit_option = "quantile" ), which is the default. |
ncores |
Number of cores used to grow trees in parallel. Default value is the number of cores of the computer-1. |
seed |
Seed to replicate results |
verbose |
A logical controlling whether the function progress is displayed. Default is TRUE . |
The function currently supports survival (competing or single event), continuous or categorical outcome.
FUTURE IMPLEMENTATIONS:
Continuous longitudinal outcome
Functional data analysis
dynforest function returns a list with the following elements:
data |
A list containing the data used to grow the trees |
rf |
A table with each tree in column. Provide multiple characteristics about the tree building |
type |
Outcome type |
times |
A numeric vector containing the time-to-event for all subjects |
cause |
Indicating the cause of interest |
causes |
A numeric vector containing the causes indicator |
Inputs |
A list of 3 elements: Longitudinal , Numeric and Factor . Each element contains the names of the predictors |
Longitudinal.model |
A list of longitudinal markers containing the formula used for modeling in the random forest |
param |
A list containing the hyperparameters |
comput.time |
Computation time |
Anthony Devaux ([email protected])
Devaux A., Helmer C., Genuer R., Proust-Lima C. (2023). Random survival forests with multivariate longitudinal endogenous covariates. SMMR doi:10.1177/09622802231206477
Devaux A., Proust-Lima C., Genuer R. (2023). Random Forests for time-fixed and time-dependent predictors: The DynForest R package. arXiv doi:10.48550/arXiv.2302.02670
summary.dynforest()
compute_ooberror()
compute_vimp()
compute_gvimp()
predict.dynforest()
plot.dynforest()
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234)
Extract information about the splits of a tree selected by the user
get_tree(dynforest_obj, tree)
get_tree(dynforest_obj, tree)
dynforest_obj |
An object of class dynforest, as returned by dynforest() |
tree |
Integer indicating the tree identifier |
A table sorted by the node/leaf identifier with each row representing a node/leaf. Each column provides information about the splits:
type |
The nature of the predictor (Longitudinal for longitudinal predictor, Numeric for continuous predictor or Factor for categorical predictor) if the node was split, Leaf otherwise |
var_split |
The predictor used for the split defined by its order in timeData and fixedData |
feature |
The feature used for the split defined by its position in random statistic |
threshold |
The threshold used for the split (only with Longitudinal and Numeric ). No information is returned for Factor |
N |
The number of subjects in the node/leaf |
Nevent |
The number of events of interest in the node/leaf (only with survival outcome) |
depth |
The depth level of the node/leaf |
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Extract split information from tree 4 res_tree4 <- get_tree(dynforest_obj = res_dyn, tree = 4)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Extract split information from tree 4 res_tree4 <- get_tree(dynforest_obj = res_dyn, tree = 4)
Extract nodes identifiers for a given tree
get_treenodes(dynforest_obj, tree = NULL)
get_treenodes(dynforest_obj, tree = NULL)
dynforest_obj |
An object of class dynforest, as returned by dynforest() |
tree |
Integer indicating the tree identifier |
Extract nodes identifiers for a given tree
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Extract nodes identifiers for a given tree get_treenodes(dynforest_obj = res_dyn, tree = 1)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Extract nodes identifiers for a given tree get_treenodes(dynforest_obj = res_dyn, tree = 1)
pbc2 data from Mayo clinic
Longitudinal dataset with 1945 rows and 19 columns for 312 patients
Patient identifier
Time measurement
Presence of ascites (Yes/No)
Presence of hepatomegaly (Yes/No)
Blood vessel malformations in the skin (Yes/No)
Edema levels (No edema/edema no diuretics/edema despite diuretics)
Level of serum bilirubin
Level of serum cholesterol
Level of albumin
Level of alkaline phosphatase
Level of aspartate aminotransferase
Platelet count
Prothrombin time
Histologic stage of disease
Drug treatment (D-penicillamine/Placebo)
Age at enrollment
Sex of patient
Time-to-event in years
Event indicator: 0 (alive), 1 (transplanted) and 2 (dead)
pbc2 joineRML
data(pbc2)
data(pbc2)
This function displays a plot of CIF for a given node and tree (for class dynforest ), the most predictive variables with the minimal depth (for class dynforestvardepth ), the variable importance (for class dynforestvimp ) or the grouped variable importance (for class dynforestgvimp ).
## S3 method for class 'dynforest' plot(x, tree = NULL, nodes = NULL, id = NULL, max_tree = NULL, ...) ## S3 method for class 'dynforestvardepth' plot(x, plot_level = c("predictor", "feature"), ...) ## S3 method for class 'dynforestvimp' plot(x, PCT = FALSE, ordering = TRUE, ...) ## S3 method for class 'dynforestgvimp' plot(x, PCT = FALSE, ...) ## S3 method for class 'dynforestpred' plot(x, id = NULL, ...)
## S3 method for class 'dynforest' plot(x, tree = NULL, nodes = NULL, id = NULL, max_tree = NULL, ...) ## S3 method for class 'dynforestvardepth' plot(x, plot_level = c("predictor", "feature"), ...) ## S3 method for class 'dynforestvimp' plot(x, PCT = FALSE, ordering = TRUE, ...) ## S3 method for class 'dynforestgvimp' plot(x, PCT = FALSE, ...) ## S3 method for class 'dynforestpred' plot(x, id = NULL, ...)
x |
Object inheriting from classes dynforest , dynforestvardepth , dynforestvimp , dynforestgvimp or dynforestpred |
tree |
For dynforest class, integer indicating the tree identifier |
nodes |
For dynforest class, identifiers of the nodes of the selected tree for which the CIF is plotted |
id |
For |
max_tree |
For |
... |
Optional parameters to be passed to the low level function |
plot_level |
For |
PCT |
For |
ordering |
For |
plot() function displays:
With dynforestvardepth |
the minimal depth for each predictor/feature |
With dynforestvimp |
the VIMP for each predictor |
With dynforestgvimp |
the grouped-VIMP for each given group |
dynforest()
compute_ooberror()
compute_vimp()
compute_gvimp()
compute_vardepth()
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Plot estimated CIF at nodes 17 and 32 plot(x = res_dyn, tree = 1, nodes = c(17,32)) # Run var_depth function res_varDepth <- compute_vardepth(res_dyn) # Plot minimal depth plot(x = res_varDepth, plot_level = "feature") # Compute VIMP statistic res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, ncores = 2) # Plot VIMP plot(x = res_dyn_VIMP, PCT = TRUE) # Compute gVIMP statistic res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn, group = list(group1 = c("serBilir","SGOT"), group2 = c("albumin","alkaline")), ncores = 2) # Plot gVIMP plot(x = res_dyn_gVIMP, PCT = TRUE) # Sample 5 subjects to predict the event set.seed(123) id_pred <- sample(id, 5) # Create predictors objects pbc2_pred <- pbc2[which(pbc2$id%in%id_pred),] timeData_pred <- pbc2_pred[,c("id", "time", "serBilir", 
"SGOT", "albumin", "alkaline")] fixedData_pred <- unique(pbc2_pred[,c("id","age","drug","sex")]) # Predict the CIF function for the new subjects with landmark time at 4 years pred_dyn <- predict(object = res_dyn, timeData = timeData_pred, fixedData = fixedData_pred, idVar = "id", timeVar = "time", t0 = 4) # Plot predicted CIF for subjects 26 and 110 plot(x = pred_dyn, id = c(26, 110))
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Plot estimated CIF at nodes 17 and 32 plot(x = res_dyn, tree = 1, nodes = c(17,32)) # Run var_depth function res_varDepth <- compute_vardepth(res_dyn) # Plot minimal depth plot(x = res_varDepth, plot_level = "feature") # Compute VIMP statistic res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, ncores = 2) # Plot VIMP plot(x = res_dyn_VIMP, PCT = TRUE) # Compute gVIMP statistic res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn, group = list(group1 = c("serBilir","SGOT"), group2 = c("albumin","alkaline")), ncores = 2) # Plot gVIMP plot(x = res_dyn_gVIMP, PCT = TRUE) # Sample 5 subjects to predict the event set.seed(123) id_pred <- sample(id, 5) # Create predictors objects pbc2_pred <- pbc2[which(pbc2$id%in%id_pred),] timeData_pred <- pbc2_pred[,c("id", "time", "serBilir", 
"SGOT", "albumin", "alkaline")] fixedData_pred <- unique(pbc2_pred[,c("id","age","drug","sex")]) # Predict the CIF function for the new subjects with landmark time at 4 years pred_dyn <- predict(object = res_dyn, timeData = timeData_pred, fixedData = fixedData_pred, idVar = "id", timeVar = "time", t0 = 4) # Plot predicted CIF for subjects 26 and 110 plot(x = pred_dyn, id = c(26, 110))
Prediction using dynamic random forests
## S3 method for class 'dynforest' predict( object, timeData = NULL, fixedData = NULL, idVar, timeVar, t0 = NULL, ... )
## S3 method for class 'dynforest' predict( object, timeData = NULL, fixedData = NULL, idVar, timeVar, t0 = NULL, ... )
object |
|
timeData |
A data.frame containing the id and time measurement variables and the time-dependent predictors. |
fixedData |
A data.frame containing the id variable and the time-fixed predictors. Non-continuous variables should be characterized as factor. |
idVar |
A character indicating the name of variable to identify the subjects |
timeVar |
A character indicating the name of time variable |
t0 |
Landmark time |
... |
Optional parameters to be passed to the low level function |
Returns the outcome of interest for the new subjects: a matrix of probabilities of the event of interest in survival mode, the average value in regression mode, and the most likely value in classification mode.
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Sample 5 subjects to predict the event set.seed(123) id_pred <- sample(id, 5) # Create predictors objects pbc2_pred <- pbc2[which(pbc2$id%in%id_pred),] timeData_pred <- pbc2_pred[,c("id", "time", "serBilir", "SGOT", "albumin", "alkaline")] fixedData_pred <- unique(pbc2_pred[,c("id","age","drug","sex")]) # Predict the CIF function for the new subjects with landmark time at 4 years pred_dyn <- predict(object = res_dyn, timeData = timeData_pred, fixedData = fixedData_pred, idVar = "id", timeVar = "time", t0 = 4)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Sample 5 subjects to predict the event set.seed(123) id_pred <- sample(id, 5) # Create predictors objects pbc2_pred <- pbc2[which(pbc2$id%in%id_pred),] timeData_pred <- pbc2_pred[,c("id", "time", "serBilir", "SGOT", "albumin", "alkaline")] fixedData_pred <- unique(pbc2_pred[,c("id","age","drug","sex")]) # Predict the CIF function for the new subjects with landmark time at 4 years pred_dyn <- predict(object = res_dyn, timeData = timeData_pred, fixedData = fixedData_pred, idVar = "id", timeVar = "time", t0 = 4)
This function displays a brief summary regarding the trees (for class dynforest), a data frame with variable importance (for class dynforestvimp) or the grouped variable importance (for class dynforestgvimp).
## S3 method for class 'dynforest' print(x, ...) ## S3 method for class 'dynforestvimp' print(x, ...) ## S3 method for class 'dynforestgvimp' print(x, ...) ## S3 method for class 'dynforestvardepth' print(x, ...) ## S3 method for class 'dynforestoob' print(x, ...) ## S3 method for class 'dynforestpred' print(x, ...)
## S3 method for class 'dynforest' print(x, ...) ## S3 method for class 'dynforestvimp' print(x, ...) ## S3 method for class 'dynforestgvimp' print(x, ...) ## S3 method for class 'dynforestvardepth' print(x, ...) ## S3 method for class 'dynforestoob' print(x, ...) ## S3 method for class 'dynforestpred' print(x, ...)
x |
Object inheriting from classes dynforest, dynforestvimp, dynforestgvimp, dynforestvardepth, dynforestoob or dynforestpred |
... |
Optional parameters to be passed to the low level function |
dynforest()
compute_ooberror()
compute_vimp()
compute_gvimp()
compute_vardepth()
predict.dynforest()
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Print function print(res_dyn) # Compute VIMP statistic res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, ncores = 2, seed = 1234) # Print function print(res_dyn_VIMP) # Compute gVIMP statistic res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn, group = list(group1 = c("serBilir","SGOT"), group2 = c("albumin","alkaline")), ncores = 2, seed = 1234) # Print function print(res_dyn_gVIMP) # Run var_depth function res_varDepth <- compute_vardepth(res_dyn) # Print function print(res_varDepth)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Print function print(res_dyn) # Compute VIMP statistic res_dyn_VIMP <- compute_vimp(dynforest_obj = res_dyn, ncores = 2, seed = 1234) # Print function print(res_dyn_VIMP) # Compute gVIMP statistic res_dyn_gVIMP <- compute_gvimp(dynforest_obj = res_dyn, group = list(group1 = c("serBilir","SGOT"), group2 = c("albumin","alkaline")), ncores = 2, seed = 1234) # Print function print(res_dyn_gVIMP) # Run var_depth function res_varDepth <- compute_vardepth(res_dyn) # Print function print(res_varDepth)
Display the summary of dynforest
## S3 method for class 'dynforest' summary(object, ...) ## S3 method for class 'dynforestoob' summary(object, ...)
## S3 method for class 'dynforest' summary(object, ...) ## S3 method for class 'dynforestoob' summary(object, ...)
object |
|
... |
Optional parameters to be passed to the low level function |
Return some information about the random forest
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute OOB error res_dyn_OOB <- compute_ooberror(dynforest_obj = res_dyn, ncores = 2) # dynforest summary summary(object = res_dyn_OOB)
data(pbc2) # Get Gaussian distribution for longitudinal predictors pbc2$serBilir <- log(pbc2$serBilir) pbc2$SGOT <- log(pbc2$SGOT) pbc2$albumin <- log(pbc2$albumin) pbc2$alkaline <- log(pbc2$alkaline) # Sample 100 subjects set.seed(1234) id <- unique(pbc2$id) id_sample <- sample(id, 100) id_row <- which(pbc2$id%in%id_sample) pbc2_train <- pbc2[id_row,] timeData_train <- pbc2_train[,c("id","time", "serBilir","SGOT", "albumin","alkaline")] # Create object with longitudinal association for each predictor timeVarModel <- list(serBilir = list(fixed = serBilir ~ time, random = ~ time), SGOT = list(fixed = SGOT ~ time + I(time^2), random = ~ time + I(time^2)), albumin = list(fixed = albumin ~ time, random = ~ time), alkaline = list(fixed = alkaline ~ time, random = ~ time)) # Build fixed data fixedData_train <- unique(pbc2_train[,c("id","age","drug","sex")]) # Build outcome data Y <- list(type = "surv", Y = unique(pbc2_train[,c("id","years","event")])) # Run dynforest function res_dyn <- dynforest(timeData = timeData_train, fixedData = fixedData_train, timeVar = "time", idVar = "id", timeVarModel = timeVarModel, Y = Y, ntree = 50, nodesize = 5, minsplit = 5, cause = 2, ncores = 2, seed = 1234) # Compute OOB error res_dyn_OOB <- compute_ooberror(dynforest_obj = res_dyn, ncores = 2) # dynforest summary summary(object = res_dyn_OOB)