fuser: fused lasso for high-dimensional regression over groups. This package implements the model described in Dondelinger et al. (2016).
# Install the development version of fuser from GitHub (requires devtools).
library('devtools')
install_github('FrankD/fuser')
See also the included vignette.
# Load the package and fix the RNG seed so the simulation below is reproducible.
library(fuser)
set.seed(123)
# Generate simple heterogeneous dataset
k = 4 # number of groups
p = 100 # number of covariates
n.group = 15 # number of samples per group
sigma = 0.05 # observation noise sd
groups = rep(1:k, each=n.group) # group indicators (one entry per sample)
# Sparse linear coefficients: a p x k matrix (one column per group),
# mostly zero, with a few group-specific (independent) entries and a few
# entries shared across all groups.
beta = matrix(0, p, k)
nonzero.ind = rbinom(p*k, 1, 0.025/k) # Independent coefficients
nonzero.shared = rbinom(p, 1, 0.025) # shared coefficients
# Independent non-zeros: linear indexing into the p x k matrix.
beta[which(nonzero.ind==1)] = rnorm(sum(nonzero.ind), 1, 0.25)
# Shared non-zeros: whole rows, i.e. the same covariates in every group.
beta[which(nonzero.shared==1),] = rnorm(sum(nonzero.shared), -1, 0.25)
# Simulate per-group design matrices and responses, then stack the group
# designs into a single (n.group*k) x p matrix (rows ordered by group,
# matching the `groups` indicator vector).
X = lapply(1:k, function(k.i) matrix(rnorm(n.group*p), n.group, p)) # covariates
y = sapply(1:k, function(k.i) X[[k.i]] %*% beta[,k.i] + rnorm(n.group, 0, sigma)) # response
X = do.call('rbind', X)
# Pairwise Fusion strength hyperparameters (tau(k,k'))
# Same for all pairs in this example
G = matrix(1, k, k)
# Use L1 fusion to estimate betas (with near-optimal sparsity and
# information sharing among groups)
beta.estimate = fusedLassoProximal(X, y, groups, lambda=0.001, tol=9e-5,
                                   gamma=0.001, G, intercept=FALSE,
                                   num.it=2000)
# Generate block diagonal matrices for L2 fusion approach
transformed.data = generateBlockDiagonalMatrices(X, y, groups, G)
# Use L2 fusion to estimate betas (with near-optimal information sharing
# among groups), solved via glmnet over a path of lambda values.
beta.estimate = fusedL2DescentGLMNet(transformed.data$X,
                                     transformed.data$X.fused,
                                     transformed.data$Y, groups,
                                     lambda=c(0,0.001,0.1,1),
                                     gamma=0.001)