# SplitBregman: enforce sparsity

Authors
Affiliations
Francesco Picetti
Image and Sound Processing Lab, Politecnico di Milano
Ettore Biondi
SeismoLab, California Institute of Technology

# The Split-Bregman Algorithm: Sparsity Enforcing Inversions¶

@Author: Francesco Picetti - picettifrancesco@gmail.com

In this notebook we will show the Split-Bregman algorithm, as described by .

In its generalized unconstrained formulation (the same we handle in this library), this algorithm takes what we call GeneralizedLasso (see occamypy.problem module):

$$$\arg \min_\mathbf{x} \Vert \mathbf{A} \mathbf{x} - \mathbf{d}\Vert_2^2 + \varepsilon \Vert \mathbf{R}_{L1} \mathbf{x}\Vert_1,$$$

being $\mathbf{x}$ our model, $\mathbf{d}$ the observed data, $\mathbf{A}$ a linear modeling operator, $\mathbf{R}_{L1}$ the sparsity promoting regularizer and its weight $\varepsilon$.

#### Import modules¶

# Importing necessary modules
import numpy as np
import occamypy as o

# Plotting
from matplotlib import rcParams
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt
rcParams.update({
'image.cmap'     : 'gray',
'image.aspect'   : 'auto',
'image.interpolation': None,
'axes.grid'      : True,
'figure.figsize' : (12, 3),
'savefig.dpi'    : 300,
'axes.labelsize' : 14,
'axes.titlesize' : 16,
'font.size'      : 14,
'legend.fontsize': 14,
'xtick.labelsize': 14,
'ytick.labelsize': 14,
'text.usetex'    : True,
'font.family'    : 'serif',
'font.serif'     : 'Latin Modern Roman',
})

## Example 1: 1D Ricker Deconvolution¶

In this example we want to invert a 1D seismic trace for the Earth's reflectivity. We suppose the wavelet to be known, yielding to a deterministic deconvolution problem.

# load wavelet
wav = o.VectorNumpy(np.load("./data/ricker20.npy"), ax_info=[o.AxInfo(n=51, o=0., d=0.002, l="time [s]")])

x = o.VectorNumpy(np.load("./data/reflectivity1D.npy"), ax_info=[o.AxInfo(n=1000, o=0., d=0.002, l="time [s]")])

# instantating operator
W = o.ConvND(x, wav)

# Generating true recorded trace
d = W * x

# Compute spectra
nfft = 4096
x_spectra = np.fft.fft(x.getNdArray(), nfft)
d_spectra = np.fft.fft(d.getNdArray(), nfft)
w_spectra = np.fft.fft(wav.getNdArray(), nfft)
freq = np.fft.fftfreq(nfft, wav.ax_info[0].d)
time_axis = x.ax_info[0].plot()
xlim = x.ax_info[0].o, x.ax_info[0].last
xlbl = x.ax_info[0].l
fig, ax = plt.subplots()
plt.plot(freq[:nfft//4], np.abs(x_spectra)[:nfft//4], 'k',  label='model')
plt.plot(freq[:nfft//4], np.abs(d_spectra)[:nfft//4], 'b',  label='data')
plt.legend(loc='upper right')
plt.xlabel("Frequency [Hz]"), plt.ylabel("Amplitude")
plt.xlim(0,100), plt.ylim(0,30)
plt.show()

fig, ax = plt.subplots(1,2)
ax[0].plot(wav.ax_info[0].plot(), wav.plot())
ax[0].set_title('Wavelet')
ax[0].set_xlabel("Time [s]"), ax[0].set_ylabel("Amplitude")
ax[0].set_xlim(0,0.1), ax[0].set_ylim(-0.5,1)

ax[1].plot(freq[:nfft//4], np.abs(w_spectra)[:nfft//4])
ax[1].set_title('Wavelet Spectrum')
ax[1].set_xlabel("Frequency [Hz]"), ax[1].set_ylabel("Amplitude")
ax[1].set_xlim(0,100), ax[1].set_ylim(0,12)
plt.show()
fig, ax = plt.subplots()
ax.plot(time_axis, x.plot(), 'k',  label='model')
ax.plot(time_axis, d.plot(), 'b',  label='data')
ax.legend(loc='upper left')
ax.set_ylim(-1., 1.), ax.set_xlim(*xlim)
ax.set_xlabel(xlbl), ax.set_ylabel("Amplitude")

axins1 = ax.inset_axes([0.42, 0.55, 0.1, 0.4])
axins1.plot(time_axis, x.plot(), 'k')
axins1.plot(time_axis, d.plot(), 'b')
axins1.set_xlim(0.76, 0.8)
axins1.set_ylim(-.1, .1)
axins1.set_xticklabels('')
axins1.set_yticklabels('')
ax.indicate_inset_zoom(axins1)

axins2 = ax.inset_axes([0.55, 0.55, 0.1, 0.4])
axins2.plot(time_axis, x.plot(), 'k')
axins2.plot(time_axis, d.plot(), 'b')
axins2.set_xlim(1.51, 1.59)
axins2.set_ylim(-.1, .1)
axins2.set_xticklabels('')
axins2.set_yticklabels('')
ax.indicate_inset_zoom(axins2)

axins3 = ax.inset_axes([0.49, 0.05, 0.1, 0.4])
axins3.plot(time_axis, x.plot(), 'k')
axins3.plot(time_axis, d.plot(), 'b')
axins3.set_xlim(1.0, 1.2)
axins3.set_ylim(-.1, .1)
axins3.set_xticklabels('')
axins3.set_yticklabels('')
ax.indicate_inset_zoom(axins3)

plt.suptitle("Model and Data")
plt.show()

Conjugate gradient is a fast and powerful adjoint-state algorithm for solving problems in the form:

$$$\hat{\mathbf{x}} = \arg \min_\mathbf{x} \Vert \mathbf{W}\mathbf{x} - \mathbf{d}\Vert_2^2$$$
# instantiate a linear L2 problem
problemCG = o.LeastSquares(x.clone().zero(), d, W)

# define the solver with 10'000 iterations
CG = o.CG(o.BasicStopper(10000))
CG.setDefaults(save_obj=True)

# solve the problem
CG.run(problemCG, verbose=False)
fig, ax = plt.subplots()
ax.plot(time_axis, x.plot(), 'b', lw=2, label='true model')
ax.plot(time_axis, problemCG.model.plot(), 'r',  label='CG')
ax.legend(loc='upper left')
ax.set_ylim(-1., 1.), ax.set_xlim(*xlim)
ax.set_xlabel(xlbl), ax.set_ylabel("Amplitude")

axins1 = ax.inset_axes([0.42, 0.55, 0.1, 0.4])
axins1.plot(time_axis, x.plot(), 'b', lw=2)
axins1.plot(time_axis, problemCG.model.plot(), 'r')
axins1.set_xlim(0.76, 0.8)
axins1.set_ylim(-.1, .1)
axins1.set_xticklabels('')
axins1.set_yticklabels('')
ax.indicate_inset_zoom(axins1)

axins2 = ax.inset_axes([0.55, 0.55, 0.1, 0.4])
axins2.plot(time_axis, x.plot(), 'b', lw=2)
axins2.plot(time_axis, problemCG.model.plot(), 'r')
axins2.set_xlim(1.51, 1.59)
axins2.set_ylim(-.1, .1)
axins2.set_xticklabels('')
axins2.set_yticklabels('')
ax.indicate_inset_zoom(axins2)

axins3 = ax.inset_axes([0.49, 0.05, 0.1, 0.4])
axins3.plot(time_axis, x.plot(), 'b', lw=2)
axins3.plot(time_axis, problemCG.model.plot(), 'r')
axins3.set_xlim(1.0, 1.2)
axins3.set_ylim(-.1, .1)
axins3.set_xticklabels('')
axins3.set_yticklabels('')
ax.indicate_inset_zoom(axins3)

plt.suptitle('Conjugate Gradient, %d it' % len(CG.obj))
plt.show()

#### Impose sparsity in the solution: FISTA¶

Iterative Shrinkage-Thresholding Algorithms solve the so-called LASSO problem:

$$$\hat{\mathbf{x}} = \arg \min_\mathbf{x} \Vert \mathbf{W}\mathbf{x} - \mathbf{d}\Vert_2^2 + \lambda \Vert \mathbf{x}\Vert_1$$$

If we provide the operator's maximum eigenvalue $\eta$ we can build a fast version of this algorithm by imposing the step $\alpha < 1/\eta^2$ as proposed in .

compute maximum eigenvalue

maxeig = W.powerMethod()
print('FISTA α=%.6e' % (1/(maxeig**2)))
# define the LASSO problem
problemFISTA = o.Lasso(x.clone().zero(), d, W, lambda_value=1, op_norm=maxeig**2)

# instantiate the fast solver with 1000 iterations
FISTA = o.ISTA(o.BasicStopper(10000), fast=True)
FISTA.setDefaults(save_obj=True)

# solve the problem
FISTA.run(problemFISTA, verbose=False)
fig, ax = plt.subplots()
ax.plot(time_axis, x.plot(), 'b', lw=2, label='true model')
ax.plot(time_axis, problemFISTA.model.plot(), 'r',  label='FISTA')
ax.legend(loc='upper left')
ax.set_ylim(-1., 1.), ax.set_xlim(*xlim)
ax.set_xlabel(xlbl), ax.set_ylabel("Amplitude")

axins1 = ax.inset_axes([0.42, 0.55, 0.1, 0.4])
axins1.plot(time_axis, x.plot(), 'b', lw=2)
axins1.plot(time_axis, problemFISTA.model.plot(), 'r')
axins1.set_xlim(0.76, 0.8)
axins1.set_ylim(-.1, .1)
axins1.set_xticklabels('')
axins1.set_yticklabels('')
ax.indicate_inset_zoom(axins1)

axins2 = ax.inset_axes([0.55, 0.55, 0.1, 0.4])
axins2.plot(time_axis, x.plot(), 'b', lw=2)
axins2.plot(time_axis, problemFISTA.model.plot(), 'r')
axins2.set_xlim(1.51, 1.59)
axins2.set_ylim(-.1, .1)
axins2.set_xticklabels('')
axins2.set_yticklabels('')
ax.indicate_inset_zoom(axins2)

axins3 = ax.inset_axes([0.49, 0.05, 0.1, 0.4])
axins3.plot(time_axis, x.plot(), 'b', lw=2)
axins3.plot(time_axis, problemFISTA.model.plot(), 'r')
axins3.set_xlim(1.0, 1.2)
axins3.set_ylim(-.1, .1)
axins3.set_xticklabels('')
axins3.set_yticklabels('')
ax.indicate_inset_zoom(axins3)

plt.suptitle(r'Fast-ISTA, $\lambda=$%.e, %d it' % (problemFISTA.lambda_value, len(FISTA.obj)))
plt.show()

Note that FISTA is able to recover the model's kinematics but not the true amplitude. Indeed, this algorithm is very sensitive to $\lambda$ parameter. Let's try another solver.

#### Split-Bregman for LASSO problem¶

# define the Linear Regularized Problem
problemSB = o.GeneralizedLasso(x.clone().zero(), d, W, reg=o.Identity(x), eps=.1)

# instantiate the Split-Bregman solver
SB = o.SplitBregman(o.BasicStopper(1000), niter_inner=3, niter_solver=5,
linear_solver='LSQR', breg_weight=1., warm_start=True)
SB.setDefaults(save_obj=True)
# solve the problem
SB.run(problemSB, verbose=False, inner_verbose=False)
its = "%d-%d-%d" % (len(SB.obj), SB.niter_inner, SB.niter_solver)
fig, ax = plt.subplots()
ax.plot(time_axis, x.plot(), 'b', lw=2, label='true model')
ax.plot(time_axis, problemSB.model.plot(), 'r',  label='SB')
ax.legend(loc='upper left')
ax.set_ylim(-1., 1.), ax.set_xlim(*xlim)
ax.set_xlabel(xlbl), ax.set_ylabel("Amplitude")

axins1 = ax.inset_axes([0.42, 0.55, 0.1, 0.4])
axins1.plot(time_axis, x.plot(), 'b', lw=2)
axins1.plot(time_axis, problemSB.model.plot(), 'r')
axins1.set_xlim(0.76, 0.8)
axins1.set_ylim(-.1, .1)
axins1.set_xticklabels('')
axins1.set_yticklabels('')
ax.indicate_inset_zoom(axins1)

axins2 = ax.inset_axes([0.55, 0.55, 0.1, 0.4])
axins2.plot(time_axis, x.plot(), 'b', lw=2)
axins2.plot(time_axis, problemSB.model.plot(), 'r')
axins2.set_xlim(1.51, 1.59)
axins2.set_ylim(-.1, .1)
axins2.set_xticklabels('')
axins2.set_yticklabels('')
ax.indicate_inset_zoom(axins2)

axins3 = ax.inset_axes([0.49, 0.05, 0.1, 0.4])
axins3.plot(time_axis, x.plot(), 'b', lw=2)
axins3.plot(time_axis, problemSB.model.plot(), 'r')
axins3.set_xlim(1.0, 1.2)
axins3.set_ylim(-.1, .1)
axins3.set_xticklabels('')
axins3.set_yticklabels('')
ax.indicate_inset_zoom(axins3)

plt.suptitle(r'Split-Bregman %s, $\varepsilon=%.e$' % (its, problemSB.eps))
plt.show()
fig, ax = plt.subplots()
ax.semilogy(time_axis, np.abs(x.plot()), 'b', label="true model")
ax.semilogy(time_axis, np.abs(problemSB.model.plot()), 'r', label="SB")
ax.set_xlim(*xlim), ax.set_ylim(1e-8, 1)
ax.legend(loc='upper left')
ax.set_xlabel(xlbl), ax.set_ylabel("Amplitude")

plt.suptitle(r'Split-Bregman %s, $\varepsilon=%.e$' % (its, problemSB.eps))
plt.show()

The results is much better than FISTA output: the dynamics is almost-perfectly recovered.

## Example 2: 1D Velocity Deconvolution¶

Now we create a synthetic velocity profile and suppose to have recorded a smooth version of it. The deconvolution problem aims at recovering the sharp model, and this is done by using a Total Variation regularizer.

# create the true model
nx = 201
x = o.VectorNumpy((nx,), ax_info=[o.AxInfo(201, 0, 1, "depth [sample]")]).zero()
x[20:30] = 10.
x[50:75] = -5.
x[100:150] = 2.5
x[175:180] = 7.5

# instantiate the blurring operator
G = o.GaussianFilter(x, (2.,))

# simulate data
d = G * x
fig, ax = plt.subplots()
ax.plot(x.plot(), 'k',  label='model')
ax.plot(d.plot(), 'b',  label='data')
ax.autoscale(enable=True, axis='x', tight=True)
ax.set_ylim(-6, 12)
plt.legend(loc="upper center")
ax.set_xlabel(x.ax_info[0].l), ax.set_ylabel("Amplitude")
plt.show()

As we can see, the model presents some flat regions. Let's have a look to its first derivative:

D = o.FirstDerivative(x, stencil='forward')
Dx = D * x
fig, ax = plt.subplots()
ax.plot(x.plot(), 'k',  label='model')
ax.set_ylim(-12, 12)
ax.autoscale(enable=True, axis='x', tight=True)
plt.legend(loc="upper center")
ax.set_xlabel("Depth"), ax.set_ylabel("Amplitude")
plt.show()

The model derivative is sparse! We can set up a SplitBregman solver:

problem = o.GeneralizedLasso(x.clone().zero(), d, G, reg=D, eps=.1)

SB = o.SplitBregman(o.BasicStopper(200), niter_inner=3, niter_solver=10,
linear_solver='LSQR', breg_weight=1., warm_start=True)
SB.setDefaults(save_obj=True, save_model=True)

SB.run(problem, verbose=False, inner_verbose=False)
its = "%d-%d-%d" % (len(SB.obj), SB.niter_inner, SB.niter_solver)

fig, ax = plt.subplots()
ax.plot(x.plot(), 'b', lw=2, label="true model")
ax.plot(problem.model.plot(), 'r', label="SB")
ax.legend(loc="upper center")
ax.set_ylim(-6, 12)
ax.autoscale(enable=True, axis='x', tight=True)
ax.set_xlabel(x.ax_info[0].l), ax.set_ylabel("Amplitude")

plt.suptitle(r'Split-Bregman %s, $\varepsilon=%.e$' % (its, problem.eps))
plt.show()

Let's check the convergence:

fig, ax = plt.subplots()
ax.semilogy(SB.obj / SB.obj[0], 'k', lw=2, label='Obj')
ax.semilogy(SB.obj_terms[:, 0] / SB.obj[0], 'b--', label=r"$0.5 \Vert \mathbf{Gx-d} \Vert_2^2$")
ax.semilogy(SB.obj_terms[:, 1] / SB.obj[0], 'r--', label=r"$\varepsilon \Vert \mathbf{Dx}\Vert_1$")
ax.set_ylim(1e-7, 1), ax.set_xlim(0, SB.stopper.niter)
ax.set_xlabel("Iterations"), ax.set_ylabel("Amplitude")
ax.legend(loc="right")

plt.suptitle(r'Split-Bregman %s, $\varepsilon=%.e$' % (its, problem.eps))
plt.show()

We can see that the L1 term reaches its final value after few iterations. Let's see the inverted model after 2 iterations:

fig, ax = plt.subplots()
ax.plot(x.plot(), 'b', lw=2, label="true model")
ax.plot(SB.model[2].plot(), 'r', label="SB")
ax.legend(loc="upper center")
ax.set_ylim(-6, 12)
ax.autoscale(enable=True, axis='x', tight=True)
ax.set_xlabel(x.ax_info[0].l), ax.set_ylabel("Amplitude")

plt.suptitle(r'Split-Bregman %s after 2, $\varepsilon=%.e$' % (its, problem.eps))
plt.show()

Just for comparison, let's solve the problem with CG and FISTA.

problemCG = o.LeastSquares(x.clone().zero(), d, G)
CG = o.CG(o.BasicStopper(1000))
CG.setDefaults(save_obj=True)
CG.run(problemCG, verbose=False)

fig, ax = plt.subplots()
ax.plot(x.plot(), 'b', lw=2, label="true model")
ax.plot(problemCG.model.plot(), 'r', label="CG")
ax.legend(loc="upper center")
ax.set_ylim(-6, 12)
ax.autoscale(enable=True, axis='x', tight=True)
ax.set_xlabel(x.ax_info[0].l), ax.set_ylabel("Amplitude")

plt.suptitle('Conjugate Gradient, %d it' % len(CG.obj))
plt.show()
# compute maximum eigenvalue
maxeig = G.powerMethod()
print('FISTA α=%.6e' % (1/(maxeig**2)))
problemFISTA = o.Lasso(x.clone().zero(), d, G, lambda_value=.1, op_norm=maxeig**2)
FISTA = o.ISTA(o.BasicStopper(1000), fast=True)
FISTA.setDefaults(save_obj=True)
FISTA.run(problemFISTA, verbose=False)

fig, ax = plt.subplots(figsize=(12, 3))
ax.plot(x.plot(), 'b', lw=2, label="true model")
ax.plot(problemFISTA.model.plot(), 'r', label="FISTA")

ax.legend(loc="upper center")
ax.set_ylim(-6, 12)
ax.autoscale(enable=True, axis='x', tight=True)
ax.set_xlabel(x.ax_info[0].l), ax.set_ylabel("Amplitude")

plt.suptitle(r'Fast ISTA, $\lambda=$%.e, %d it' % (problemFISTA.lambda_value, len(FISTA.obj)))
plt.show()

## Example 3: 2D Phantom Deconvolution¶

In this example we reconstruct a phantom CT image starting from a blurred acquisition. Again, we regularize the inversion by imposing the first derivative to be sparse

# model

# operator
G = o.GaussianFilter(x, (3.,3.))

# data
d = G * x
fig, ax = plt.subplots(1, 2, figsize=(11,4))
ax[0].imshow(x.plot(), cmap='bone', clim=(0,1))
ax[0].grid(False)
ax[0].set_title('Model')
ax[1].imshow(d.plot(), cmap='bone', clim=(0,1))
ax[1].grid(False)
ax[1].set_title('Data')
plt.show()

Problem and solver:

D = o.Gradient(x)
Dx = D * x

fig, ax = plt.subplots(1, 2, figsize=(11,4))
ax[0].imshow(Dx.vecs[1].plot(), cmap='bone')
ax[0].grid(False)
ax[0].set_title(r'$\nabla_h(x)$')
ax[1].imshow(Dx.vecs[0].plot(), cmap='bone')
ax[1].grid(False)
ax[1].set_title(r'$\nabla_v(x)$')
plt.show()

Just for comparison, let's solve the problem with CG.

problemCG = o.LeastSquares(x.clone().zero(), d, G)
CG = o.CG(o.BasicStopper(1000))
CG.setDefaults(save_obj=True)
CG.run(problemCG, verbose=False)
fig, ax = plt.subplots(1, 2, figsize=(11,4))
ax[0].imshow(x.plot(), cmap='bone', clim=(0,1))
ax[0].grid(False)
ax[0].set_title('Model')
ax[1].imshow(problemCG.model.plot(), cmap='bone', clim=(0,1))
ax[1].grid(False)
ax[1].set_title('Conjugate Gradient, %d it' % len(CG.obj))
plt.show()

See the ringing effect there? Let's remove it by means of a regularizer!

problemSB = o.GeneralizedLasso(x.clone().zero(), d, G, reg=D, eps=1e-3)
SB = o.SplitBregman(o.BasicStopper(niter=200), niter_inner=3, niter_solver=10,
linear_solver='LSQR', breg_weight=1., warm_start=True)
SB.setDefaults(save_obj=True)
SB.run(problemSB, verbose=False, inner_verbose=False)
its = "%d-%d-%d" % (len(SB.obj), SB.niter_inner, SB.niter_solver)

fig, ax = plt.subplots(1, 2, figsize=(11,4))
ax[0].imshow(x.plot(), cmap='bone', clim=(0,1))
ax[0].grid(False)
ax[0].set_title('Model')
ax[1].imshow(problemSB.model.plot(), cmap='bone', clim=(0,1))
ax[1].grid(False)
ax[1].set_title(r'SB %s, $\varepsilon=%.e$' % (its, problemSB.eps))
plt.show()

Again, let's evaluate the objective function terms

fig, ax = plt.subplots()
ax.semilogy(SB.obj / SB.obj[0], 'k', lw=2, label='Obj')
ax.semilogy(SB.obj_terms[:, 0] / SB.obj[0], 'b--', label=r"$0.5 \Vert \mathbf{Gx-d} \Vert_2^2$")
ax.semilogy(SB.obj_terms[:, 1] / SB.obj[0], 'r--', label=r"$\varepsilon \Vert \mathbf{Dx}\Vert_1$")
ax.set_ylim(1e-8, 1), ax.set_xlim(0, SB.stopper.niter)
ax.set_xlabel("Iterations")
ax.legend(loc="right")

plt.suptitle(r'Split-Bregman %s, $\varepsilon=%.e$' % (its, problem.eps))
plt.show()
References
1. Goldstein, T., & Osher, S. (2009). The Split Bregman Method for L1-Regularized Problems. SIAM Journal on Imaging Sciences, 2(2), 323–343. 10.1137/080725891
2. Beck, A., & Teboulle, M. (2009). A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse Problems. SIAM Journal on Imaging Sciences, 2(1), 183–202. 10.1137/080716542