'Linearize optimization of non-overlapping items along a sequence

This is a follow-up to my previous question here. I have a optimization model that tries to find the highest coverage of a set of probe to a sequence. I approached it by creating an overlap matrix as shown below.

import pyomo
import pyomo.environ as pe
import pyomo.opt as po
import numpy as np
import matplotlib.pyplot as plt

# Initialise all sequences and probes
sequence = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
probes = ["a", "b", "c", "d", "e", "f", "g", "h"]
probe_starts = {"a": 0, "b": 1, "c": 4, "d": 5, "e": 6, "f": 8, "g": 13, "h": 12}
probe_ends = {"a": 2, "b": 2, "c": 6, "d": 6, "e": 8, "f": 11, "g": 15, "h": 14}
probe_lengths = {
    p: e - s + 1 for (p, s), e in zip(probe_starts.items(), probe_ends.values())
}

# Create a matrix of probes against probes to check for overlap
def is_overlapping(x, y):
    x_start, x_end = x
    y_start, y_end = y
    return (
        (x_start >= y_start and x_start <= y_end)
        or (x_end >= y_start and x_end <= y_end)
        or (y_start >= x_start and y_start <= x_end)
        or (y_end >= x_start and y_end <= x_end)
    )

overlap = {}
matrix = np.zeros((len(probes), len(probes)))
for row, x in enumerate(zip(probe_starts.values(), probe_ends.values())):
    for col, y in enumerate(zip(probe_starts.values(), probe_ends.values())):
        matrix[row, col] = is_overlapping(x, y)
    overlap[probes[row]] = list(matrix[row].astype(int))

I now build up my model as normal, adding a constraint that if one probe is assigned than any overlapping probes cannot be assigned.

# Model definition
model = pe.ConcreteModel()
model.probes = pe.Set(initialize=probes)
model.lengths = pe.Param(model.probes, initialize=probe_lengths)
model.overlap = pe.Param(model.probes, initialize=overlap, domain=pe.Any)
model.assign = pe.Var(model.probes, domain=pe.Boolean)

# Objective - highest coverage
obj = sum(model.assign[p] * probe_lengths[p] for p in model.probes)
model.objective = pe.Objective(expr=obj, sense=pe.maximize)

# Constraints
model.no_overlaps = pe.ConstraintList()
for query in model.probes:
    model.no_overlaps.add(
        sum(
            [
                model.assign[query] * model.assign[p]
                for idx, p in enumerate(model.probes)
                if model.overlap[query][idx]
            ]
        )
        <= 1
    )

This works when solving with the quadratic BONMIN solver as shown below. However, when scaling up to a few thousand probes with significantly more overlap then this becomes prohibitively slowly.

solver = po.SolverFactory("BONMIN")
results = solver.solve(model)

visualize = np.zeros((len(probes), len(sequence)))
for idx, (start, end, val) in enumerate(
    zip(probe_starts.values(), probe_ends.values(), model.assign.get_values().values())
):
    visualize[idx, start : end + 1] = val + 1

plt.imshow(visualize)
plt.yticks(ticks=range(len(probes)), labels=probes)
plt.xticks(range(len(sequence)))
plt.colorbar()
plt.show()

Any suggestions regarding how to convert this into a linear problem would be appreciated. Thanks in advance!

Solution 1:^[1]

You can attack this as an Integer Program (IP). There are 2 variables you need: one to indicate whether a probe has been "assigned" and another to indicate (or count) if a spot s in the sequence is covered by probe p in order to do the accounting.

It also helps to chop up the sequence into subsets (shown) that are indexed by the probes which could cover them, if assigned.

There is probably a dynamic programming approach to this as well that somebody might chip in. This works...

Code:

# model to make non-contiguous connections across a sequence
# with objective to "cover" as many points in sequence as possible

import pyomo.environ as pe

sequence = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
probes = ["a", "b", "c", "d", "e", "f", "g", "h"]
probe_starts = {"a": 0, "b": 1, "c": 4, "d": 5, "e": 6, "f": 8, "g": 13, "h": 12}
probe_ends = {"a": 2, "b": 2, "c": 6, "d": 6, "e": 8, "f": 11, "g": 15, "h": 14}

# sequence = [0, 1, 2, 3, 4, 5]
# probes = ["a", "b", "c"]
# probe_starts = {"a": 0, "b": 2, "c": 3}
# probe_ends = {"a": 2, "b": 4, "c": 5}

coverages = {p:[t for t in sequence if t>=probe_starts[p] and t<=probe_ends[p]] for p in probes}

# Model definition
model = pe.ConcreteModel()

model.sequence          = pe.Set(initialize=sequence)
model.probes            = pe.Set(initialize=probes)
# make an indexed set as convenience of probes:coverage ...
model.covers            = pe.Set(model.probes, within=model.sequence, initialize=coverages)
model.covers_flat_set   = pe.Set(initialize=[(p,s) for p in probes for s in model.covers[p]])

model.assign    = pe.Var(model.probes, domain=pe.Binary)  # 1 if probe p is used...
model.covered   = pe.Var(model.covers_flat_set, domain=pe.Binary)  # s is covered by p

# model.pprint()

# Objective
obj = sum(model.covered[p, s] for (p, s) in model.covers_flat_set)
model.objective = pe.Objective(expr=obj, sense=pe.maximize)

# Constraints

# selected probe must cover the associated points between start and end, if assigned
def cover(model, p):
    return sum(model.covered[p, s] for s in model.covers[p]) == len(model.covers[p])*model.assign[p]
model.C1 = pe.Constraint(model.probes, rule=cover)

# cannot cover any point by more than 1 probe
def over_cover(model, s):
    cov_options = [(p,s) for p in model.probes if (p, s) in model.covers_flat_set]
    if not cov_options:
        return pe.Constraint.Skip  # no possible coverages
    return sum(model.covered[p, s] for (p, s) in cov_options) <= 1
model.C2 = pe.Constraint(model.sequence, rule=over_cover)

solver = pe.SolverFactory('glpk')
result = solver.solve(model)
print(result)

#model.display()

# el-cheapo visualization...
for s in model.sequence:
    probe = None
    print(f'{s:3d}', end='')
    for p in model.probes:
        if (p, s) in model.covers_flat_set and model.assign[p].value:
            probe = p
    if probe:
        print(f' {probe}')
    else:
        print()

Yields:

Problem: 
- Name: unknown
  Lower bound: 13.0
  Upper bound: 13.0
  Number of objectives: 1
  Number of constraints: 24
  Number of variables: 32
  Number of nonzeros: 55
  Sense: maximize
Solver: 
- Status: ok
  Termination condition: optimal
  Statistics: 
    Branch and bound: 
      Number of bounded subproblems: 5
      Number of created subproblems: 5
  Error rc: 0
  Time: 0.007474184036254883
Solution: 
- number of solutions: 0
  number of solutions displayed: 0

  0 a
  1 a
  2 a
  3
  4 c
  5 c
  6 c
  7
  8 f
  9 f
 10 f
 11 f
 12 h
 13 h
 14 h
 15
 16
[Finished in 609ms]

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source
Solution 1	AirSquid

'Linearize optimization of non-overlapping items along a sequence

Solution 1:[1]

Code:

Yields:

Sources

Related Questions

Solution 1:^[1]