Bicluster

  • biclustering simultaneously clusters the rows and the columns of a data matrix
  • each bicluster determines a submatrix of the original data matrix with some desired properties
  • Common types of bicluster that algorithms look for
    • constant values, constant rows, or constant columns
    • unusually high or low values
    • submatrices with low variance
    • correlated rows or columns

Block Diagonal

  • each row and each column belongs to exactly one bicluster
In [2]:
import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_biclusters
from sklearn.cluster import SpectralCoclustering
In [5]:
# Build a synthetic 300x300 matrix containing five biclusters.
# shuffle=False asks the generator not to shuffle rows/columns; the
# shuffling is done by hand in a later cell.
data, rows, columns = make_biclusters(
    shape=(300, 300),
    n_clusters=5,
    noise=5,
    shuffle=False,
    random_state=0,  # fixed seed so the generated matrix is reproducible
)

# Last expression of the cell: heatmap of the generated matrix.
plt.matshow(data, cmap=plt.cm.Blues)
Out[5]:
<matplotlib.image.AxesImage at 0x7fd9cd11ec50>
In [6]:
# Scramble the row and column order of the matrix with a seeded generator.
# NOTE: this rebinds `data`, so re-running only this cell permutes the
# already-permuted matrix a second time.
n_rows, n_cols = data.shape
rng = np.random.RandomState(0)
row_idx = rng.permutation(n_rows)  # rows are drawn first; draw order fixes the result
col_idx = rng.permutation(n_cols)
data = data[row_idx, :][:, col_idx]

# Last expression of the cell: heatmap of the shuffled matrix.
plt.matshow(data, cmap=plt.cm.Blues)
Out[6]:
<matplotlib.image.AxesImage at 0x7fd9cd379a10>
In [7]:
# Fit a spectral co-clustering model with five clusters to the shuffled
# matrix. `fit` hands back the estimator itself, so construction and
# fitting can be chained.
model = SpectralCoclustering(n_clusters=5, random_state=0).fit(data)

# Echo the fitted estimator as the cell output.
model
Out[7]:
SpectralCoclustering(init='k-means++', mini_batch=False, n_clusters=5,
                     n_init=10, n_jobs=None, n_svd_vecs=None, random_state=0,
                     svd_method='randomized')
In [11]:
# Reorder the shuffled matrix so that rows (and then columns) carrying the
# same cluster label end up next to each other.
row_order = np.argsort(model.row_labels_)
col_order = np.argsort(model.column_labels_)
fit_data = data[row_order][:, col_order]

# Last expression of the cell: heatmap of the rearranged matrix.
plt.matshow(fit_data, cmap=plt.cm.Blues)
Out[11]:
<matplotlib.image.AxesImage at 0x7fd9cf0ee610>

Checkerboard

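  • each row belongs to all column clusters, and each column belongs to all row clusters (e.g. with 4 row clusters and 3 column clusters, every row lies in 3 biclusters and every column in 4)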
In [12]:
from sklearn.datasets import make_checkerboard
from sklearn.cluster import SpectralBiclustering
In [14]:
# Build a synthetic 300x300 matrix with checkerboard structure.
# The cluster counts are kept in `n_clusters` because the clustering cell
# further down reuses the same pair.
# NOTE(review): the pair is presumably (row clusters, column clusters) --
# confirm against the scikit-learn make_checkerboard documentation.
n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
    shape=(300, 300),
    n_clusters=n_clusters,
    noise=10,
    shuffle=False,
    random_state=0,  # fixed seed so the generated matrix is reproducible
)

# Last expression of the cell: heatmap of the generated matrix.
plt.matshow(data, cmap=plt.cm.Blues)
Out[14]:
<matplotlib.image.AxesImage at 0x7fd9cf4e7590>
In [16]:
# Scramble the row and column order of the checkerboard matrix with a
# seeded generator.
# NOTE: this rebinds `data`, so re-running only this cell permutes the
# already-permuted matrix a second time.
n_rows, n_cols = data.shape
rng = np.random.RandomState(0)
row_idx = rng.permutation(n_rows)  # rows are drawn first; draw order fixes the result
col_idx = rng.permutation(n_cols)
data = data[row_idx, :][:, col_idx]

# Last expression of the cell: heatmap of the shuffled matrix.
plt.matshow(data, cmap=plt.cm.Blues)
Out[16]:
<matplotlib.image.AxesImage at 0x7fd9cf79b810>
In [19]:
# Fit a spectral biclustering model to the shuffled checkerboard matrix,
# reusing the `n_clusters` pair from the data-generation cell. `fit` hands
# back the estimator itself, so construction and fitting can be chained.
model = SpectralBiclustering(
    n_clusters=n_clusters,
    method="log",
    random_state=0,
).fit(data)

# Echo the fitted estimator as the cell output.
model
Out[19]:
SpectralBiclustering(init='k-means++', method='log', mini_batch=False, n_best=3,
                     n_clusters=(4, 3), n_components=6, n_init=10, n_jobs=None,
                     n_svd_vecs=None, random_state=0, svd_method='randomized')
In [20]:
# Reorder the shuffled checkerboard matrix so that rows (and then columns)
# carrying the same cluster label end up next to each other.
row_order = np.argsort(model.row_labels_)
col_order = np.argsort(model.column_labels_)
fit_data = data[row_order][:, col_order]

# Last expression of the cell: heatmap of the rearranged matrix.
plt.matshow(fit_data, cmap=plt.cm.Blues)
Out[20]:
<matplotlib.image.AxesImage at 0x7fd9cfd67490>