Bicluster

Biclustering simultaneously clusters the rows and the columns of a data matrix.
Each bicluster determines a submatrix of the original data matrix with some desired properties.
Types of biclusters:
- constant values, constant rows, or constant columns
- unusually high or low values
- submatrices with low variance
- correlated rows or columns

Block Diagonal

In the block-diagonal structure, each row and each column belongs to exactly one bicluster.

import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_biclusters
from sklearn.cluster import SpectralCoclustering

# Synthetic input: a 300x300 matrix holding five biclusters plus noise.
# shuffle=False keeps the generated ordering, so the plot below shows the
# structure before any permutation is applied.
data, rows, columns = make_biclusters(
    shape=(300, 300),
    n_clusters=5,
    noise=5,
    shuffle=False,
    random_state=0,
)

# Heat-map view of the freshly generated matrix.
plt.matshow(data, cmap=plt.cm.Blues)

<matplotlib.image.AxesImage at 0x7fd9cd11ec50>

# Permute rows and columns with a seeded generator so the run is repeatable.
# The row permutation is drawn first, then the column permutation.
rng = np.random.RandomState(0)
row_idx = rng.permutation(len(data))
col_idx = rng.permutation(data.shape[-1])

# Reorder the rows first, then the columns of the row-shuffled matrix.
data = data[row_idx, :][:, col_idx]

# Heat-map view of the shuffled matrix.
plt.matshow(data, cmap=plt.cm.Blues)

<matplotlib.image.AxesImage at 0x7fd9cd379a10>

# Cluster
# Fit a spectral co-clustering model to `data`, asking for 5 biclusters;
# random_state=0 fixes the seed handed to the estimator.
model = SpectralCoclustering(n_clusters=5, random_state=0)
# Kept as a bare trailing expression (not folded into the assignment) so an
# interactive session echoes the value returned by fit().
model.fit(data)

SpectralCoclustering(init='k-means++', mini_batch=False, n_clusters=5,
                     n_init=10, n_jobs=None, n_svd_vecs=None, random_state=0,
                     svd_method='randomized')

# Group rows and columns by their fitted cluster labels: argsort over the
# label arrays yields index orders that place equal labels next to each other.
row_order = np.argsort(model.row_labels_)
col_order = np.argsort(model.column_labels_)
fit_data = data[row_order][:, col_order]

# Heat-map view of the matrix after reordering by cluster label.
plt.matshow(fit_data, cmap=plt.cm.Blues)

<matplotlib.image.AxesImage at 0x7fd9cf0ee610>

Checkerboard

In the checkerboard structure, each row belongs to all column clusters, and each column belongs to all row clusters.

from sklearn.datasets import make_checkerboard
from sklearn.cluster import SpectralBiclustering

# Synthetic input: a 300x300 checkerboard matrix with 4 row clusters and
# 3 column clusters plus noise. shuffle=False keeps the generated ordering,
# so the plot below shows the structure before any permutation is applied.
n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
    shape=(300, 300),
    n_clusters=n_clusters,
    noise=10,
    shuffle=False,
    random_state=0,
)

# Heat-map view of the freshly generated matrix.
plt.matshow(data, cmap=plt.cm.Blues)

<matplotlib.image.AxesImage at 0x7fd9cf4e7590>

# Permute rows and columns with a seeded generator so the run is repeatable.
# The row permutation is drawn first, then the column permutation.
rng = np.random.RandomState(0)
row_idx = rng.permutation(len(data))
col_idx = rng.permutation(data.shape[-1])

# Reorder the rows first, then the columns of the row-shuffled matrix.
data = data[row_idx, :][:, col_idx]

# Heat-map view of the shuffled matrix.
plt.matshow(data, cmap=plt.cm.Blues)

<matplotlib.image.AxesImage at 0x7fd9cf79b810>

# Cluster
# Fit a spectral biclustering model to `data` using the same n_clusters
# tuple that generated it; random_state=0 fixes the seed handed to the
# estimator. method='log' picks one of the estimator's normalization
# options (see the scikit-learn SpectralBiclustering documentation).
model = SpectralBiclustering(n_clusters=n_clusters, method='log', random_state=0)
# Kept as a bare trailing expression (not folded into the assignment) so an
# interactive session echoes the value returned by fit().
model.fit(data)

SpectralBiclustering(init='k-means++', method='log', mini_batch=False, n_best=3,
                     n_clusters=(4, 3), n_components=6, n_init=10, n_jobs=None,
                     n_svd_vecs=None, random_state=0, svd_method='randomized')

# Group rows and columns by their fitted cluster labels: argsort over the
# label arrays yields index orders that place equal labels next to each other.
row_order = np.argsort(model.row_labels_)
col_order = np.argsort(model.column_labels_)
fit_data = data[row_order][:, col_order]

# Heat-map view of the matrix after reordering by cluster label.
plt.matshow(fit_data, cmap=plt.cm.Blues)

<matplotlib.image.AxesImage at 0x7fd9cfd67490>