Some clustering algorithms take a distance or similarity metric as a parameter. Here we demonstrate how the choice of metric affects the solution space.

#%matplotlib inline
import numpy as np
import pandas as pd
import openensembles as oe
import clustering_algorithms as ca
import matplotlib.pyplot as plt
from sklearn import datasets

# Build the demo dataset: 800 two-dimensional points drawn uniformly from
# [0, 1), i.e. data with no cluster structure (hence the name).
n_samples = 800

no_structure = np.random.rand(n_samples, 2), None
X, y = no_structure

# Alternative dataset (five Gaussian blobs). Uncomment the two lines below to
# use it instead of the uniform noise above; n_samples is only consumed there.
n_samples = 300
#blobs = datasets.make_blobs(n_samples=n_samples, random_state=10,centers=5,cluster_std=2)
#X, y = blobs

df = pd.DataFrame(X)
# Wrap the frame in an OpenEnsembles data container. The second argument is
# presumably the x-axis values/labels for the two feature columns -- confirm
# against the oe.data constructor.
d = oe.data(df, [1,2])

# Plot the raw ('parent') data before any clustering is applied.
plot = d.plot_data('parent')

Demonstration of varying the distance metric for an algorithm based on distance metrics

# Distance metrics to compare; each one is also used as the name of the
# clustering solution it produces.
distances = ['euclidean', 'l1', 'manhattan', 'minkowski', 'l2', 'cosine', 'mahalanobis']


# NOTE(review): `algorithm` and `K` are not assigned anywhere in the visible
# part of this file; they must be defined before this cell runs.
c = oe.cluster(d)
for distance in distances:
    # out_name stays a module-level name because later cells read it.
    out_name = distance
    c.cluster('parent', algorithm, out_name, K=K, Require_Unique=True, distance=distance, linkage='complete')

# Plot each solution under its own figure number. The original counter was
# initialised to 0 and never advanced, so every plot reused fig_num=0.
names = c.labels.keys()
for fig_num, name in enumerate(names):
    d.plot_data('parent', fig_num=fig_num, class_labels=c.labels[name], title=name)

# Compare all solutions pairwise with adjusted mutual information and plot
# the result.
mi = c.MI(MI_type='adjusted')
fig = mi.plot()


Demonstration of varying the distance metric for an algorithm based on affinity metrics

# Baseline run: spectral clustering with K=5 and no distance/affinity
# arguments supplied, stored under the name 'spectral_default'.
c = oe.cluster(d)
c.cluster(
    'parent',
    'spectral',
    'spectral_default',
    K=5,
    Require_Unique=True,
)

# Show the parent data colored by the baseline solution's labels.
data_plot = d.plot_data(
    'parent',
    class_labels=c.labels['spectral_default'],
    title='Default Parameters',
)
# Distance metrics to compare; each one is also used as the name of the
# clustering solution it produces.
distances = ['euclidean', 'l1', 'manhattan', 'minkowski', 'l2', 'cosine', 'mahalanobis']


# NOTE(review): `algorithm` and `K` are not assigned anywhere in the visible
# part of this file; they must be defined before this cell runs.
c = oe.cluster(d)
for distance in distances:
    # out_name stays a module-level name because later cells read it.
    out_name = distance
    c.cluster('parent', algorithm, out_name, K=K, Require_Unique=True, distance=distance)

# Plot each solution under its own figure number. The original counter was
# initialised to 0 and never advanced, so every plot reused fig_num=0.
names = c.labels.keys()
for fig_num, name in enumerate(names):
    d.plot_data('parent', fig_num=fig_num, class_labels=c.labels[name], title=name)

# Compare all solutions pairwise with adjusted mutual information and plot
# the result.
mi = c.MI(MI_type='adjusted')
f = mi.plot()


Call clustering with more control by using ‘precomputed’ distances and affinities.

# Build the similarity matrix by hand, then pass it to the clusterer as a
# precomputed affinity instead of naming a metric.
# NOTE(review): `beta`, `algorithm` and `K` are not assigned anywhere in the
# visible part of this file, and `out_name` is whatever an earlier cell last
# assigned -- confirm they hold the intended values before running this cell.
distance = 'euclidean'
D = ca.returnDistanceMatrix(d.D['parent'], distance)
# beta is the weighting factor used for the conversion (default is 1).
S = ca.convertDistanceToSimilarity(D, beta)

# Title records which metric and beta produced the similarity matrix.
titleStr = "Precomputed Similarity from distance=%s and beta=%0.2f" % (distance, beta)

c = oe.cluster(d)
c.cluster(
    'parent',
    algorithm,
    out_name,
    K=K,
    Require_Unique=True,
    affinity='precomputed',
    M=S,
)

d.plot_data(
    'parent',
    class_labels=c.labels[out_name],
    title=titleStr,
)


DBSCAN calls with precomputed distances


# Switch to a dataset with real structure: 300 points in five Gaussian blobs
# (fixed random_state so the blobs are reproducible).
n_samples = 300
blobs = datasets.make_blobs(n_samples=n_samples, random_state=10,centers=5,cluster_std=2)
X, y = blobs
df = pd.DataFrame(X)
# Wrap the frame in an OpenEnsembles data container. The second argument is
# presumably the x-axis values/labels for the two feature columns -- confirm
# against the oe.data constructor.
d = oe.data(df, [1,2])
d_plot = d.plot_data('parent')

# NOTE(review): `algorithm`, `K` and `distances` are not assigned in this
# cell; they must already be defined when it runs. The solution name
# 'default_DBSCAN' suggests algorithm is expected to be DBSCAN -- confirm.
c = oe.cluster(d)
# Baseline solution with no distance argument supplied.
c.cluster('parent', algorithm, 'default_DBSCAN', K=K, Require_Unique=True)
for distance in distances:
    # Let the clusterer compute the named distance itself...
    c.cluster('parent', algorithm, distance, K=K, Require_Unique=True, distance=distance)

    # ...and, for comparison, hand it the same distance as a precomputed matrix.
    D = ca.returnDistanceMatrix(d.D['parent'], distance)
    c.cluster('parent', algorithm, distance+"_precomputed", K=K, Require_Unique=True, distance='precomputed', M=D)

# Plot each solution under its own figure number. The original counter was
# initialised to 0 and never advanced, so every plot reused fig_num=0.
names = c.labels.keys()
for fig_num, name in enumerate(names):
    d.plot_data('parent', fig_num=fig_num, class_labels=c.labels[name], title=name)

# Compare all solutions pairwise with adjusted mutual information and plot
# the result.
mi = c.MI(MI_type='adjusted')
f = mi.plot()

/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: default_DBSCAN
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: euclidean
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: euclidean_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: l1
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: l1_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: manhattan
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: manhattan_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: minkowski
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: minkowski_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: l2
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: l2_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 1 returned does not match number requested 3 for solution: cosine
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/ UserWarning: Number of unique clusters 1 returned does not match number requested 3 for solution: cosine_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
[ ]: