Some algorithms take distance or similarity as a parameter. Here we demonstrate their effects on the solution space.

[1]:
#%matplotlib inline
import numpy as np
import pandas as pd
import openensembles as oe
import clustering_algorithms as ca
import matplotlib.pyplot as plt
from sklearn import datasets


# Draw a data set with no cluster structure: n_samples points sampled
# uniformly at random in two dimensions (np.random.rand), seeded for
# reproducibility.
n_samples = 800
np.random.seed(0)
X = np.random.rand(n_samples, 2)

# Reset the global NumPy RNG after drawing the data so that the cells below
# start from the freshly seeded state.
np.random.seed(0)

# Wrap the points in an OpenEnsembles data object. [1, 2] is passed as the
# second argument -- presumably the axis labels for the two columns; confirm
# against the oe.data documentation.
df = pd.DataFrame(X)
d = oe.data(df, [1, 2])

# Plot the 'parent' data (the name used for this data throughout the notebook).
plot = d.plot_data('parent')
../_images/Examples_Demonstrate_Distance_Affinity_effects_1_0.png

Demonstration of varying the distance metric for an algorithm based on distance metrics (agglomerative clustering)

[2]:
# Metric names to sweep over; each one is passed as the `distance` keyword
# of c.cluster in the loop that follows.
distances = [
    'euclidean',
    'l1',
    'manhattan',
    'minkowski',
    'l2',
    'cosine',
    'mahalanobis',
]

[3]:

# Cluster the 'parent' data with agglomerative clustering (linkage='complete')
# once per entry in `distances` (defined in an earlier cell). Each solution is
# stored in the cluster object under the name of the metric that produced it.
algorithm = 'agglomerative'
c = oe.cluster(d)
K = 5
for distance in distances:
    c.cluster('parent', algorithm, distance, K=K, Require_Unique=True, distance=distance, linkage='complete')

# PLOT: one figure per stored solution, passing that solution's labels as
# class_labels and its name as the title. enumerate supplies the figure number.
for fig_num, name in enumerate(c.labels.keys()):
    d.plot_data('parent', fig_num=fig_num, class_labels=c.labels[name], title=name)
    plt.show()

# COMPARE SOLUTIONS: compute c.MI with MI_type='adjusted' across the stored
# solutions and plot the result. Assigning the return value keeps its repr
# out of the cell output.
mi = c.MI(MI_type='adjusted')
mi_fig = mi.plot()

../_images/Examples_Demonstrate_Distance_Affinity_effects_4_0.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_1.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_2.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_3.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_4.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_5.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_6.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_4_7.png

Demonstration of varying the distance metric for an algorithm based on affinity metrics (spectral clustering)

[4]:
# Baseline for the comparison below: spectral clustering called with only K
# and Require_Unique set, i.e. without choosing a distance explicitly.
solution_name = 'spectral_default'

c = oe.cluster(d)
c.cluster('parent', 'spectral', solution_name, K=5, Require_Unique=True)

# Plot the data with the baseline solution's labels.
data_plot = d.plot_data(
    'parent',
    class_labels=c.labels[solution_name],
    title='Default Parameters',
)
../_images/Examples_Demonstrate_Distance_Affinity_effects_6_0.png
[5]:
# Metric names to sweep over with spectral clustering in the next cell; each
# one is passed as the `distance` keyword of c.cluster.
distances = [
    'euclidean',
    'l1',
    'manhattan',
    'minkowski',
    'l2',
    'cosine',
    'mahalanobis',
]

[7]:


# Cluster the 'parent' data with spectral clustering once per entry in
# `distances` (defined in an earlier cell). Each solution is stored in the
# cluster object under the name of the metric that produced it.
algorithm = 'spectral'
c = oe.cluster(d)
K = 5
for distance in distances:
    c.cluster('parent', algorithm, distance, K=K, Require_Unique=True, distance=distance)

# PLOT: one figure per stored solution, passing that solution's labels as
# class_labels and its name as the title. enumerate supplies the figure number.
for fig_num, name in enumerate(c.labels.keys()):
    d.plot_data('parent', fig_num=fig_num, class_labels=c.labels[name], title=name)
    plt.show()

# COMPARE SOLUTIONS: compute c.MI with MI_type='adjusted' across the stored
# solutions and plot the result. Assigning the return value keeps its repr
# out of the cell output.
mi = c.MI(MI_type='adjusted')
mi_fig = mi.plot()


../_images/Examples_Demonstrate_Distance_Affinity_effects_8_0.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_1.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_2.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_3.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_4.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_5.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_6.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_8_7.png

Call clustering with more control by using ‘precomputed’ distances and affinities.

[8]:
# Settings for this run.
algorithm = 'spectral'
K = 5
distance = 'euclidean'
beta = 0.25  # a different weighting factor for the conversion (default is 1)
out_name = distance

# Build the matrix by hand: distance matrix of the 'parent' data first, then
# its conversion to a similarity matrix using beta.
dist_matrix = ca.returnDistanceMatrix(d.D['parent'], distance)
similarity = ca.convertDistanceToSimilarity(dist_matrix, beta)

# Hand the similarity matrix to the clustering call via M, with
# affinity='precomputed'.
c = oe.cluster(d)
c.cluster('parent', algorithm, out_name, K=K, Require_Unique=True, affinity='precomputed', M=similarity)

# Plot the resulting labels; the bare call is the cell's last expression, so
# its return value is also shown as the cell output.
plot_title = "Precomputed Similarity from distance=%s and beta=%0.2f"%(distance,beta)
d.plot_data('parent', class_labels=c.labels[out_name], title=plot_title)

[8]:
../_images/Examples_Demonstrate_Distance_Affinity_effects_10_0.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_10_1.png

DBSCAN calls with precomputed distances

[9]:

# Replace the data set with one generated by sklearn's make_blobs
# (300 samples, 5 centers, cluster_std=2, fixed random_state).
np.random.seed(0)
n_samples = 300
X, y = datasets.make_blobs(
    n_samples=n_samples,
    random_state=10,
    centers=5,
    cluster_std=2,
)

# Re-create the OpenEnsembles data object `d` from the new points and plot it.
df = pd.DataFrame(X)
d = oe.data(df, [1, 2])
d_plot = d.plot_data('parent')




../_images/Examples_Demonstrate_Distance_Affinity_effects_12_0.png
[10]:
# Run DBSCAN on the 'parent' data three ways:
#   * once with no distance argument ('default_DBSCAN'),
#   * once per metric in `distances` (defined in an earlier cell), passing the
#     metric by name,
#   * once per metric with the distance matrix computed up front and passed
#     via M with distance='precomputed' (stored as '<metric>_precomputed').
# NOTE: the recorded output of this cell shows a UserWarning for each solution
# whose number of unique clusters differs from the requested K.
algorithm = 'DBSCAN'
K = 3

c = oe.cluster(d)
c.cluster('parent', algorithm, 'default_DBSCAN', K=K, Require_Unique=True)
for distance in distances:
    # Metric passed by name.
    c.cluster('parent', algorithm, distance, K=K, Require_Unique=True, distance=distance)

    # Same metric, but with the distance matrix built explicitly beforehand.
    D = ca.returnDistanceMatrix(d.D['parent'], distance)
    c.cluster('parent', algorithm, distance+"_precomputed", K=K, Require_Unique=True, distance='precomputed', M=D)

# PLOT: one figure per stored solution, passing that solution's labels as
# class_labels and its name as the title. enumerate supplies the figure number.
for fig_num, name in enumerate(c.labels.keys()):
    d.plot_data('parent', fig_num=fig_num, class_labels=c.labels[name], title=name)
    plt.show()

# COMPARE SOLUTIONS: compute c.MI with MI_type='adjusted' across the stored
# solutions and plot the result. Assigning the return value keeps its repr
# out of the cell output.
mi = c.MI(MI_type='adjusted')
f = mi.plot()

/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: default_DBSCAN
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: euclidean
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: euclidean_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: l1
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: l1_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: manhattan
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 2 returned does not match number requested 3 for solution: manhattan_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: minkowski
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: minkowski_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: l2
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 6 returned does not match number requested 3 for solution: l2_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 1 returned does not match number requested 3 for solution: cosine
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
/Users/kmn4mj/GIT/public/OpenEnsembles/openensembles/openensembles.py:581: UserWarning: Number of unique clusters 1 returned does not match number requested 3 for solution: cosine_precomputed
  warnings.warn("Number of unique clusters %d returned does not match number requested %d for solution: %s"%(len(uniqueClusters), K, output_name), UserWarning)
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_1.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_2.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_3.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_4.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_5.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_6.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_7.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_8.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_9.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_10.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_11.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_12.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_13.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_14.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_15.png
../_images/Examples_Demonstrate_Distance_Affinity_effects_13_16.png
[ ]: