"""
This package contains modules for the clustering toolkit.

For GraphLab Create beta this toolkit contains only the KMeans++ method. Given a
number of clusters, KMeans++ iteratively chooses the best cluster centers and
assigns nearby points to the best cluster. If no points change cluster
membership between iterations, the algorithm terminates. The GraphLab kmeans
method returns a :class:`~graphlab.kmeans.KmeansModel`, which contains a
**clusterid** DataFrame and a **cluster_info** object that contains the location
of the cluster centers and other cluster statistics.

For this example we use the iris dataset downloaded from the UCI datasets, and
use the :py:class:`~graphlab.SFrame` library to standardize the features
to have zero mean and unit standard deviation.

.. sourcecode:: python

    >>> from graphlab import kmeans, SFrame
    >>> dataurl = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    >>> iris_sframe = SFrame.read_csv(dataurl,
    ...                               header=False,
    ...                               column_type_hints={'X1': float, 'X2': float, 'X3': float, 'X4': float})
    >>> del iris_sframe['X5']  # the iris species

    >>> for col in ['X1', 'X2', 'X3', 'X4']:
    ...     iris_sframe[col] = (iris_sframe[col] - iris_sframe[col].mean()) / iris_sframe[col].std()

    >>> out = kmeans.create(iris_sframe, num_clusters=3)

    >>> cluster_info = out.get('cluster_info')
    >>> cluster_info
          X1        X2        X3        X4          __within_distance__  __size__
     0 -1.014579  0.842307 -1.304878 -1.255129             6.962867        50
     1 -0.011396 -0.872885  0.376884  0.311654             6.993177        56
     2  1.167434  0.153778  1.003145  1.029633             6.615748        44

     [3 rows x 6 columns]

    >>> cluster_ids = out.get('clusterid').head()
    >>> cluster_ids
        clusterid  distance
     0          0  0.231622
     1          0  0.978145
     2          0  0.635606
     3          0  0.887279
     4          0  0.426725
"""

# Public API of this package: only the ``kmeans`` submodule is exported.
__all__ = ['kmeans']

# Explicit relative import so the sibling ``kmeans`` submodule is always the
# one bound here. The implicit form (``import kmeans``) only resolves to the
# sibling on Python 2 without ``absolute_import``; on Python 3 it is treated
# as an absolute import and either fails or picks up an unrelated top-level
# module of the same name.
from . import kmeans
