"""
This module provides utilities for doing text processing.

Note that standard SArray utilities can be used for transforming text data
into "bag of words" format, where a document is represented as a
dictionary mapping each unique word to the number of times that word occurs
in the document. See :py:func:`~graphlab.SArray.count_words` for more
details. Also, see :py:func:`~graphlab.SFrame.pack` and
:py:func:`~graphlab.SFrame.unstack` for ways of creating SArrays
containing dictionary types.

We provide methods for learning topic models, which can be useful for modeling
large document collections. See
:py:func:`~graphlab.toolkits.text.topic_model.create` for more, as well as the
`How-Tos <http://www.graphlab.com/learn/how-to>`_, data science `Gallery
<http://www.graphlab.com/learn/gallery>`_, and `text analysis chapter of
the User Guide
<http://www.graphlab.com/learn/userguide.html#Modeling_data_Text_analysis>`_.

"""

# Expose the submodules as attributes of this package. These are written as
# absolute imports (matching the imports below) because the bare
# ``import topic_model`` / ``import util`` form is an implicit relative import,
# which only resolves to the sibling modules on Python 2.
from graphlab.toolkits.text import topic_model
from graphlab.toolkits.text import util

# Re-export the text utilities at package level for convenient access.
from graphlab.toolkits.text.util import tf_idf
from graphlab.toolkits.text.util import stopwords
from graphlab.toolkits.text.util import random_split
from graphlab.toolkits.text.util import parse_sparse
from graphlab.toolkits.text.util import parse_docword


