"""
.. warning:: This product is currently in a beta release. The API reference is
subject to change.

This package defines the GraphLab SGraph, Vertex, and Edge objects. The SGraph is
a directed graph, consisting of a set of Vertex objects and Edges that connect
pairs of Vertices. The methods in this module are available from the top level
import of the graphlab package.

"""

import graphlab.connect.main as glconnect
from graphlab.data_structures.sframe import SFrame
from graphlab.data_structures.sarray import SArray
from graphlab.data_structures.gframe import GFrame, VERTEX_GFRAME, EDGE_GFRAME
from graphlab.cython.cy_graph import UnityGraphProxy
from graphlab.cython.context import debug_trace as cython_context
from graphlab.util import make_internal_url
import graphlab.canvas
import graphlab.canvas.inspect
from graphlab.deps import pandas as pd
from graphlab.deps import HAS_PANDAS
import inspect
import copy

## \internal Default column name for vertex id.
## Vertex.__init__ reads the vertex ID from this key of a pandas Series row.
_VID_COLUMN = '__id'

## \internal Default column name for source vid.
## Edge.__init__ reads the source vertex ID from this key of a pandas Series row.
_SRC_VID_COLUMN = '__src_id'

## \internal Default column name for target vid.
## Edge.__init__ reads the destination vertex ID from this key of a pandas Series row.
_DST_VID_COLUMN = '__dst_id'


#/**************************************************************************/
#/*                                                                        */
#/*                         SGraph Related Classes                         */
#/*                                                                        */
#/**************************************************************************/
class Vertex(object):
    """
    A vertex object, consisting of a vertex ID and a dictionary of vertex
    attributes. The vertex ID can be an integer, string, or float.

    Parameters
    ----------
    vid : int | string | float
        Vertex ID.

    attr : dict, optional
        Vertex attributes. A Dictionary of string keys and values with one of the
        following types: int, float, string, array of floats. When omitted,
        each vertex gets its own new empty dictionary.

    _series : pandas.Series
        Internal. Do not use.

    Returns
    -------
    out : Vertex
        A new vertex object.
    """

    __slots__ = ['vid', 'attr']

    def __init__(self, vid, attr=None, _series=None):
        """__init__(self, vid, attr={})
        Construct a new vertex.
        """
        if _series is not None:
            # Internal path: build the vertex from one row of a DataFrame.
            # Every column except the ID column becomes an attribute.
            self.vid = _series[_VID_COLUMN]
            self.attr = _series.to_dict()
            self.attr.pop(_VID_COLUMN)
        else:
            self.vid = vid
            # Never share one default dict between instances: mutating the
            # attributes of one vertex must not leak into another.
            self.attr = {} if attr is None else attr

    def __repr__(self):
        return "V(" + str(self.vid) + ", " + str(self.attr) + ")"

    def __str__(self):
        # Same text as the repr; kept as a separate method for clarity.
        return "V(" + str(self.vid) + ", " + str(self.attr) + ")"


class Edge(object):
    """
    A directed edge between two vertices, consisting of source vertex ID,
    destination vertex ID, and a dictionary of edge attributes.

    Parameters
    ----------
    src_vid : int | string | float
        Source vertex ID.

    dst_vid : int | string | float
        Target vertex ID.

    attr : dict, optional
        Edge attributes. A Dictionary of string keys and values with one of the
        following types: integer, float, string, array of floats. When
        omitted, each edge gets its own new empty dictionary.

    _series : pandas.Series
        Internal. Do not use.

    Returns
    -------
    out : Edge
        A new edge object.
    """

    __slots__ = ['src_vid', 'dst_vid', 'attr']

    def __init__(self, src_vid, dst_vid, attr=None, _series=None):
        """__init__(self, src_vid, dst_vid, attr={})
        Construct a new edge.
        """
        if _series is not None:
            # Internal path: build the edge from one row of a DataFrame.
            # Every column except the two endpoint columns becomes an attribute.
            self.src_vid = _series[_SRC_VID_COLUMN]
            self.dst_vid = _series[_DST_VID_COLUMN]
            self.attr = _series.to_dict()
            self.attr.pop(_SRC_VID_COLUMN)
            self.attr.pop(_DST_VID_COLUMN)
        else:
            self.src_vid = src_vid
            self.dst_vid = dst_vid
            # Never share one default dict between instances: mutating the
            # attributes of one edge must not leak into another.
            self.attr = {} if attr is None else attr

    def __repr__(self):
        return ("E(" + str(self.src_vid) + " -> " + str(self.dst_vid) + ", " +
                str(self.attr) + ")")

    def __str__(self):
        # Same text as the repr; kept as a separate method for clarity.
        return ("E(" + str(self.src_vid) + " -> " + str(self.dst_vid) + ", " +
                str(self.attr) + ")")


class SGraph(object):
    """
    A scalable graph data structure backed by :class:`~graphlab.SFrame`
    that allows arbitrary dictionary attributes on vertices
    and edges, and provides flexible vertex/edge query functions,
    and seamless transformation from(to) :class:`~graphlab.SFrame`.

    **Construction**

    There are several ways to create a :class:`~graphlab.SGraph`.
    The simplest one is to make an empty graph then add vertices and edges.

    >>> import graphlab
    >>> from graphlab import SGraph, Vertex, Edge, SFrame

    >>> g = SGraph()

    >>> verts = [Vertex(0, attr={'breed': 'labrador'}),
                 Vertex(1, attr={'breed': 'labrador'}),
                 Vertex(2, attr={'breed': 'vizsla'})]
    >>> g = g.add_vertices(verts)
    >>> g = g.add_edges(Edge(1, 2))

    Note that SGraphs are *structurally immutable*. In the above code,
    add_vertices and add_edges both return a new graph; the old
    graph gets garbage collected.

    We can chain these steps together to make a new graph in a single line.

    >>> g = SGraph().add_vertices([Vertex(i) for i in range(10)]).add_edges(
            [Edge(i, i+1) for i in range(9)])

    SGraphs can also be created from just an edge list stored in an
    :class:`~graphlab.SFrame`. Vertices are added to the graph automatically
    based on the edge list, and columns of the SFrame not used as source or
    destination vertex IDs are assumed to be edge attributes. For this example
    we download a dataset of James Bond characters to an SFrame, then build the
    graph.

    >>> edge_data = SFrame.read_csv('http://s3.amazonaws.com/GraphLab-Datasets/bond/bond_edges.csv')
    >>> vertex_data = SFrame.read_csv('http://s3.amazonaws.com/GraphLab-Datasets/bond/bond_vertices.csv')
    >>> g = SGraph()
    >>> g = g.add_edges(edge_data, src_field='src', dst_field='dst')

    The SGraph constructor can also take in a vertex SFrame and edge SFrame directly.
    For example, we can construct the same James Bond graph using the following one-liner:

    >>> g = SGraph(vertices=vertex_data, edges=edge_data, vid_field='name',
                   src_field='src', dst_field='dst')

    Finally, a :class:`~graphlab.SGraph` can be created directly from a file,
    either local or remote, using the :py:func:`load_sgraph` method. Loading a
    graph with this method works with both the native binary save format and a
    variety of text formats.

    >>> g.save('james_bond') # the graph will be saved to a directory named 'james_bond'.
    >>> newgraph = graphlab.load_sgraph('james_bond')

    **Inspecting SGraph**

    Small graphs can be explored very efficiently with the :py:func:`show`
    method, which displays a plot of the graph. The vertex labels can be IDs or
    any vertex attribute.

    >>> g.show(vlabel='id', v_offset=0.05)

    For large graphs visual displays are difficult, but graph exploration can
    still be done with the :py:func:`summary` method---which prints the number
    of vertices and edges---or by retrieving subsets of edges and vertices.

    >>> print g.summary()
    {'num_edges': 21, 'num_vertices': 10}

    To retrieve the contents of a graph, the :py:func:`get_vertices` and
    :py:func:`get_edges` methods return SFrames. These functions can filter
    edges and vertices based on vertex IDs or attributes. Omitting IDs and
    attributes returns all vertices or edges.

    >>> verts = g.get_vertices(ids=['James Bond'])
            __id
    0  James Bond

    >>> bosses = g.get_edges(fields={'relation': 'worksfor'})
       relation       __src_id     __dst_id
    0  worksfor              M   James Bond
    1  worksfor              M   Moneypenny
    2  worksfor              M            Q
    3  worksfor  Elliot Carver  Henry Gupta
    4  worksfor  Elliot Carver    Gotz Otto

    >>> edges = g.get_edges()
    >>> print edges.head(5)
         relation        __src_id       __dst_id
    0    worksfor               M              Q
    1      friend  Inga Bergstorm     James Bond
    2   killed_by   Elliot Carver     James Bond
    3   engagedto    Paris Carver  Elliot Carver
    4  managed_by      Moneypenny              M


    **Modifying SGraph**

    SGraph is *structurally immutable*, but the data stored on vertices and edges
    can be mutated.

    Modifying data on the SGraph can be done through 2 *special PROPERTIES*:
    :py:attr:`~graphlab.SGraph.vertices` and :py:attr:`~graphlab.SGraph.edges`.

    :py:attr:`~graphlab.SGraph.vertices` is a *special SFrame* object associated
    with the vertex data of the SGraph, and similarly
    :py:attr:`~graphlab.SGraph.edges` is a *special* SFrame associated with the edge
    data. The following example shows the difference between the special
    graph-related SFrames and the normal SFrames.

    For example:

    >>> print g.edges.head(5)
         relation        __src_id       __dst_id
    0    worksfor               M              Q
    1      friend  Inga Bergstorm     James Bond
    2   killed_by   Elliot Carver     James Bond
    3   engagedto    Paris Carver  Elliot Carver
    4  managed_by      Moneypenny              M

    This code gives the exact same answer as

    >>> print g.get_edges().head(5)

    However, the difference is that the return value of g.get_edges() is a normal
    SFrame independent from g, whereas g.edges is bound to g.
    We can modify the edge data using this special edge SFrame.

    >>> # The following code MUTATES the relation attribute on the edges of g
    >>> g.edges['relation'] = g.edges['relation'].apply(lambda x: x[0].upper())
    >>> print g.get_edges().head(5)
         relation        __src_id       __dst_id
    0    W                      M              Q
    1    F         Inga Bergstorm     James Bond
    2    K          Elliot Carver     James Bond
    3    E           Paris Carver  Elliot Carver
    4    M             Moneypenny              M

    >>> # The following code DOES NOT MUTATE the relation attribute on the edges of g
    >>> e = g.get_edges()  # e is a normal SFrame independent of g.
    >>> e['relation'] = e['relation'].apply(lambda x: x[0].lower())
    >>> print g.get_edges().head(5)
         relation        __src_id       __dst_id
    0    W                      M              Q
    1    F         Inga Bergstorm     James Bond
    2    K          Elliot Carver     James Bond
    3    E           Paris Carver  Elliot Carver
    4    M             Moneypenny              M

    Calling a method like head(), tail(), or append() on a graph-related SFrame would result
    in a new instance of a regular SFrame. For example, the following code DOES NOT MUTATE g.

    >>> e = g.edges.head(5)
    >>> e['is_friend'] = e['relation'].apply(lambda x: x[0] == 'F')

    Another important difference is that, in these two special SFrames, the '__id',
    '__src_id', and '__dst_id' columns are not mutable, because changing them would
    change the structure of the graph and SGraph is *structurally immutable*.

    Otherwise, g.vertices and g.edges act like normal SFrames, and it makes
    mutating graph data super easy. For example, initializing (removing) an edge field
    is the same as adding (removing) a column to(from) an SFrame:

    >>> g.edges['weight'] = 1.0
    >>> del g.edges['weight']

    See Also
    --------
    Vertex, Edge, SFrame
    """

    __slots__ = ['__proxy__', '_vertices', '_edges']

    def __init__(self, vertices=None, edges=None, vid_field='__id',
                 src_field='__src_id', dst_field='__dst_id', _proxy=None):
        """
        __init__(vertices=None, edges=None, vid_field='__id', src_field='__src_id', dst_field='__dst_id')

        Build a graph. With no arguments the graph is empty; when `vertices`
        and/or `edges` are supplied they are added to the new graph.

        Parameters
        ----------
        vertices : SFrame
            An SFrame containing vertex id columns and optional vertex data columns. Optional.
        edges : SFrame
            An SFrame containing source and target id columns and optional edge data columns. Optional.
        vid_field : str
            The name of vertex id column in the `vertices` SFrame. Optional
        src_field : str
            The name of source id column in the `edges` SFrame. Optional
        dst_field : str
            The name of target id column in the `edges` SFrame. Optional
        """
        if _proxy is not None:
            # Internal path: adopt the supplied proxy as-is; `vertices` and
            # `edges` are not consulted in this case.
            self.__proxy__ = _proxy
        else:
            # Start from an empty graph. add_vertices/add_edges return new
            # SGraph objects, so only their proxies are kept.
            self.__proxy__ = UnityGraphProxy(glconnect.get_client())
            if vertices is not None:
                with_vertices = self.add_vertices(vertices, vid_field)
                self.__proxy__ = with_vertices.__proxy__
            if edges is not None:
                with_edges = self.add_edges(edges, src_field, dst_field)
                self.__proxy__ = with_edges.__proxy__

        # Graph-bound frames returned by the `vertices` / `edges` properties.
        self._vertices = GFrame(self, VERTEX_GFRAME)
        self._edges = GFrame(self, EDGE_GFRAME)

    def __str__(self):
        """Returns a readable string representation summarizing the graph."""
        return "SGraph(%s)" % str(self.summary())

    def __repr__(self):
        """Returns a readable string representation summarizing the graph."""
        return "SGraph(%s)\nVertex Fields:%s\nEdge Fields:%s" % \
               (str(self.summary()), str(self.get_vertex_fields()), str(self.get_edge_fields()))

    @property
    def vertices(self):
        """
        The special vertex SFrame of the graph for vertex data manipulation.

        Modifying the columns of the returned SFrame will update the vertex data of the graph.
        To preserve the graph structure, the '__id' column of the returned SFrame is read-only.

        The same object is returned on every access: it is created once, when
        the graph is constructed.

        Examples
        --------
        >>> import graphlab
        >>> from graphlab import SGraph, Vertex
        >>> g = SGraph().add_vertices([Vertex('cat', {'fluffy': 1}),
                                       Vertex('dog', {'fluffy': 1, 'woof': 1}),
                                       Vertex('hippo', {})])

        Copy 'woof' into 'bark':

        >>> g.vertices['bark'] = g.vertices['woof']

        Remove 'woof':

        >>> del g.vertices['woof']

        Create a new field 'like_fish':

        >>> g.vertices['like_fish'] = g.vertices['__id'] == 'cat'
        >>> print g.vertices
        +-------+--------+------+-----------+
        |  __id | fluffy | bark | like_fish |
        +-------+--------+------+-----------+
        |  dog  |  1.0   | 1.0  |     0     |
        |  cat  |  1.0   | nan  |     1     |
        | hippo |  nan   | nan  |     0     |
        +-------+--------+------+-----------+
        [3 rows x 4 columns]

        Replace missing values with 0:

        >>> for col in g.vertices.column_names():
        ...     if col != '__id':
        ...         g.vertices.fillna(col, 0)
        >>> print g.vertices
        +-------+--------+------+-----------+
        |  __id | fluffy | bark | like_fish |
        +-------+--------+------+-----------+
        |  dog  |  1.0   | 1.0  |     0     |
        |  cat  |  1.0   | 0.0  |     1     |
        | hippo |  0.0   | 0.0  |     0     |
        +-------+--------+------+-----------+
        [3 rows x 4 columns]
        """
        return self._vertices

    @property
    def edges(self):
        """
        The special edge SFrame of the graph for edge data manipulation.

        Modifying the columns of the returned SFrame will update the edge data of the graph.
        To preserve the graph structure, the '__src_id', and '__dst_id'
        columns of the returned SFrame are read-only.

        The same object is returned on every access: it is created once, when
        the graph is constructed.

        Please see examples in :py:attr:`~graphlab.SGraph.vertices`.
        """
        return self._edges

    def summary(self):
        """
        Return basic graph statistics in a dictionary.

        The statistics are fetched from the underlying graph proxy and copied
        into a plain ``dict``.

        Returns
        -------
        out : dict
            A dictionary containing the number of vertices and edges.
        """
        stats = self.__proxy__.summary()
        return {key: value for key, value in stats.items()}

    def get_vertices(self, ids=[], fields={}, format='sframe'):
        """
        Return a collection of vertices and their attributes.

        Parameters
        ----------

        ids : List [int | float | str]
            List of vertex IDs to retrieve. Only vertices in this list will be
            returned. A single ID without an ``__iter__`` attribute is wrapped
            in a one-element list.

        fields : dict | pandas.DataFrame
            Dictionary specifying equality constraint on field values. For
            example ``{'gender': 'M'}``, returns only vertices whose 'gender'
            field is 'M'. ``None`` can be used to designate a wildcard. e.g.
            {'relationship': None} will find all vertices with the field
            'relationship' regardless of the value.

        format : {'sframe', 'dataframe', 'list'}
            Output format. The SFrame output (default) contains a column
            __id with vertex IDs and a column for each vertex attribute.
            The 'dataframe' output is a pandas DataFrame indexed by '__id'
            (an empty DataFrame when nothing matches). List output returns a
            list of Vertex objects.

        Returns
        -------
        out : SFrame | pandas.DataFrame | list [Vertex]
            The matching vertices in the requested format.

        Raises
        ------
        ValueError
            If `format` is not one of the supported values.

        Examples
        --------
        Return all vertices in the graph.

        >>> g.get_vertices()

        Return vertex with ID of 1.

        >>> g.get_vertices(ids=1)

        Return vertices 1, 2, and 3.

        >>> g.get_vertices(ids=[1, 2, 3])

        Return vertices with the vertex attribute "gender".

        >>> g.get_vertices(fields={'gender': None})

        Return vertices with the vertex attribute "gender" equal to "M".

        >>> g.get_vertices(fields={'gender': 'M'})

        Return vertices 1, 2, and 3 that have the vertex attribute "gender" set to
        "M".

        >>> g.get_vertices(ids=[1, 2, 3], fields={'gender': 'M'})
        """
        # Allow a bare scalar ID in place of a list.
        if not hasattr(ids, '__iter__'):
            ids = [ids]

        with cython_context():
            vertex_proxy = self.__proxy__.get_vertices(ids, fields)
            vertex_sf = SFrame(_proxy=vertex_proxy)

        if format == 'sframe':
            return vertex_sf

        if format == 'dataframe':
            assert HAS_PANDAS, 'Cannot use dataframe because Pandas is not avaiable or version is too low.'
            row_count = vertex_sf.num_rows()
            if row_count == 0:
                return pd.DataFrame()
            frame = vertex_sf.head(vertex_sf.num_rows()).to_dataframe()
            return frame.set_index('__id')

        if format == 'list':
            return _dataframe_to_vertex_list(vertex_sf.to_dataframe())

        raise ValueError("Invalid format specifier")

    def get_edges(self, src_ids=[], dst_ids=[], fields={}, format='sframe'):
        """
        Return a collection of edges and their attributes. This function can be
        used to find edges by vertex IDs, filter on edge attributes, or list
        in-out neighbors of graphs.

        Parameters
        ----------
        src_ids, dst_ids : list
            Parallel arrays of vertex IDs, with each pair corresponding to an
            edge to fetch. Only edges in this list are returned. ``None`` can be
            used to designate a wild-card. For instance, ``src_ids=[1, 2,
            None]``, ``dst_ids=[3, None, 5]`` will fetch the edge 1->3, all
            outgoing edges of 2 and all incoming edges of 5. Either argument
            may be left empty, which implies an array of all wildcards of the
            same length as the other one.

        fields : dict
            Dictionary specifying equality constraints on field values. For
            example ``{'relationship': 'following'}``, returns only edges whose
            'relationship' field equals 'following'. ``None`` can be used as a
            value to designate a wildcard. e.g. ``{'relationship': None}`` will
            find all edges with the field 'relationship' regardless of the
            value.

        format : {'sframe', 'dataframe', 'list'}
            Output format. The 'sframe' output (default) contains columns
            __src_id and __dst_id with edge vertex IDs and a column for each
            edge attribute. The 'dataframe' output is a pandas DataFrame (an
            empty one when nothing matches). List output returns a list of
            Edge objects.

        Returns
        -------
        out : SFrame | pandas.DataFrame | list [Edge]
            The matching edges in the requested format.

        Raises
        ------
        ValueError
            If `format` is not one of the supported values.

        Examples
        --------
        Return all edges in the graph.

        >>> g.get_edges()
        >>> g.get_edges(src_ids=[None], dst_ids=[None])

        Return edges with attribute "rating".

        >>> g.get_edges(fields={'rating': None})

        Return edges with the attribute "rating" of 5.

        >>> g.get_edges(fields={'rating': 5})

        Return edges 1 --> 3 and 2 --> 4 (if present in the graph).

        >>> g.get_edges(src_ids=[1, 2], dst_ids=[3, 4])

        Return edges 1 --> 3 and 2 --> 4 which have the edge attribute "rating"
        of 5.

        >>> g.get_edges(src_ids=[1, 2], dst_ids=[3, 4], fields={'rating': 5})

        Return out-edges of vertex 1 with the edge attribute "rating".

        >>> g.get_edges(src_ids=[1], dst_ids=[None], fields={'rating': None})

        Return in-edges of vertex 3.

        >>> g.get_edges(src_ids=[None], dst_ids=[3])

        Return out-edges of 1 and in-edges of 3. If edge 1 --> 3 exists, it will
        only be returned once.

        >>> g.get_edges(src_ids=[1, None], dst_ids=[None, 3])
        """
        # Allow bare scalar IDs in place of lists.
        if not hasattr(src_ids, '__iter__'):
            src_ids = [src_ids]
        if not hasattr(dst_ids, '__iter__'):
            dst_ids = [dst_ids]

        # When only one side is given, pad the other with wildcards so the
        # two arrays stay parallel.
        num_src, num_dst = len(src_ids), len(dst_ids)
        if num_src == 0 and num_dst > 0:
            src_ids = [None] * num_dst
        elif num_dst == 0 and num_src > 0:
            dst_ids = [None] * num_src

        with cython_context():
            constraint_count = len(src_ids) + len(dst_ids) + len(fields)
            if constraint_count == 0:
                # No filter at all: use the lazy full edge listing.
                edge_proxy = self.__proxy__.get_edges_lazy()
            else:
                edge_proxy = self.__proxy__.get_edges(src_ids, dst_ids, fields)
            edge_sf = SFrame(_proxy=edge_proxy)

        if format == 'sframe':
            return edge_sf

        if format == 'dataframe':
            assert HAS_PANDAS, 'Cannot use dataframe because Pandas is not avaiable or version is too low.'
            if edge_sf.num_rows() == 0:
                return pd.DataFrame()
            return edge_sf.head(edge_sf.num_rows()).to_dataframe()

        if format == 'list':
            return _dataframe_to_edge_list(edge_sf.to_dataframe())

        raise ValueError("Invalid format specifier")

    def add_vertices(self, vertices, vid_field=None):
        """
        Add one or many vertices to the graph and return the resulting graph.

        Vertices should be input as a list of Vertex objects (if only one
        Vertex is added this will be a list of length 1), a pandas DataFrame,
        or an SFrame. If vertices are specified by SFrame or DataFrame,
        vid_field specifies which column contains the vertex ID. Remaining
        columns are assumed to hold additional vertex attributes. If these
        attributes are not already present in the graph, they are added,
        possibly overwriting existing vertices.

        Parameters
        ----------
        vertices : Vertex | List [Vertex] | pandas.Dataframe | SFrame
            Vertex data.

        vid_field : string
            Field in the Dataframe or SFrame to use as vertex ID. Required if
            vertices is an SFrame. If vertices is a DataFrame and vid_field is
            not specified, the row index is used as vertex ID.

        Returns
        -------
        out : SGraph
            A new graph with vertices added.

        Raises
        ------
        ValueError
            If the arguments do not match the required type, or if the vertex
            ID column contains missing values.
        """
        vertex_sf = _vertex_data_to_sframe(vertices, vid_field)

        # `== None` is intentional: it is an element-wise comparison on the
        # column, and the sum counts the rows with a missing ID.
        missing_ids = (vertex_sf[_VID_COLUMN]==None).sum()
        if missing_ids > 0:
            raise ValueError('Vertex id column cannot contain missing value.'
                    ' Please use dropna() to drop the missing value from the input and try again.')

        with cython_context():
            new_proxy = self.__proxy__.add_vertices(vertex_sf.__proxy__, _VID_COLUMN)
            return SGraph(_proxy=new_proxy)

    def add_edges(self, edges, src_field=None, dst_field=None):
        """
        Add one or many edges to the graph and return the resulting graph.

        Edges should be input as a list of Edge objects (for a single edge,
        this will be a list of length 1), an SFrame, or a Pandas DataFrame. If
        the new edges are in an SFrame or DataFrame, then `src_field` and
        `dst_field` are required to specify the columns that contain the
        source and target vertex IDs; additional columns are treated as edge
        attributes. If the new edges contain attributes not already present in
        the graph, new graph attributes are created in the graph. This may
        overwrite existing edges that lack the new attributes.

        Parameters
        ----------
        edges : Edge | List [Edge] | pandas.Dataframe | SFrame
            Edge data. If the edges are in an SFrame or DataFrame, then
            `src_field` and `dst_field` are required to specify the columns that
            contain the source and target vertex IDs.

        src_field : string
            Field in the Pandas DataFrame or SFrame to use as source vertex IDs.
            Not required if edges is a list.

        dst_field : string
            Field in the Pandas DataFrame or SFrame to use as target vertex IDs.
            Not required if edges is a list.

        Returns
        -------
        out : SGraph
            A new graph with edges added.

        Raises
        ------
        ValueError
            If the arguments do not match the required types, or if the source
            or target ID column contains missing values.
        """
        edge_sf = _edge_data_to_sframe(edges, src_field, dst_field)

        # `== None` is intentional: it is an element-wise comparison on the
        # column, and the sum counts the rows with a missing endpoint.
        missing_sources = (edge_sf[_SRC_VID_COLUMN]==None).sum()
        if missing_sources > 0:
            raise ValueError('source vid column cannot contain missing value. '
                    'Please use dropna() to drop the missing value from the input and try again.')
        missing_targets = (edge_sf[_DST_VID_COLUMN]==None).sum()
        if missing_targets > 0:
            raise ValueError('target vid column cannot contain missing value. '
                    'Please use dropna() to drop the missing value from the input and try again.')

        with cython_context():
            new_proxy = self.__proxy__.add_edges(edge_sf.__proxy__, _SRC_VID_COLUMN, _DST_VID_COLUMN)
            return SGraph(_proxy=new_proxy)

    def get_fields(self):
        """
        Return a list of vertex and edge attribute fields in the graph.

        Vertex fields come first, followed by edge fields. If a field is
        common to both vertex and edge attributes, it will show up twice in
        the returned list.
        """
        vertex_fields = self.get_vertex_fields()
        edge_fields = self.get_edge_fields()
        return vertex_fields + edge_fields

    def get_vertex_fields(self):
        """Return a list of vertex attribute fields in the graph."""
        with cython_context():
            vertex_fields = self.__proxy__.get_vertex_fields()
            return vertex_fields

    def get_edge_fields(self):
        """Return a list of edge attribute fields in the graph."""
        with cython_context():
            edge_fields = self.__proxy__.get_edge_fields()
            return edge_fields

    def select_fields(self, fields):
        """
        Return a new graph with only the selected fields.

        A field name that exists on both vertices and edges is kept on both
        sides of the projected graph.

        Parameters
        ----------
        fields : list [string]
            A list of field names to select.

        Returns
        -------
        out : SGraph
            A new graph whose vertex and edge data are projected to the selected
            fields.

        Raises
        ------
        ValueError
            If a requested field is neither a vertex field nor an edge field.

        """
        vfields = self.__proxy__.get_vertex_fields()
        efields = self.__proxy__.get_edge_fields()
        selected_vfields = []
        selected_efields = []
        for f in fields:
            # Check both sides independently: a name shared by vertex and
            # edge data must be selected for both, not just for vertices.
            is_vertex_field = f in vfields
            is_edge_field = f in efields
            if is_vertex_field:
                selected_vfields.append(f)
            if is_edge_field:
                selected_efields.append(f)
            if not (is_vertex_field or is_edge_field):
                raise ValueError('Field %s not in graph' % f)

        with cython_context():
            proxy = self.__proxy__
            proxy = proxy.select_vertex_fields(selected_vfields)
            proxy = proxy.select_edge_fields(selected_efields)
            return SGraph(_proxy=proxy)

    def triple_apply(self, triple_apply_fn, mutated_fields, input_fields=None):
        '''
        Apply a transform function to each edge and its associated source and target vertices in parallel.
        Each edge is visited once and in parallel. Modification to vertex data will be protected by lock.

        The effect on the return graph is equivalent to the following pseudo code:

        >>> PARALLEL FOR (source, edge, target) AS triple in G:
        ...     LOCK (triple.source, triple.target)
        ...     (source, edge, target) = triple_apply_fn(triple)
        ...     UNLOCK (triple.source, triple.target)
        ... END PARALLEL FOR

        Parameters
        ----------
        triple_apply_fn : Function : (dict, dict, dict) -> (dict, dict, dict)
            The function takes as input a tuple of (source_data, edge_data, target_data)
            and returns a tuple of (new_source_data, new_target_data).
            All variables in the input and output tuples have dict type.

        mutated_fields : List[str] | str
            A list of column names for the fields that triple_apply_fn will be mutating.
            Note: columns that are actually mutated by the triple_apply_fn but not specified in
            mutated_fields will have undetermined effects.

        input_fields : List[str] | str
            A list of column names that the triple_apply_fn will have access to. Must be a superset
            of `mutated_fields`.  Optional, defaults to None, which grant access to all fields.

        Examples
        --------
        The following will return a new graph g2 and store the vertex degree
        count in the "degree" field.

        >>> import graphlab as gl
        >>> g = gl.load_sgraph('some_graph')
        >>> vdata = g.get_vertices()
        >>> vdata['degree'] = 0
        >>> g = g.add_vertices(vdata, vid_field='__id')
        >>> def degree_count_fn (src, edge, dst):
                src['degree'] += 1
                dst['degree'] += 1
                return (src, edge, dst)
        >>> g2 = g.triple_apply(degree_count_fn, mutated_fields=['degree'])
        >>> print g2.get_vertices(['degree'])

        Returns
        -------
        out : SGraph
            A new SGraph with updated vertex, and edge data.
            The updated fields are restricted to be the mutated_fields.

        Notes
        -----
        Currently the triple_apply does not support creating new fields
        in the lambda function.
        '''
        assert inspect.isfunction(triple_apply_fn), "Input must be a function"
        if not (type(mutated_fields) is list or type(mutated_fields) is str):
            raise TypeError('mutated_fields must be str or list of str')
        if not (input_fields is None or type(input_fields) is list or type(input_fields) is str):
            raise TypeError('input_fields must be str or list of str')
        if type(mutated_fields) == str:
            mutated_fields = [mutated_fields]
        if len(mutated_fields) is 0:
            raise ValueError('mutated_fields cannot be empty')
        for f in ['__id', '__src_id', '__dst_id']:
            if f in mutated_fields:
                raise ValueError('mutated_fields cannot contain %s' % f)

        # select input fields
        g = None
        if input_fields is None:
            input_fields = self.get_fields()
            g = self
        elif type(input_fields) is str:
            input_fields = [input_fields]
            g = self.select_fields(input_fields)

        # check input fields contains mutated fields
        if not set(mutated_fields).issubset(input_fields):
            raise ValueError('Mutated field \"%s\" does not exist in graph' % f)

        return SGraph(_proxy=g.__proxy__.lambda_triple_apply(triple_apply_fn, mutated_fields))

    def save(self, filename, format='auto'):
        """
        Save the graph to disk. If the graph is saved in binary format, the
        graph can be loaded using the ``load_sgraph`` method. Alternatively, it
        can be saved as "json" to get a human readable / portable graph
        representation.

        Parameters
        ----------
        filename : string
            Filename to use when saving the file. It can be either a local or remote url.

        format : {'auto', 'binary', 'json', 'csv'}
            Optional file format. With 'auto' (the default) the format is
            determined from the filename: 'json' when it ends with '.json' or
            '.json.gz', otherwise 'binary'.

        Raises
        ------
        ValueError
            If the format is not supported.

        See Also
        --------
        load_sgraph
        """
        # Compare by value: identity (`is`) only matches when the caller's
        # string happens to be the very same object as the literal.
        if format == 'auto':
            if filename.endswith(('.json', '.json.gz')):
                format = 'json'
            else:
                format = 'binary'

        supported_formats = ['binary', 'json', 'csv']
        if format not in supported_formats:
            raise ValueError('Invalid format: %s. Supported formats are: %s'
                             % (format, supported_formats))
        with cython_context():
            self.__proxy__.save_graph(make_internal_url(filename), format)

    @graphlab.canvas.inspect.find_vars
    def show(self, vlabel=None, vlabel_hover=False, vcolor=[0.522, 0.741, 0.], highlight={},
             highlight_color=[0.69, 0., 0.498], node_size=300, elabel=None, elabel_hover=False,
             ecolor=[0.37, 0.33, 0.33], ewidth=1, v_offset=0.03, h_offset=0., arrows=False,
             vertex_positions=None):
        """
        Launch or update GraphLab Canvas and render a visualization of this
        SGraph. Plots are only produced for graphs with fewer than 1,000 edges
        and 1,000 vertices.

        Parameters
        ----------
        vlabel : string, optional
            Name of the field used to label each vertex. Defaults to None,
            meaning vertices are not labeled. Use 'id' to label vertices with
            their vertex ID.

        vlabel_hover : bool, optional
            Controls how vertex labels are displayed when vlabel is given.
            False (the default) keeps labels permanently visible; True shows
            a label only while the mouse hovers over it.

        vcolor : list of float, optional
            RGB triple for the primary vertex color. Default is green
            ([0.522, 0.741, 0.]).

        highlight : dict or list or SArray, optional
            As a dict: maps Vertex ID to an RGB color triple (list of float,
            as in vcolor).
            As a list or SArray (DEPRECATED): Vertex IDs to draw in
            highlight_color.

        highlight_color : list of float, optional
            RGB triple applied to highlighted vertices when highlight is a
            list or SArray. Default is fuchsia ([0.69, 0., 0.498]). For
            per-vertex colors, pass highlight as a dict of Vertex IDs to color
            values instead.

        node_size : int, optional
            Size of plotted vertices.

        elabel : string, optional
            Name of the field used to label each edge.

        elabel_hover : bool, optional
            Controls how edge labels are displayed when elabel is given.
            False (the default) keeps labels permanently visible; True shows
            a label only while the mouse hovers over it.

        ecolor : list of float, optional
            RGB triple for edge color. Default is grey ([0.37, 0.33, 0.33]).

        ewidth : int, optional
            Edge width.

        v_offset : float, optional
            Vertical offset of vertex labels, expressed as a fraction of the
            total plot height. The default of 0.03 places the label 3% of the
            plot height higher in the canvas.

        h_offset : float, optional
            Horizontal offset of vertex labels, expressed as a fraction of the
            total plot width. An offset of 0.03 moves the label 3% of the plot
            width to the right. Default is 0.0.

        arrows : bool, optional
            If True, draw arrows indicating edge direction.

        vertex_positions : tuple, optional
            A 2-element tuple of column names in self.vertices. When given,
            those columns provide the X and Y coordinates of the vertices in
            the layout. When omitted, vertices are placed by a standard graph
            layout algorithm that ignores vertex and edge attributes.

        Examples
        --------
        To plot a graph with fewer than 1,000 vertices and edges, call .show and
        provide relevant parameters.

        >>> sg = graphlab.SGraph()
        >>> sg = sg.add_edges([graphlab.Edge(i, i+1) for i in range(5)])
        >>> sg.show(highlight=[2,3], elabel='__src_id', elabel_hover=False)

        Notes
        -----
        Graphs larger than 1,000 vertices and edges cannot be displayed as-is.
        For such graphs, construct a subgraph by selecting some vertices and
        edges, then call .show on the result.
        """
        # Normalize the deprecated SArray / list forms of `highlight` into the
        # dict form: every listed vertex id maps to `highlight_color`.
        if isinstance(highlight, SArray):
            highlight = list(highlight)
        if isinstance(highlight, list):
            highlight = dict.fromkeys(highlight, highlight_color)

        view_params = {
            'vlabel': vlabel,
            'vlabel_hover': vlabel_hover,
            'vcolor': vcolor,
            'highlight': highlight,
            'node_size': node_size,
            'elabel': elabel,
            'elabel_hover': elabel_hover,
            'ecolor': ecolor,
            'ewidth': ewidth,
            'v_offset': v_offset,
            'h_offset': h_offset,
            'arrows': arrows,
            'vertex_positions': vertex_positions,
        }
        return graphlab.canvas._show(variable=self, params=view_params)

    def get_neighborhood(self, ids, radius=1, full_subgraph=True):
        """
        Return a graph containing the neighborhood around a set of target vertices,
        ignoring edge directions. Note that setting radius greater than two
        often results in a time-consuming query for a very large subgraph.

        Parameters
        ----------
        ids : list [int | float | str]
            List of target vertex IDs.

        radius : int, optional
            Radius of the neighborhood to return. Setting radius larger
            than 2 may result in a very large subgraph. With a radius below 1
            no hop is taken, so only the target vertices themselves are
            considered.

        full_subgraph : bool, optional
            If True, return all edges between vertices in a target node's
            neighborhood, also known as the subgraph induced by the target
            node's neighbors, or the egocentric network for the target node. If
            False, return only edges on paths of length <= radius from the
            target node, also known as the reachability graph.

        Returns
        -------
        out : SGraph
            The subgraph with the neighborhoods around the target vertices.

        Examples
        --------
        >>> sg = graphlab.SGraph()
        >>> sg = sg.add_edges([graphlab.Edge(i, i+1) for i in range(9)])
        >>> subgraph = sg.get_neighborhood(ids=[1, 7], radius=2, full_subgraph=True)

        References
        ----------
        - Marsden, P. (2002) `Egocentric and sociocentric measures of network
          centrality <http://www.sciencedirect.com/science/article/pii/S03788733
          02000163>`_.
        - `Wikipedia - Reachability <http://en.wikipedia.org/wiki/Reachability>`_
        """

        verts = ids

        # These stay None when radius < 1, because the loop below never runs.
        edges_out = None
        edges_in = None

        ## find the vertices within radius (and the path edges)
        for _ in range(radius):
            edges_out = self.get_edges(src_ids=verts)
            edges_in = self.get_edges(dst_ids=verts)

            verts = list(edges_in['__src_id']) + list(edges_in['__dst_id']) + \
                list(edges_out['__src_id']) + list(edges_out['__dst_id'])
            # De-duplicate, but keep a list so that every later query receives
            # the same container type as the documented `ids` argument.
            verts = list(set(verts))

        ## make a new graph to return and add the vertices
        g = SGraph()
        g = g.add_vertices(self.get_vertices(verts), vid_field='__id')

        ## add the requested edge set
        if full_subgraph:
            induced_edge_out = self.get_edges(src_ids=verts)
            induced_edge_in = self.get_edges(dst_ids=verts)
            df_induced = induced_edge_out.append(induced_edge_in)
            # Grouping by every column with no aggregator removes the
            # duplicate rows introduced by appending the two edge sets.
            df_induced = df_induced.groupby(df_induced.column_names(), {})

            # Keep only edges whose both endpoints are in the neighborhood.
            verts_sa = SArray(verts)
            edges = df_induced.filter_by(verts_sa, "__src_id")
            edges = edges.filter_by(verts_sa, "__dst_id")

        elif edges_out is not None:
            path_edges = edges_out.append(edges_in)
            edges = path_edges.groupby(path_edges.column_names(), {})

        else:
            # radius < 1: no hop was taken, so there are no path edges.
            edges = None

        if edges is not None:
            g = g.add_edges(edges, src_field='__src_id', dst_field='__dst_id')
        return g


#/**************************************************************************/
#/*                                                                        */
#/*                            Module Function                             */
#/*                                                                        */
#/**************************************************************************/
def load_graph(filename, format='binary'):
    """
    Deprecated alias of load_sgraph.

    Issues a PendingDeprecationWarning and then forwards `filename` and
    `format` to load_sgraph, returning its result.
    """
    from warnings import warn
    warn("load_graph has been renamed to load_sgraph. This function will be removed in the next release.",
         PendingDeprecationWarning)
    return load_sgraph(filename, format=format)


def load_sgraph(filename, format='binary'):
    """
    Load graph from text file or previously saved graph binary.

    Parameters
    ----------
    filename : string
        Location of the file. Can be a local path or a remote URL.

    format : {'binary', 'snap', 'csv', 'tsv'}
        Required input data format. Default to binary.

        - 'binary' is the native graph format obtained from `SGraph.save`.
        - 'snap' is a tab separated edge list format with comments, used in
          the `Stanford Network Analysis Platform <http://snap.stanford.edu/snap/>`_.
        - 'csv' is a comma-separated edge list without header or comments.
        - 'tsv' is a tab-separated edge list without header.

    Returns
    -------
    out : SGraph
        The loaded graph. For the text formats, the first two columns of the
        file ('X1' and 'X2') are used as source and destination vertex ids.

    Raises
    ------
    ValueError
        If the format is not one of the supported values.
    """
    if format not in ['binary', 'snap', 'csv', 'tsv']:
        raise ValueError('Invalid format: %s' % format)

    with cython_context():
        # Compare by value. With identity (`is`) a unicode or otherwise
        # non-interned format string passes the validation above but matches
        # no branch, leaving the graph unset.
        if format == 'binary':
            proxy = glconnect.get_unity().load_graph(make_internal_url(filename))
            g = SGraph(_proxy=proxy)
        elif format == 'snap':
            sf = SFrame.read_csv(filename, comment_char='#', delimiter='\t',
                                 header=False, column_type_hints=int)
            g = SGraph().add_edges(sf, 'X1', 'X2')
        elif format == 'csv':
            sf = SFrame.read_csv(filename, header=False)
            g = SGraph().add_edges(sf, 'X1', 'X2')
        else:  # 'tsv', the only remaining validated value
            sf = SFrame.read_csv(filename, header=False, delimiter='\t')
            g = SGraph().add_edges(sf, 'X1', 'X2')
        g.summary()  # materialize
        return g


#/**************************************************************************/
#/*                                                                        */
#/*                            Helper Function                             */
#/*                                                                        */
#/**************************************************************************/
def _vertex_list_to_dataframe(ls, id_column_name):
    """
    Convert a list of vertices into dataframe.

    The result has one row per vertex: the column `id_column_name` holds each
    vertex's `vid`, plus one column per attribute key found on any vertex.
    A vertex that lacks a given attribute gets None in that column. An empty
    list yields a dataframe with only the (empty) id column.
    """
    assert HAS_PANDAS, 'Cannot use dataframe because Pandas is not avaiable or version is too low.'
    # set().union(*...) copes with an empty list (reduce() without an
    # initializer raises TypeError) and does not depend on the builtin
    # `reduce`, which only exists on Python 2.
    cols = set().union(*(v.attr.keys() for v in ls))
    df = pd.DataFrame({id_column_name: [v.vid for v in ls]})
    for c in cols:
        df[c] = [v.attr.get(c) for v in ls]
    return df


def _edge_list_to_dataframe(ls, src_column_name, dst_column_name):
    """
    Convert a list of edges into dataframe.

    The result has one row per edge: `src_column_name` and `dst_column_name`
    hold each edge's `src_vid` and `dst_vid`, plus one column per attribute
    key found on any edge. An edge that lacks a given attribute gets None in
    that column. An empty list yields a dataframe with only the two (empty)
    id columns.
    """
    assert HAS_PANDAS, 'Cannot use dataframe because Pandas is not avaiable or version is too low.'
    # set().union(*...) copes with an empty list (reduce() without an
    # initializer raises TypeError) and does not depend on the builtin
    # `reduce`, which only exists on Python 2.
    cols = set().union(*(e.attr.keys() for e in ls))
    df = pd.DataFrame({
        src_column_name: [e.src_vid for e in ls],
        dst_column_name: [e.dst_vid for e in ls]})
    for c in cols:
        df[c] = [e.attr.get(c) for e in ls]
    return df


def _dataframe_to_vertex_list(df):
    """
    Build a list of Vertex objects from a dataframe, one per row.

    The vertex ids are expected in the _VID_COLUMN column. A dataframe
    without any columns produces an empty list.
    """
    column_index = df.columns
    if not len(column_index):
        return []

    assert _VID_COLUMN in column_index, "Vertex DataFrame must contain column %s" % _VID_COLUMN
    # After transposing, each column of the frame is one row of the input,
    # which is handed to Vertex as its backing series.
    transposed = df[column_index].T
    return [Vertex(None, _series=transposed[row_label]) for row_label in transposed]


def _dataframe_to_edge_list(df):
    """
    Convert dataframe into list of edges, assuming that source and target ids
    are stored in _SRC_VID_COLUMN, and _DST_VID_COLUMN respectively.

    Each row of the dataframe becomes one Edge. A dataframe without any
    columns produces an empty list.
    """
    cols = df.columns
    if not len(cols):
        return []

    # The messages name the edge frame; they previously said "Vertex DataFrame".
    assert _SRC_VID_COLUMN in cols, "Edge DataFrame must contain column %s" % _SRC_VID_COLUMN
    assert _DST_VID_COLUMN in cols, "Edge DataFrame must contain column %s" % _DST_VID_COLUMN
    # After transposing, each column of the frame is one row of the input,
    # which is handed to Edge as its backing series.
    df = df[cols].T
    return [Edge(None, None, _series=df[col]) for col in df]


def _vertex_data_to_sframe(data, vid_field):
    """
    Convert data into a vertex data sframe. Using vid_field to identify
    the id column.
    The returned sframe will have id column name '__id'

    Accepted inputs are an SFrame, a single Vertex, a list of Vertex objects,
    or (when pandas is available) a pandas DataFrame. For a DataFrame with
    vid_field=None, the dataframe index is used as the vertex id and must be
    unique.

    Raises
    ------
    ValueError
        If vid_field is required but missing, if the DataFrame index is not
        unique, or if a DataFrame already contains the reserved '__id' column
        while vid_field is given.
    TypeError
        If data is of an unsupported type.
    """
    if isinstance(data, SFrame):
        # '__id' already in the sframe, and it is ok to not specify vid_field
        if vid_field is None and _VID_COLUMN in data.column_names():
            return data
        if vid_field is None:
            raise ValueError("vid_field must be specified for SFrame input")
        # Rename on a copy so that the caller's SFrame keeps its column names.
        data_copy = copy.copy(data)
        data_copy.rename({vid_field: _VID_COLUMN})
        return data_copy

    if isinstance(data, Vertex):
        # The list helper iterates its argument, so wrap the single vertex.
        return SFrame(_vertex_list_to_dataframe([data], _VID_COLUMN))

    if isinstance(data, list):
        return SFrame(_vertex_list_to_dataframe(data, _VID_COLUMN))

    if HAS_PANDAS and isinstance(data, pd.DataFrame):
        if vid_field is None:
            # using the dataframe index as vertex id
            if not data.index.is_unique:
                raise ValueError("Index of the vertices dataframe is not unique, "
                                 "try specifying vid_field name to use a column for vertex ids.")
            # pandas reset_index() inserts the old index as a new column named
            # "index", or "level_0" if a column named "index" already exists.
            index_column = 'level_0' if 'index' in data.columns else 'index'
            sf = SFrame(data.reset_index())
            sf.rename({index_column: _VID_COLUMN})
            return sf

        sf = SFrame(data)
        if _VID_COLUMN in sf.column_names():
            raise ValueError('%s reserved vid column name already exists in the SFrame' % _VID_COLUMN)
        sf.rename({vid_field: _VID_COLUMN})
        return sf

    raise TypeError('Vertices type %s is Not supported.' % str(type(data)))


def _edge_data_to_sframe(data, src_field, dst_field):
    """
    Convert data into an edge data sframe. Using src_field and dst_field to identify
    the source and target id column.
    The returned sframe will have id column name '__src_id', '__dst_id'

    Accepted inputs are an SFrame, a pandas DataFrame (when pandas is
    available), a single Edge, or a list of Edge objects.

    Raises
    ------
    ValueError
        If src_field or dst_field is required but missing.
    TypeError
        If data is of an unsupported type.
    """
    if isinstance(data, SFrame):
        # '__src_id' and '__dst_id' already in the sframe, and
        # it is ok to not specify src_field and dst_field
        if src_field is None and dst_field is None and \
           _SRC_VID_COLUMN in data.column_names() and \
           _DST_VID_COLUMN in data.column_names():
            return data
        if src_field is None:
            raise ValueError("src_field must be specified for SFrame input")
        if dst_field is None:
            raise ValueError("dst_field must be specified for SFrame input")
        # Rename on a copy so that the caller's SFrame keeps its column names.
        return _rename_edge_id_columns(copy.copy(data), src_field, dst_field)
    elif HAS_PANDAS and isinstance(data, pd.DataFrame):
        if src_field is None:
            raise ValueError("src_field must be specified for Pandas input")
        if dst_field is None:
            raise ValueError("dst_field must be specified for Pandas input")
        return _rename_edge_id_columns(SFrame(data), src_field, dst_field)
    elif isinstance(data, Edge):
        return SFrame(_edge_list_to_dataframe([data], _SRC_VID_COLUMN, _DST_VID_COLUMN))
    elif isinstance(data, list):
        return SFrame(_edge_list_to_dataframe(data, _SRC_VID_COLUMN, _DST_VID_COLUMN))
    else:
        raise TypeError('Edges type %s is Not supported.' % str(type(data)))


def _rename_edge_id_columns(sf, src_field, dst_field):
    """
    Rename the src_field / dst_field columns of `sf` to the reserved
    '__src_id' / '__dst_id' names and return `sf`.

    The return value of rename() is ignored, so this relies on rename()
    mutating `sf` itself.
    """
    if src_field == _DST_VID_COLUMN and dst_field == _SRC_VID_COLUMN:
        # special case when src_field = "__dst_id" and dst_field = "__src_id"
        # directly renaming will cause name collision, so swap the two
        # columns through a temporary reference instead.
        dst_id_column = sf[_DST_VID_COLUMN]
        del sf[_DST_VID_COLUMN]
        sf.rename({_SRC_VID_COLUMN: _DST_VID_COLUMN})
        sf[_SRC_VID_COLUMN] = dst_id_column
    else:
        sf.rename({src_field: _SRC_VID_COLUMN, dst_field: _DST_VID_COLUMN})
    return sf

## Hack: override GFrame's __name__ and __module__ with SFrame's so that
## GFrame appears as SFrame.
GFrame.__name__ = SFrame.__name__
GFrame.__module__ = SFrame.__module__
