import functools
import numpy as np
import pandas as pd
import re
import warnings
from collections import defaultdict
from datetime import datetime
from pandas.tslib import OutOfBoundsDatetime

from .core import indexing, utils
from .core.formatting import format_timestamp
from .core.variable import as_variable, Variable
from .core.pycompat import iteritems, bytes_type, unicode_type, OrderedDict


# calendar names treated as "standard": decode_cf_datetime only attempts its
# vectorized pandas path for these and uses netCDF4 for everything else
_STANDARD_CALENDARS = set(('standard', 'gregorian', 'proleptic_gregorian'))


def mask_and_scale(array, fill_value=None, scale_factor=None, add_offset=None,
                   dtype=float):
    """Unpack CF "packed" data: mask the fill value, then scale and offset.

    Entries equal to ``fill_value`` become NaN first; afterwards every entry
    is transformed as:

        original_values * scale_factor + add_offset

    Parameters
    ----------
    array : array-like
        Packed values. They are always copied, never modified in place.
    fill_value : number, optional
        Marker for missing data. Ignored when None or itself null.
    scale_factor : number, optional
        Multiplier applied to every entry.
    add_offset : number, optional
        Constant added after the multiplication.
    dtype : dtype, optional
        dtype of the working copy (float by default).

    Returns
    -------
    scaled : np.ndarray
        Array of masked and scaled values.

    References
    ----------
    http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html
    """
    # a float working copy (the default) is what makes NaN representable
    result = np.array(array, dtype=dtype, copy=True)

    should_mask = fill_value is not None and not pd.isnull(fill_value)
    if should_mask and result.ndim > 0:
        result[result == fill_value] = np.nan
    elif should_mask and result == fill_value:
        # 0-d arrays cannot be mask-assigned, so swap in a fresh NaN scalar
        result = np.array(np.nan)

    if scale_factor is not None:
        result *= scale_factor
    if add_offset is not None:
        result += add_offset
    return result


def _netcdf_to_numpy_timeunit(units):
    """Map a netCDF time-unit name to its numpy unit code.

    Only 'seconds', 'minutes', 'hours' and 'days' are known; any other name
    raises KeyError.
    """
    numpy_codes = {'days': 'D', 'hours': 'h', 'minutes': 'm', 'seconds': 's'}
    return numpy_codes[units]


def _unpack_netcdf_time_units(units):
    """Split a CF time-units string into its unit and reference date.

    Parameters
    ----------
    units : str
        String of the form ``'<delta> since <reference date>'``, e.g.
        ``'days since 2000-01-01'``.

    Returns
    -------
    delta : str
        The text before ``' since '`` (a run of non-whitespace characters).
    ref_date : str
        Everything after ``' since '``.

    Raises
    ------
    ValueError
        If ``units`` does not start with ``'<delta> since <something>'``.
    """
    # re.match returns None on failure, so check it before calling .groups();
    # otherwise an AttributeError would hide the intended ValueError
    match = re.match(r'(\S+) since (.+)', units)
    if match is None:
        raise ValueError('invalid time units: %s' % units)
    delta, ref_date = match.groups()
    return delta, ref_date


def _decode_netcdf_datetime(num_dates, units, calendar):
    """Decode numeric dates with netCDF4.num2date.

    The result is converted to datetime64[ns] via nctime_to_nptime when the
    dates decoded from the smallest and largest input numbers have years
    within [1678, 2262). Otherwise, or when that conversion raises
    ValueError, the netCDF4 date objects are returned unchanged and a
    RuntimeWarning is issued.
    """
    import netCDF4 as nc4

    dates = np.asarray(nc4.num2date(num_dates, units, calendar))

    out_of_range = (dates[np.nanargmin(num_dates)].year < 1678
                    or dates[np.nanargmax(num_dates)].year >= 2262)
    if out_of_range:
        warnings.warn('Unable to decode time axis into full '
                      'numpy.datetime64 objects, continuing using dummy '
                      'netCDF4.datetime objects instead, reason: dates out'
                      ' of range', RuntimeWarning, stacklevel=3)
        return dates

    try:
        dates = nctime_to_nptime(dates)
    except ValueError as e:
        warnings.warn('Unable to decode time axis into full '
                      'numpy.datetime64 objects, continuing using '
                      'dummy netCDF4.datetime objects instead, reason:'
                      '{0}'.format(e), RuntimeWarning, stacklevel=3)
    return dates


def decode_cf_datetime(num_dates, units, calendar=None):
    """Given an array of numeric dates in netCDF format, convert it into a
    numpy array of date time objects.

    For standard (Gregorian) calendars, this function uses vectorized
    operations, which makes it much faster than netCDF4.num2date. In such a
    case, the returned array will be of type np.datetime64.

    Parameters
    ----------
    num_dates : array-like
        Numeric offsets from the reference date; converted to float.
    units : str
        CF units string of the form ``'<unit> since <reference date>'``.
    calendar : str, optional
        Calendar name; None is treated as 'standard'.

    Returns
    -------
    dates : np.ndarray
        Decoded dates with the same shape as ``num_dates``.

    See also
    --------
    netCDF4.num2date
    """
    num_dates = np.asarray(num_dates, dtype=float)
    flat_num_dates = num_dates.ravel()
    if calendar is None:
        calendar = 'standard'

    delta, ref_date = _unpack_netcdf_time_units(units)

    try:
        if calendar not in _STANDARD_CALENDARS:
            # not an actual bounds problem: this just reuses the except
            # clause below to reach the netCDF4 fallback
            raise OutOfBoundsDatetime

        delta = _netcdf_to_numpy_timeunit(delta)
        ref_date = pd.Timestamp(ref_date)

        dates = (pd.to_timedelta(flat_num_dates, delta) + ref_date).values
    except (OutOfBoundsDatetime, OverflowError):
        # OverflowError is handled as well because pandas may report offsets
        # that do not fit its nanosecond representation that way rather than
        # as OutOfBoundsDatetime; either way netCDF4 can still decode them
        dates = _decode_netcdf_datetime(flat_num_dates, units, calendar)

    return dates.reshape(num_dates.shape)


def decode_cf_timedelta(num_timedeltas, units):
    """Convert numeric netCDF timedeltas, expressed in `units`, into whatever
    pd.to_timedelta returns with box=False (a numpy timedelta64[ns] array).

    `units` must be one of the names known to _netcdf_to_numpy_timeunit.
    """
    numpy_unit = _netcdf_to_numpy_timeunit(units)
    raw = np.asarray(num_timedeltas)
    return pd.to_timedelta(raw, unit=numpy_unit, box=False)


# unit names that decode_cf_variable treats as marking a timedelta variable
TIME_UNITS = set(('days', 'hours', 'minutes', 'seconds'))

def _infer_time_units_from_diff(unique_timedeltas):
    """Return the coarsest of 'days', 'hours', 'minutes', 'seconds' that
    divides every value in `unique_timedeltas` into a whole number.

    Raises ValueError when none of the four units does.
    """
    seconds_per_unit = (('days', 86400), ('hours', 3600),
                        ('minutes', 60), ('seconds', 1))
    for name, seconds in seconds_per_unit:
        # one unit expressed in nanoseconds
        one_unit = np.timedelta64(10 ** 9 * seconds, 'ns')
        ratios = unique_timedeltas / one_unit
        whole = ratios.astype(int)
        if np.all(ratios == whole):
            return name
    raise ValueError('could not automatically determine time units')


def infer_datetime_units(dates):
    """Given an array of datetimes, returns a CF compatible time-unit string of
    the form "{time_unit} since {reference_date}", where `time_unit` is 'days',
    'hours', 'minutes' or 'seconds' (the first one that can evenly divide all
    unique time deltas in `dates`).

    Missing values are dropped before anything else. The reference date is
    the first remaining date, or 1970-01-01 when no date remains (empty or
    all-missing input), so the result never reads "... since NaT".
    """
    dates = pd.to_datetime(np.asarray(dates).ravel(), box=False)
    # drop missing values up front so they can neither become the reference
    # date nor contribute to the differences
    dates = dates[pd.notnull(dates)]
    unique_timedeltas = np.unique(np.diff(dates))
    units = _infer_time_units_from_diff(unique_timedeltas)
    reference_date = dates[0] if len(dates) > 0 else '1970-01-01'
    return '%s since %s' % (units, pd.Timestamp(reference_date))


def infer_timedelta_units(deltas):
    """Given an array of timedeltas, returns a CF compatible time-unit from
    {'days', 'hours', 'minutes', 'seconds'}: the first one that evenly divides
    every unique non-missing value in `deltas`.
    """
    as_timedelta = pd.to_timedelta(np.asarray(deltas), box=False)
    present = as_timedelta[pd.notnull(as_timedelta)]
    return _infer_time_units_from_diff(np.unique(present))


def nctime_to_nptime(times):
    """Convert an array of datetime-like objects (anything with a
    ``timetuple()`` method, e.g. netCDF4.datetime) into a datetime64[ns]
    array of the same shape.

    Only the first six timetuple fields (year through second) are used.
    """
    source = np.asarray(times)
    converted = np.empty(source.shape, dtype='M8[ns]')
    for index in np.ndindex(source.shape):
        fields = source[index].timetuple()[:6]
        converted[index] = np.datetime64(datetime(*fields))
    return converted


def _cleanup_netcdf_time_units(units):
    """Rebuild a CF units string with its reference date passed through
    format_timestamp.

    When format_timestamp raises OutOfBoundsDatetime, the original string is
    returned untouched.
    """
    delta, ref_date = _unpack_netcdf_time_units(units)
    try:
        cleaned = '%s since %s' % (delta, format_timestamp(ref_date))
    except OutOfBoundsDatetime:
        # out-of-bounds reference dates are left as the caller wrote them
        cleaned = units
    return cleaned



def encode_cf_datetime(dates, units=None, calendar=None):
    """Given an array of datetime objects, returns the tuple `(num, units,
    calendar)` suitable for a CF compliant time variable.

    Unlike decode_cf_datetime, this function does not (yet) speed up the
    handling of datetime64 arrays. However, unlike `date2num`, it can handle
    datetime64 arrays.

    When `units` is None it is inferred from `dates`; otherwise it is passed
    through _cleanup_netcdf_time_units. When `calendar` is None,
    'proleptic_gregorian' is used. Entries that are None (after datetime64
    arrays are converted to Python datetimes) are encoded as NaN.

    See also
    --------
    netCDF4.date2num
    """
    import netCDF4 as nc4

    dates = np.asarray(dates)

    units = (infer_datetime_units(dates) if units is None
             else _cleanup_netcdf_time_units(units))
    if calendar is None:
        calendar = 'proleptic_gregorian'

    if np.issubdtype(dates.dtype, np.datetime64):
        # no fast path here (unlike decode_cf_datetime): go through Python
        # datetime objects instead
        # note: numpy's broken datetime conversion only works for us precision
        dates = dates.astype('M8[us]').astype(datetime)

    encoded = [np.nan if d is None else nc4.date2num(d, units, calendar)
               for d in dates.flat]
    num = np.array(encoded).reshape(dates.shape)
    return (num, units, calendar)


def encode_cf_timedelta(timedeltas, units=None):
    """Encode a timedelta64 array as numbers counted in `units`.

    `units` is inferred with infer_timedelta_units when not given. The result
    is an int64 array, or a float array with NaN at the missing positions
    when `timedeltas` contains missing values.

    Returns the tuple `(num, units)`.
    """
    if units is None:
        units = infer_timedelta_units(timedeltas)

    target_dtype = 'timedelta64[%s]' % _netcdf_to_numpy_timeunit(units)
    num = timedeltas.astype(target_dtype).view(np.int64)

    is_missing = pd.isnull(timedeltas)
    if np.any(is_missing):
        # integers cannot hold NaN, so switch to float before masking
        num = num.astype(float)
        num[is_missing] = np.nan

    return (num, units)


class MaskedAndScaledArray(utils.NDArrayMixin):
    """Lazy wrapper that masks and scales values of the wrapped array
    according to CF conventions for packed and missing data, each time it is
    indexed.

    Indexed values are computed as:
        original_values * scale_factor + add_offset

    Values can only be accessed via `__getitem__`:

    >>> x = MaskedAndScaledArray(np.array([-99, -1, 0, 1, 2]), -99, 0.01, 1)
    >>> x[:]
    array([  nan,  0.99,  1.  ,  1.01,  1.02])

    References
    ----------
    http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html
    """
    def __init__(self, array, fill_value=None, scale_factor=None,
                 add_offset=None, dtype=float):
        """
        Parameters
        ----------
        array : array-like
            Original (packed) array to wrap.
        fill_value : number, optional
            Entries equal to this value in the original array are replaced
            by NaN.
        scale_factor : number, optional
            Multiplier applied to the original entries.
        add_offset : number, optional
            Constant added after scale_factor has been applied.
        dtype : dtype, optional
            dtype reported by this wrapper and handed to mask_and_scale.
        """
        self.array = array
        self.fill_value = fill_value
        self.scale_factor = scale_factor
        self.add_offset = add_offset
        self._dtype = dtype

    @property
    def dtype(self):
        return np.dtype(self._dtype)

    def __getitem__(self, key):
        # index the raw data first, then decode only the selected part
        packed = self.array[key]
        return mask_and_scale(packed, self.fill_value, self.scale_factor,
                              self.add_offset, self._dtype)

    def __repr__(self):
        fields = (type(self).__name__, self.array, self.fill_value,
                  self.scale_factor, self.add_offset, self._dtype)
        return ("%s(%r, fill_value=%r, scale_factor=%r, add_offset=%r, "
                "dtype=%r)" % fields)


class DecodedCFDatetimeArray(utils.NDArrayMixin):
    """Lazy wrapper that converts values of the wrapped array with
    decode_cf_datetime each time it is indexed.

    `units` and `calendar` are stored at construction and forwarded
    unchanged to decode_cf_datetime.
    """
    def __init__(self, array, units, calendar=None):
        self.array = array
        self.units = units
        self.calendar = calendar

    @property
    def dtype(self):
        # the reported dtype is fixed and does not depend on the wrapped data
        return np.dtype('datetime64[ns]')

    def __getitem__(self, key):
        encoded = self.array[key]
        return decode_cf_datetime(encoded, units=self.units,
                                  calendar=self.calendar)


class DecodedCFTimedeltaArray(utils.NDArrayMixin):
    """Lazy wrapper that converts values of the wrapped array with
    decode_cf_timedelta each time it is indexed.

    `units` is stored at construction and forwarded unchanged.
    """
    def __init__(self, array, units):
        self.array = array
        self.units = units

    @property
    def dtype(self):
        # the reported dtype is fixed and does not depend on the wrapped data
        return np.dtype('timedelta64[ns]')

    def __getitem__(self, key):
        encoded = self.array[key]
        return decode_cf_timedelta(encoded, units=self.units)


class CharToStringArray(utils.NDArrayMixin):
    """Lazy wrapper that joins the characters along the last dimension of the
    wrapped array into strings each time it is indexed.

    >>> CharToStringArray(np.array(['a', 'b', 'c']))[:]
    array('abc',
          dtype='|S3')
    """
    def __init__(self, array):
        """
        Parameters
        ----------
        array : array-like
            Original character array to wrap.
        """
        self.array = array

    @property
    def dtype(self):
        # string width equals the length of the (collapsed) last dimension
        width = self.array.shape[-1]
        return np.dtype('S' + str(width))

    @property
    def shape(self):
        # the last dimension is consumed by the concatenation
        return self.array.shape[:-1]

    def __str__(self):
        if self.ndim != 0:
            return repr(self)
        # always return a unicode str if it's a single item for py3 compat
        return self[...].item().decode('utf-8')

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.array)

    def __getitem__(self, key):
        if self.array.ndim == 0:
            return self.array[key]
        # the last dimension must be taken whole, since it is what gets
        # concatenated into a string
        key = indexing.expanded_indexer(key, self.array.ndim)
        if key[-1] != slice(None):
            raise IndexError('too many indices')
        return char_to_string(self.array[key])


def string_to_char(arr):
    """Like netCDF4.stringtochar, but faster and more flexible.

    Parameters
    ----------
    arr : array-like of str or bytes
        Fixed-width string data (numpy kind 'S' or 'U').

    Returns
    -------
    chars : np.ndarray
        Array with one extra trailing dimension holding the individual
        characters (dtype 'S1' or 'U1').

    Raises
    ------
    ValueError
        If ``arr`` is not of a string dtype.
    """
    # ensure the array is contiguous; np.asarray copies only when it has to,
    # whereas np.array(..., copy=False) raises on NumPy >= 2 if a copy is
    # required (e.g. list or strided input)
    arr = np.asarray(arr, order='C')
    kind = arr.dtype.kind
    if kind not in ['U', 'S']:
        raise ValueError('argument must be a string')
    # add a length-1 axis, then reinterpret each string as single characters
    return arr.reshape(arr.shape + (1,)).view(kind + '1')


def char_to_string(arr):
    """Like netCDF4.chartostring, but faster and more flexible.

    Parameters
    ----------
    arr : array-like of single characters
        Character data (numpy kind 'S' or 'U') whose last dimension runs
        over the characters of each string.

    Returns
    -------
    strings : np.ndarray
        Array with the last dimension collapsed into strings.

    Raises
    ------
    ValueError
        If ``arr`` is not of a string dtype.
    """
    # based on: http://stackoverflow.com/a/10984878/809705
    # np.asarray copies only when it has to, whereas
    # np.array(..., copy=False) raises on NumPy >= 2 if a copy is required
    arr = np.asarray(arr, order='C')
    kind = arr.dtype.kind
    if kind not in ['U', 'S']:
        raise ValueError('argument must be a string')
    # reinterpret each row of characters as one string, then drop the
    # now length-1 trailing axis
    return arr.view(kind + str(arr.shape[-1]))[..., 0]


def safe_setitem(dest, key, value):
    """Store `value` under `key` in `dest`, refusing to overwrite.

    Raises ValueError if `key` is already present in `dest`.
    """
    if key not in dest:
        dest[key] = value
        return
    raise ValueError('Failed hard to prevent overwriting key %r' % key)


def pop_to(source, dest, key, default=None):
    """
    A convenience function which pops a key k from source to dest.
    None values are not passed on.  If k already exists in dest an
    error is raised.

    Parameters
    ----------
    source : dict-like
        Mapping the key is removed from.
    dest : dict-like
        Mapping the value is stored in (only when it is not None).
    key : hashable
        Key to move.
    default : object, optional
        Value used when `key` is missing from `source`. Like any other
        value, it is stored in `dest` unless it is None.

    Returns
    -------
    value : object
        The popped value, or `default` if `key` was not in `source`.
    """
    # honor `default` (it used to be accepted but ignored); with the default
    # of None the behavior is exactly what it was before
    value = source.pop(key, default)
    if value is not None:
        safe_setitem(dest, key, value)
    return value


def _var_as_tuple(var):
    """Unpack `var` into `(dims, values, attrs, encoding)`.

    `attrs` and `encoding` are copies (via their ``copy()`` method), so the
    caller may modify them without touching `var`.
    """
    dims = var.dims
    values = var.values
    attrs = var.attrs.copy()
    encoding = var.encoding.copy()
    return dims, values, attrs, encoding


def maybe_encode_datetime(var):
    """Encode `var` as a CF time variable if it holds datetimes.

    A variable qualifies when its dtype is datetime64, or when it has object
    dtype and its first element is a `datetime` instance. Empty object
    arrays have no first element to inspect and are returned unchanged.

    For a qualifying variable, 'units' and 'calendar' are taken from the
    encoding (if present), the values are converted with encode_cf_datetime,
    and the resulting units and calendar are stored as attributes.

    Returns the encoded Variable, or `var` itself when nothing applies.
    """
    is_datetime = np.issubdtype(var.dtype, np.datetime64)
    if not is_datetime and var.dtype.kind == 'O':
        candidates = var.values
        # guard against empty arrays: .flat[0] would raise IndexError
        is_datetime = (candidates.size > 0
                       and isinstance(candidates.flat[0], datetime))

    if is_datetime:
        dims, values, attrs, encoding = _var_as_tuple(var)
        (values, units, calendar) = encode_cf_datetime(
            values, encoding.pop('units', None), encoding.pop('calendar', None))
        safe_setitem(attrs, 'units', units)
        safe_setitem(attrs, 'calendar', calendar)
        var = Variable(dims, values, attrs, encoding)
    return var


def maybe_encode_timedelta(var):
    """Encode `var` with encode_cf_timedelta if its dtype is timedelta64.

    'units' is taken from the encoding when present; the units actually used
    are stored as an attribute. Any other variable is returned unchanged.
    """
    if not np.issubdtype(var.dtype, np.timedelta64):
        return var

    dims, values, attrs, encoding = _var_as_tuple(var)
    requested_units = encoding.pop('units', None)
    values, units = encode_cf_timedelta(values, requested_units)
    safe_setitem(attrs, 'units', units)
    return Variable(dims, values, attrs, encoding)


def maybe_encode_offset_and_scale(var, needs_copy=True):
    """Apply the inverse of CF unpacking when the encoding asks for it.

    If 'add_offset' and/or 'scale_factor' is present in ``var.encoding``,
    the values are converted to float, the offset is subtracted, the result
    is divided by the scale factor, and both entries are moved from the
    encoding to the attributes.

    Parameters
    ----------
    var : Variable
        Variable to encode.
    needs_copy : bool, optional
        Whether the values must be copied before being modified in place.

    Returns
    -------
    var : Variable
        The encoded variable, or `var` itself if nothing applied.
    needs_copy : bool
        False once this function has produced its own working array,
        otherwise the value passed in.
    """
    if any(k in var.encoding for k in ['add_offset', 'scale_factor']):
        dims, values, attrs, encoding = _var_as_tuple(var)
        if needs_copy:
            values = np.array(values, dtype=float, copy=True)
        else:
            # np.asarray copies only if the float conversion needs it;
            # np.array(..., copy=False) would raise in that case on
            # NumPy >= 2
            values = np.asarray(values, dtype=float)
        needs_copy = False
        if 'add_offset' in encoding:
            values -= pop_to(encoding, attrs, 'add_offset')
        if 'scale_factor' in encoding:
            values /= pop_to(encoding, attrs, 'scale_factor')
        var = Variable(dims, values, attrs, encoding)
    return var, needs_copy


def maybe_encode_fill_value(var, needs_copy=True):
    """Replace missing values by the '_FillValue' given in the encoding.

    When '_FillValue' is in ``var.encoding`` it is moved to the attributes
    and, unless it is itself null, written over every missing entry of the
    values (copying them first if `needs_copy` is true).

    Returns `(var, needs_copy)`, where `needs_copy` is False once a copy has
    been made here.
    """
    if '_FillValue' not in var.encoding:
        return var, needs_copy

    dims, values, attrs, encoding = _var_as_tuple(var)
    fill_value = pop_to(encoding, attrs, '_FillValue')
    if not pd.isnull(fill_value):
        missing = pd.isnull(values)
        if missing.any():
            if needs_copy:
                # do not write into data we do not own
                values = values.copy()
                needs_copy = False
            values[missing] = fill_value
    return Variable(dims, values, attrs, encoding), needs_copy


def maybe_encode_dtype(var, needs_copy=True):
    """Cast the values to the dtype requested in ``var.encoding['dtype']``.

    Nothing happens when no dtype is requested or when the requested dtype
    is object. Otherwise:

    - for any integer target dtype the values are rounded first, so that the
      cast does not simply truncate;
    - for a target of 'S1' applied to non-'S1' data, strings are split into
      characters and a trailing 'string<N>' dimension is added;
    - the values are then converted to the requested dtype.

    Parameters
    ----------
    var : Variable
        Variable to encode.
    needs_copy : bool, optional
        If true, rounding writes into a new array instead of modifying the
        values in place.

    Returns
    -------
    var : Variable
        The encoded variable, or `var` itself if nothing applied.
    """
    if 'dtype' in var.encoding:
        dims, values, attrs, encoding = _var_as_tuple(var)
        dtype = np.dtype(encoding.pop('dtype'))
        if dtype.kind != 'O':
            # np.integer covers every signed and unsigned integer width
            if np.issubdtype(dtype, np.integer):
                out = np.empty_like(values) if needs_copy else values
                # keep the rounded result: when `out` is a fresh array the
                # original `values` are left untouched by np.around
                values = np.around(values, out=out)
            if dtype == 'S1' and values.dtype != 'S1':
                values = string_to_char(np.asarray(values, 'S'))
                dims = dims + ('string%s' % values.shape[-1],)
            values = np.asarray(values, dtype=dtype)
            var = Variable(dims, values, attrs, encoding)
    return var


def _infer_dtype(array):
    """Given an object array with no missing values, infer its dtype from its
    first element.

    Empty arrays yield float. String elements yield the unsized 'S' or 'U'
    dtype. Raises ValueError when the first element itself can only be
    represented with object dtype.
    """
    if array.size == 0:
        return np.dtype(float)

    first_dtype = np.array(array.flat[0]).dtype
    if first_dtype.kind == 'O':
        raise ValueError('unable to infer dtype; xray cannot '
                         'serialize arbitrary Python objects')
    if first_dtype.kind in ['S', 'U']:
        # keep only the kind: the width of the first element would truncate
        # longer strings later in the array
        return np.dtype(first_dtype.kind)
    return first_dtype


def ensure_dtype_not_object(var):
    """Convert an object-dtype variable to a concrete dtype.

    The dtype is inferred from the first non-missing element (see
    _infer_dtype). Missing entries become '' for string data; for anything
    else the data is held as float and missing entries become NaN.
    Variables whose dtype is not object are returned unchanged.
    """
    # TODO: move this from conventions to backends? (it's not CF related)
    if var.dtype.kind != 'O':
        return var

    dims, values, attrs, encoding = _var_as_tuple(var)
    missing = pd.isnull(values)

    if not missing.any():
        values = np.asarray(values, dtype=_infer_dtype(values))
        return Variable(dims, values, attrs, encoding)

    inferred_dtype = _infer_dtype(values[~missing])
    if inferred_dtype.kind in ['S', 'U']:
        # There is no safe bit-pattern for NA in typical binary string
        # formats, so no fill_value can be recorded. Unfortunately, this
        # means string arrays with missing values cannot be restored.
        fill_value = ''
    else:
        # insist on using float for numeric values
        if not np.issubdtype(inferred_dtype, float):
            inferred_dtype = np.dtype(float)
        fill_value = np.nan

    values = np.array(values, dtype=inferred_dtype, copy=True)
    values[missing] = fill_value
    return Variable(dims, values, attrs, encoding)


def encode_cf_variable(var, needs_copy=True):
    """
    Converts a Variable into a Variable which follows some
    of the CF conventions.

    The following steps are applied, in this order:

        - datetimes and timedeltas are converted to numbers with CF units
        - rescaling via scale_factor and add_offset
        - NaNs are replaced using _FillValue
        - dtype encodings are enforced
        - object dtypes are converted to a concrete dtype

    Parameters
    ----------
    var : xray.Variable
        A variable holding un-encoded data.
    needs_copy : bool, optional
        Whether the data must be copied before being modified in place.

    Returns
    -------
    out : xray.Variable
        A variable which has been encoded as described above.
    """
    var = maybe_encode_timedelta(maybe_encode_datetime(var))
    # the in-place steps hand `needs_copy` along so at most one copy is made
    var, needs_copy = maybe_encode_offset_and_scale(var, needs_copy)
    var, needs_copy = maybe_encode_fill_value(var, needs_copy)
    var = maybe_encode_dtype(var, needs_copy)
    return ensure_dtype_not_object(var)


def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
                       decode_times=True):
    """
    Decodes a variable which may hold CF encoded information.

    Decoding is lazy: the data is wrapped in array objects that convert
    values when they are indexed. Attributes consumed by the decoding
    ('_FillValue', 'scale_factor', 'add_offset', 'units', 'calendar') are
    moved from the attributes to the encoding of the returned variable.

    Parameters
    ----------
    var : Variable
        A variable holding potentially CF encoded information.
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale: bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ('hours since 2000-01-01') to np.datetime64.

    Returns
    -------
    out : Variable
        A variable holding the decoded equivalent of var
    """
    var = as_variable(var)
    # use _data instead of data so as not to trigger loading data
    data = var._data
    dimensions = var.dims
    attributes = var.attrs.copy()
    encoding = var.encoding.copy()

    # record the on-disk dtype unless the encoding already specifies one
    if 'dtype' not in encoding:
        encoding['dtype'] = data.dtype
    elif data.dtype != encoding['dtype']:
        warnings.warn("CF decoding is overwriting dtype")

    if (concat_characters and data.dtype.kind == 'S'
            and data.dtype.itemsize == 1):
        # the last dimension is absorbed into the strings
        dimensions = dimensions[:-1]
        data = CharToStringArray(data)

    if mask_and_scale:
        if 'missing_value' in attributes:
            # missing_value is deprecated, but we still want to support it as
            # an alias for _FillValue.
            assert ('_FillValue' not in attributes
                    or utils.equivalent(attributes['_FillValue'],
                                        attributes['missing_value']))
            attributes['_FillValue'] = attributes.pop('missing_value')

        fill_value = pop_to(attributes, encoding, '_FillValue')
        scale_factor = pop_to(attributes, encoding, 'scale_factor')
        add_offset = pop_to(attributes, encoding, 'add_offset')

        has_fill = fill_value is not None and not pd.isnull(fill_value)
        if has_fill or scale_factor is not None or add_offset is not None:
            # string fill values cannot live in a float array
            is_text_fill = isinstance(fill_value, (bytes_type, unicode_type))
            dtype = object if is_text_fill else float
            data = MaskedAndScaledArray(data, fill_value, scale_factor,
                                        add_offset, dtype)

    if decode_times and 'units' in attributes:
        if 'since' in attributes['units']:
            # datetime
            units = pop_to(attributes, encoding, 'units')
            calendar = pop_to(attributes, encoding, 'calendar')
            data = DecodedCFDatetimeArray(data, units, calendar)
        elif attributes['units'] in TIME_UNITS:
            # timedelta
            units = pop_to(attributes, encoding, 'units')
            data = DecodedCFTimedeltaArray(data, units)

    lazy_data = indexing.LazilyIndexedArray(data)
    return Variable(dimensions, lazy_data, attributes, encoding=encoding)


def decode_cf_variables(variables, attributes, concat_characters=True,
                        mask_and_scale=True, decode_times=True,
                        decode_coords=True):
    """
    Decode several CF encoded variables.

    Each variable is passed through decode_cf_variable. When `decode_coords`
    is true, 'coordinates' attributes (on variables and in the global
    attributes) are removed and their whitespace-separated names collected.

    Returns the tuple `(new_vars, attributes, coord_names)`.

    See: decode_cf_variable
    """
    # which variables use each dimension
    dimensions_used_by = defaultdict(list)
    for variable in variables.values():
        for dim in variable.dims:
            dimensions_used_by[dim].append(variable)

    def stackable(dim):
        # a dimension can be concatenated over if it is not itself a
        # variable and only ever appears as the last dimension of 'S' data
        if dim in variables:
            return False
        return all(v.dtype.kind == 'S' and dim == v.dims[-1]
                   for v in dimensions_used_by[dim])

    coord_names = set()
    new_vars = OrderedDict()
    for name, variable in iteritems(variables):
        concat = (concat_characters and variable.dtype.kind == 'S'
                  and variable.ndim > 0 and stackable(variable.dims[-1]))
        decoded = decode_cf_variable(
            variable, concat_characters=concat,
            mask_and_scale=mask_and_scale, decode_times=decode_times)
        new_vars[name] = decoded
        if decode_coords:
            coordinates = decoded.attrs.pop('coordinates', None)
            if coordinates is not None:
                coord_names.update(coordinates.split())

    if decode_coords and 'coordinates' in attributes:
        # copy before popping so the caller's attributes stay intact
        attributes = OrderedDict(attributes)
        coord_names.update(attributes.pop('coordinates').split())

    return new_vars, attributes, coord_names


def decode_cf(obj, concat_characters=True, mask_and_scale=True,
              decode_times=True, decode_coords=True):
    """Decode the given Dataset or Datastore according to CF conventions into
    a new Dataset.

    Parameters
    ----------
    obj : Dataset or DataStore
        Object to decode.
    concat_characters : bool, optional
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale: bool, optional
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool, optional
        Decode cf times (e.g., integers since 'hours since 2000-01-01') to
        np.datetime64.
    decode_coords : bool, optional
        Use the 'coordinates' attribute on variable (or the dataset itself) to
        identify coordinates.

    Returns
    -------
    decoded : Dataset

    Raises
    ------
    TypeError
        If `obj` is neither a Dataset nor a DataStore.
    """
    from .core.dataset import Dataset
    from .backends.common import AbstractDataStore

    if not isinstance(obj, (Dataset, AbstractDataStore)):
        raise TypeError('can only decode Dataset or DataStore objects')

    if isinstance(obj, Dataset):
        variables = obj._arrays
        attrs = obj.attrs
        # coordinates already set on the dataset are kept
        extra_coords = set(obj.coords)
        file_obj = obj._file_obj
    else:
        variables, attrs = obj.load()
        extra_coords = set()
        file_obj = obj

    variables, attrs, coord_names = decode_cf_variables(
        variables, attrs, concat_characters, mask_and_scale, decode_times,
        decode_coords)
    decoded = Dataset(variables, attrs=attrs)
    decoded = decoded.set_coords(coord_names.union(extra_coords))
    decoded._file_obj = file_obj
    return decoded


def cf_decoder(variables, attributes,
               concat_characters=True, mask_and_scale=True,
               decode_times=True):
    """
    Decode a set of CF encoded variables and attributes.

    This forwards to decode_cf_variables and discards the coordinate names
    it returns.

    See Also, decode_cf_variable

    Parameters
    ----------
    variables : dict
        A dictionary mapping from variable name to xray.Variable
    attributes : dict
        A dictionary mapping from attribute name to value
    concat_characters : bool
        Should character arrays be concatenated to strings, for
        example: ['h', 'e', 'l', 'l', 'o'] -> 'hello'
    mask_and_scale: bool
        Lazily scale (using scale_factor and add_offset) and mask
        (using _FillValue).
    decode_times : bool
        Decode cf times ('hours since 2000-01-01') to np.datetime64.

    Returns
    -------
    decoded_variables : dict
        A dictionary mapping from variable name to xray.Variable objects.
    decoded_attributes : dict
        A dictionary mapping from attribute name to values.
    """
    decoded_variables, decoded_attributes, _coord_names = decode_cf_variables(
        variables, attributes, concat_characters, mask_and_scale, decode_times)
    return decoded_variables, decoded_attributes


def _encode_coordinates(variables, attributes, non_dim_coord_names):
    """Record non-dimension coordinates in 'coordinates' attributes.

    A coordinate is attached to every variable that is not itself a
    coordinate, is not one of its own dimensions, and shares at least one
    dimension with the coordinate. Coordinates attached to no variable are
    listed in a global 'coordinates' attribute instead.

    Returns `(variables, attributes)`. The variables are shallow copies; the
    attributes are copied only if a global entry has to be added.

    Raises ValueError if a 'coordinates' attribute that would have to be
    written already exists.
    """
    # calculate global and variable specific coordinates
    non_dim_coord_names = set(non_dim_coord_names)
    global_coordinates = non_dim_coord_names.copy()
    variable_coordinates = defaultdict(set)
    for coord_name in non_dim_coord_names:
        target_dims = variables[coord_name].dims
        for name, variable in variables.items():
            if name in non_dim_coord_names or name in variable.dims:
                continue
            if any(d in target_dims for d in variable.dims):
                variable_coordinates[name].add(coord_name)
                global_coordinates.discard(coord_name)

    # work on shallow copies so the attrs of the inputs are not modified
    copied = OrderedDict()
    for name, variable in variables.items():
        copied[name] = variable.copy(deep=False)
    variables = copied

    # These coordinates are saved according to CF conventions
    for var_name, coord_names in variable_coordinates.items():
        attrs = variables[var_name].attrs
        if 'coordinates' in attrs:
            raise ValueError('cannot serialize coordinates because variable '
                             "%s already has an attribute 'coordinates'"
                             % var_name)
        attrs['coordinates'] = ' '.join(map(str, coord_names))

    # The remaining coordinates belong to no particular variable, so they go
    # into a global 'coordinates' attribute to let xray roundtrip the dataset
    # faithfully. This goes beyond CF conventions, hence only when necessary.
    # Reference discussion:
    # http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/057771.html
    if global_coordinates:
        attributes = OrderedDict(attributes)
        if 'coordinates' in attributes:
            raise ValueError('cannot serialize coordinates because the global '
                             "attribute 'coordinates' already exists")
        attributes['coordinates'] = ' '.join(map(str, global_coordinates))

    return variables, attributes


def encode_dataset_coordinates(dataset):
    """Encode coordinates on the given dataset object into variable specific
    and global attributes.

    Only coordinates that are not also dimensions of the dataset are
    encoded. When possible, this is done according to CF conventions.

    Parameters
    ----------
    dataset : Dataset
        Object to encode.

    Returns
    -------
    variables : dict
    attrs : dict
    """
    coord_names = set(dataset.coords)
    dim_names = set(dataset.dims)
    return _encode_coordinates(dataset._arrays, dataset.attrs,
                               non_dim_coord_names=coord_names - dim_names)


def cf_encoder(variables, attributes):
    """
    Encode a dict of variables so that they conform to CF conventions as
    much as possible. This includes masking, scaling, character array
    handling, and CF-time encoding.

    Every variable is passed through encode_cf_variable; the attributes are
    returned unchanged.

    Parameters
    ----------
    variables : dict
        A dictionary mapping from variable name to xray.Variable
    attributes : dict
        A dictionary mapping from attribute name to value

    Returns
    -------
    encoded_variables : dict
        A dictionary mapping from variable name to xray.Variable,
    encoded_attributes : dict
        A dictionary mapping from attribute name to value

    See also: encode_cf_variable
    """
    new_vars = OrderedDict()
    for name, variable in iteritems(variables):
        new_vars[name] = encode_cf_variable(variable)
    return new_vars, attributes
