#ifndef PYTHONIC_MODULE_NUMPY_H
#define PYTHONIC_MODULE_NUMPY_H

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <numeric>
#include <vector>

/*
 * NUMPY_EXPR_TO_NDARRAY0(fname)
 *
 * Generates three forwarding overloads of `fname`, whose first parameter is
 * respectively a core::numpy_uexpr, a core::numpy_expr or a core::list.
 * Each overload converts that first argument through
 * core::numpy_expr_to_ndarray<...>::type and calls `fname` again with the
 * converted value followed by the remaining arguments.
 *
 * The remaining arguments are taken by value (`Types... others`), so the
 * std::forward<Types> calls below pass each of those copies on as an rvalue.
 */
#define NUMPY_EXPR_TO_NDARRAY0(fname)\
    template<class Op, class Arg, class... Types>\
        auto fname(core::numpy_uexpr<Op,Arg> const& expr, Types... others)\
            -> decltype(fname(typename core::numpy_expr_to_ndarray<core::numpy_uexpr<Op,Arg>>::type(expr), std::forward<Types>(others)...)) \
    {\
        return fname(typename core::numpy_expr_to_ndarray<core::numpy_uexpr<Op,Arg>>::type(expr), std::forward<Types>(others)...);\
    }\
    template<class Op, class Arg0, class Arg1, class... Types>\
        auto fname(core::numpy_expr<Op,Arg0, Arg1> const& expr, Types... others)\
            -> decltype(fname(typename core::numpy_expr_to_ndarray<core::numpy_expr<Op,Arg0,Arg1>>::type(expr), std::forward<Types>(others)...)) \
    {\
        return fname(typename core::numpy_expr_to_ndarray<core::numpy_expr<Op,Arg0, Arg1>>::type(expr), std::forward<Types>(others)...);\
    }\
    template<class T, class... Types>\
        auto fname(core::list<T> const& expr, Types... others)\
            -> decltype(fname(typename core::numpy_expr_to_ndarray<core::list<T>>::type(expr), std::forward<Types>(others)...)) \
    {\
        return fname(typename core::numpy_expr_to_ndarray<core::list<T>>::type(expr), std::forward<Types>(others)...);\
    }

namespace pythonic {
    namespace numpy {

        /* Scalar constants exposed by the module: pi, Euler's number, a quiet
         * NaN and the two double-precision infinities. */
        const double pi = 3.141592653589793238462643383279502884;
        const double e = 2.718281828459045235360287471352662498;
        const double nan = std::numeric_limits<double>::quiet_NaN();
        const double inf = std::numeric_limits<double>::infinity();
        const double NINF = -inf;

        /* numpy standard types */
        namespace proxy {
            // These aliases are plain types rather than functions. Because each
            // of them is default-constructible, an expression such as
            // pythonic::proxy::double_ (as emitted by pythran) is still legal,
            // which is why they live in the proxy namespace.
            //
            // NOTE(review): several widths differ from NumPy's own naming
            // (e.g. complex64 maps to std::complex<double>, float_ to float,
            // float128 to double) -- confirm this is intentional before
            // relying on the sizes.

            // complex types
            using complex = std::complex<double>;
            using complex32 = std::complex<float>;
            using complex64 = std::complex<double>;
            using complex128 = std::complex<long double>;

            // floating point types
            using float_ = float;
            using float32 = float;
            using float64 = double;
            using float128 = double;
            using double_ = double;

            // signed integers
            using int8 = int8_t;
            using int16 = int16_t;
            using int32 = int32_t;
            using int64 = int64_t;

            // unsigned integers
            using uint8 = uint8_t;
            using uint16 = uint16_t;
            using uint32 = uint32_t;
            using uint64 = uint64_t;
        }


       // Build an ndarray from a (possibly nested) container. The element type
       // defaults to the innermost value type of the container and the rank to
       // its nesting depth; `d` only serves to select `dtype`.
       template<class T, class dtype=typename nested_container_value_type<typename std::remove_cv<typename std::remove_reference<T>::type>::type>::type>
          core::ndarray<dtype, nested_container_depth<typename std::remove_cv<typename std::remove_reference<T>::type>::type>::value > array(T&& iterable, dtype d=dtype()) {
              typedef typename std::remove_cv<typename std::remove_reference<T>::type>::type container_type;
              typedef core::ndarray<dtype, nested_container_depth<container_type>::value > result_type;
              return result_type(std::forward<T>(iterable));
          }

       PROXY(pythonic::numpy, array);

       // N-dimensional array of the given shape, every element set to dtype(0).
       // `d` is only used to pick the element type.
       template<size_t N, class dtype=double>
          core::ndarray<dtype, N> zeros(std::array<long, N> const& shape, dtype d=dtype()) {
              typedef core::ndarray<dtype, N> result_type;
              return result_type(shape, dtype(0));
          }

       // One-dimensional convenience overload: wraps `size` into a 1-element
       // shape and defers to the overload above.
       template<class dtype=double>
          core::ndarray<dtype, 1> zeros(long size, dtype d=dtype()) {
              auto shape = core::make_tuple(size);
              return zeros(shape, d);
          }


       PROXY(pythonic::numpy, zeros);

       // N-dimensional array of the given shape, every element set to dtype(1).
       // `d` is only used to pick the element type.
       template<size_t N, class dtype=double>
          core::ndarray<dtype, N> ones(std::array<long, N> const& shape, dtype d=dtype()) {
              typedef core::ndarray<dtype, N> result_type;
              return result_type(shape, dtype(1));
          }

       // One-dimensional convenience overload: wraps `size` into a 1-element
       // shape and defers to the overload above.
       template<class dtype=double>
          core::ndarray<dtype, 1> ones(long size, dtype d=dtype()) {
              auto shape = core::make_tuple(size);
              return ones(shape, d);
          }

       PROXY(pythonic::numpy, ones);

       // N-dimensional array of the given shape constructed with `None` as
       // the fill argument (no explicit element value is provided).
       template<size_t N, class dtype=double>
          core::ndarray<dtype, N> empty(std::array<long, N>const& shape, dtype d=dtype()) {
              typedef core::ndarray<dtype, N> result_type;
              return result_type(shape, None);
          }
       // One-dimensional convenience overload: wraps `size` into a 1-element
       // shape and defers to the overload above.
       template<class dtype=double>
          core::ndarray<dtype, 1> empty(long size, dtype d=dtype()) {
              auto shape = core::make_tuple(size);
              return empty(shape, d);
          }

       PROXY(pythonic::numpy, empty);


       // Evenly spaced values in the half-open interval [begin, end).
       //
       // The element type R is the type of `begin + end + step`. The number
       // of elements is ceil((end - begin) / step), clamped at zero. The
       // quotient is computed in floating point: with integral arguments an
       // integer division would truncate before the ceiling is applied and
       // drop the last element (e.g. arange(0, 10, 3) must yield 0,3,6,9).
       //
       // Elements are produced by repeatedly adding `step` to the previous
       // element, starting from `begin`.
       template<class T, class U, class dtype=long>
           core::ndarray<decltype(std::declval<T>()+std::declval<U>()+std::declval<dtype>()), 1> arange(T begin, U end, dtype step=dtype(1))
           {
               typedef decltype(begin+end+step) R;
               size_t size = std::max(R(0), R(std::ceil(double(end - begin)/double(step))));
               core::ndarray<R, 1> a(core::make_tuple((long)size), None);
               if(size)
               {
                   auto prev = a.buffer,
                        last = a.buffer + a.size();
                   *prev = begin;
                   for(auto iter = prev + 1; iter!=last; ++iter) {
                       *iter = *prev + step;
                       prev = iter;
                   }
               }
               return a;
           }

       // Single-argument form: values from T(0) up to (excluding) `end`,
       // using the default step of the three-argument overload.
       template<class T>
          core::ndarray<T, 1> arange(T end) {
              T const start = T(0);
              return arange(start, end);
          }
       PROXY(pythonic::numpy, arange);

       // Length of the first dimension of `expr`, read from expr.shape[0].
       template<class T>
          long alen(T&& expr) {
              auto const& dims = expr.shape;
              return dims[0];
          }
       PROXY(pythonic::numpy, alen);

       // `num` evenly spaced samples over [start, stop] (or [start, stop) when
       // `endpoint` is false).
       //
       // The result always holds exactly `num` elements, each computed as
       // start + i * step, so its length does not depend on floating point
       // rounding. When the end point is included the last element is set to
       // `stop` exactly. A single sample yields {start}; num <= 0 yields an
       // empty array.
       //
       // Declared inline because this non-template function is defined in a
       // header.
       inline core::ndarray<double, 1> linspace(double start, double stop, long num=50, bool endpoint = true)
       {
           if(num <= 0)
               return core::ndarray<double, 1>(core::make_tuple(0L), None);

           long const intervals = endpoint ? num - 1 : num;
           // intervals is 0 only for a single sample with the end point
           // included; avoid the division by zero in that case
           double const step = intervals > 0 ? (stop - start) / intervals : 0.;

           core::ndarray<double, 1> samples(core::make_tuple(num), None);
           for(long i = 0; i < num; ++i)
               samples.buffer[i] = start + i * step;
           if(endpoint and num > 1)
               samples.buffer[num - 1] = stop;
           return samples;
       }

       PROXY(pythonic::numpy, linspace);

       // Samples spaced evenly on a log scale: raises `base` to each value
       // returned by linspace(start, stop, num, endpoint). The result type is
       // whatever core::power yields for these operands.
       //
       // Declared inline because this non-template function is defined in a
       // header.
       inline auto logspace(double start, double stop, long num=50, bool endpoint = true, double base=10.0)
           -> decltype(core::power(base, linspace(start, stop, num, endpoint)))
       {
           return core::power(base, linspace(start, stop, num, endpoint));
       }

       PROXY(pythonic::numpy, logspace);

       // Reshape `expr` to the dimensions given as trailing arguments; the
       // rank of the result equals the number of dimensions supplied.
       template<class T, size_t N, class ...S>
           core::ndarray<T, sizeof...(S)> reshape( core::ndarray<T,N> const& expr, S&& ...s) {
               auto new_shape = core::make_tuple(std::forward<S>(s)...);
               return expr.reshape(std::move(new_shape));
           }

       NUMPY_EXPR_TO_NDARRAY0(reshape);

       PROXY(pythonic::numpy, reshape);

       // Cumulative sum over the flattened buffer of `expr`; the result is a
       // one-dimensional array with expr.size() elements.
       template<class T, size_t N, class dtype=T>
           core::ndarray<dtype,1> cumsum(core::ndarray<T,N> const& expr, dtype d = dtype()) {
               long total = expr.size();
               core::ndarray<dtype,1> running(core::make_tuple(total), None);
               auto first = expr.buffer;
               auto last = expr.buffer + total;
               std::partial_sum(first, last, running.buffer);
               return running;
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError.
       template<class T, class dtype=T>
           core::ndarray<dtype,1> cumsum(core::ndarray<T,1> const& expr, long axis, dtype d = dtype()) {
               if(axis == 0)
                   return cumsum(expr);
               throw __builtin__::ValueError("axis out of bounds");
           }

       // Cumulative sum of an N-dimensional array along `axis`; the result has
       // the same shape as the input.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the first sub-array is copied as the seed, then every
       //            following sub-array is the previous result plus the
       //            matching input sub-array.
       // axis  > 0: each sub-array along the first dimension is processed
       //            recursively with axis - 1.
       template<class T, size_t N, class dtype=T>
           core::ndarray<dtype,N> cumsum(core::ndarray<T,N> const& expr, long axis, dtype d = dtype()) {
               if(axis<0 || axis >=long(N))
                   throw __builtin__::ValueError("axis out of bounds");

               auto shape = expr.shape;
               core::ndarray<dtype,N> cumsumy(shape, None);
               if(axis==0) {
                   // nothing to seed or accumulate when there is no sub-array
                   if(shape[0] > 0) {
                       // a sub-array spans every dimension but the first one,
                       // not just the innermost dimension
                       long const row_size = std::accumulate(shape.begin() + 1, shape.end(), 1L, std::multiplies<long>());
                       std::copy(expr.buffer, expr.buffer + row_size, cumsumy.buffer);
                       std::transform(cumsumy.begin(), cumsumy.end()-1, expr.begin() + 1, cumsumy.begin() + 1, std::plus<core::ndarray<T,N-1>>());
                   }
               }
               else {
                   std::transform(expr.begin(), expr.end(), cumsumy.begin(), [=](core::ndarray<T,N-1> const& e) { return cumsum(e, axis-1, d); });
               }
               return cumsumy;
           }

       PROXY(pythonic::numpy, cumsum);

       // Sum of every element of `expr`, accumulated in the element type of
       // the corresponding ndarray, starting from 0.
       template<class E>
           typename core::numpy_expr_to_ndarray<E>::type::dtype
           sum(E const& expr, none_type axis=None) {
               typedef typename core::numpy_expr_to_ndarray<E>::type::dtype value_type;
               value_type total = value_type(0);
               long const count = expr.size();
               for(long pos = 0; pos < count; ++pos)
                   total += expr.at(pos);
               return total;
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError.
       template<class T>
            T sum( core::ndarray<T,1> const& array, long axis)
            {
                if(axis == 0)
                    return sum(array);
                throw __builtin__::ValueError("axis out of bounds");
            }

       // Sum of an N-dimensional array (N != 1) along `axis`.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the sub-arrays along the first dimension are added
       //            together with std::accumulate.
       // axis  > 0: every sub-array is reduced recursively along axis - 1 and
       //            stored in a result whose shape is the input shape with
       //            entry `axis` removed.
       template<class T, size_t N>
            typename std::enable_if<N!=1, typename core::ndarray<T,N>::value_type>::type
            sum( core::ndarray<T,N> const& array, long axis)
            {
                if(axis<0 || axis >=long(N))
                    throw __builtin__::ValueError("axis out of bounds");
                auto shape = array.shape;
                if(axis==0)
                {
                    return std::accumulate(array.begin() + 1, array.end(), *array.begin());
                }
                else
                {
                    // drop the reduced dimension itself; dropping the last one
                    // is only correct when axis == N-1
                    std::array<long, N-1> shp;
                    std::copy(shape.begin(), shape.begin() + axis, shp.begin());
                    std::copy(shape.begin() + axis + 1, shape.end(), shp.begin() + axis);
                    core::ndarray<T,N-1> sumy(shp, None);
                    std::transform(array.begin(), array.end(), sumy.begin(), [=](core::ndarray<T,N-1> const& other) {return sum(other, axis-1);});
                    return sumy;
                }
            }

       PROXY(pythonic::numpy, sum);

       // Sum of the elements of `expr`, skipping every element for which
       // nt2::is_nan is true.
       template<class E>
           typename core::numpy_expr_to_ndarray<E>::type::dtype
           nansum(E const& expr) {
               typedef typename core::numpy_expr_to_ndarray<E>::type::dtype value_type;
               value_type total=0;
               long const count = expr.size();
               for(long pos = 0; pos < count; ++pos) {
                   auto current = expr.at(pos);
                   if(nt2::is_nan(current))
                       continue;
                   total += current;
               }
               return total;
           }

       PROXY(pythonic::numpy, nansum);

       // Iterator yielding (multi-index, value) pairs over an expression.
       //
       // `index` is the flat position, `iter` walks the underlying buffer and
       // `expr` is only consulted for its shape. Comparison and distance use
       // `index` alone.
       template<class E>
           struct ndenumerate_iterator : std::iterator<std::random_access_iterator_tag, std::tuple<std::array<long, core::numpy_expr_to_ndarray<E>::N>, typename core::numpy_expr_to_ndarray<E>::type::dtype> > {
               long index;
               E const& expr;
               typename core::numpy_expr_to_ndarray<E>::type::dtype* iter;
               // NOTE(review): this constructor leaves the reference member
               // `expr` unbound -- confirm it is never instantiated.
               ndenumerate_iterator(){}
               // `iter` always starts at the beginning of the buffer, whatever
               // `first` is; only `index` records the starting position.
               ndenumerate_iterator(E const& expr, long first) : index(first), expr(expr), iter(expr.buffer) {
               }
               // Decode the flat `index` into one coordinate per dimension
               // (last dimension varying fastest) and pair it with *iter.
               std::tuple<std::array<long, core::numpy_expr_to_ndarray<E>::N>, typename core::numpy_expr_to_ndarray<E>::type::dtype> operator*() {
                   std::array<long, core::numpy_expr_to_ndarray<E>::N> out;
                   auto shape = expr.shape;
                   constexpr long N = core::numpy_expr_to_ndarray<E>::N;
                   long mult = 1;
                   for(long j=N-1; j>0; j--) {
                       out[j] = (index/mult)%shape[j];
                       mult*=shape[j];
                   }
                   // the first coordinate is whatever remains, no modulo needed
                   out[0] = index/mult;
                   return std::tuple<std::array<long, core::numpy_expr_to_ndarray<E>::N>, typename core::numpy_expr_to_ndarray<E>::type::dtype>(out, *iter);
               }
               ndenumerate_iterator& operator++() { ++index, ++iter ; return *this; }
               ndenumerate_iterator& operator+=(long n) { index+=n,iter+=n; return *this; }
               bool operator!=(ndenumerate_iterator const& other) { return index != other.index; }
               bool operator<(ndenumerate_iterator const& other) { return index < other.index; }
               long operator-(ndenumerate_iterator const& other) { return index - other.index; }

           };

       // Range object returned by ndenumerate(): it is its own begin iterator
       // (through inheritance) and stores a separate end iterator positioned
       // at expr.size().
       template<class E>
           struct _ndenumerate : ndenumerate_iterator<E> {
                typedef ndenumerate_iterator<E> iterator;
                E expr; // we need to keep one ref over the enumerated sequence alive
                iterator end_iter;

                _ndenumerate() {}
                // NOTE(review): inside the initializer list `expr` names the
                // constructor parameter, so the base iterator and `end_iter`
                // reference the caller's object rather than the member copy --
                // confirm the caller's object outlives the iteration.
                _ndenumerate( E const& expr) :  ndenumerate_iterator<E>(expr, 0), expr(expr), end_iter(expr, expr.size()) {}
                iterator & begin() { return *this; }
                iterator const & begin() const { return *this; }
                iterator end() const { return end_iter; }
           };

       // Entry point: wraps `expr` in an _ndenumerate range that yields
       // (multi-index, value) pairs.
       template<class E>
           _ndenumerate<E> ndenumerate(E const& expr) {
               typedef _ndenumerate<E> range_type;
               return range_type(expr);
           }

       PROXY(pythonic::numpy, ndenumerate);

       // Iterator over every multi-index of an N-dimensional shape.
       //
       // Only the flat position `index` is stored; dereferencing decodes it
       // into one coordinate per dimension, last dimension varying fastest.
       // Comparison and distance are defined on `index` alone.
       template<size_t N>
           struct ndindex_iterator : std::iterator<std::random_access_iterator_tag, std::array<long, N> > {
               long index;
               std::array<long, N> shape;
               ndindex_iterator(){}
               ndindex_iterator(std::array<long, N> const& shape, long first) : index(first), shape(shape) {
               }
               std::array<long, N> operator*() {
                   std::array<long, N> coords;
                   long stride = 1;
                   long dim = long(N) - 1;
                   while(dim > 0) {
                       coords[dim] = (index / stride) % shape[dim];
                       stride *= shape[dim];
                       --dim;
                   }
                   // the leading coordinate is whatever remains, no modulo
                   coords[0] = index / stride;
                   return coords;
               }
               ndindex_iterator& operator++() {
                   ++index;
                   return *this;
               }
               ndindex_iterator& operator+=(long n) {
                   index += n;
                   return *this;
               }
               bool operator!=(ndindex_iterator const& other) { return not (index == other.index); }
               bool operator<(ndindex_iterator const& other) { return other.index > index; }
               long operator-(ndindex_iterator const& other) { return index - other.index; }

           };

       // Range object returned by ndindex(): it is its own begin iterator
       // (through inheritance) and stores an end iterator positioned at the
       // product of all dimensions.
       template<size_t N>
           struct _ndindex : ndindex_iterator<N> {
                typedef ndindex_iterator<N> iterator;
                std::array<long, N> shape;
                iterator end_iter;

                _ndindex() {}
                _ndindex( std::array<long, N> const& shape)
                    : ndindex_iterator<N>(shape, 0)
                    , shape(shape)
                    , end_iter(shape,
                               std::accumulate(shape.begin(), shape.end(), 1L, std::multiplies<long>()))
                {
                }
                iterator & begin() { return *this; }
                iterator const & begin() const { return *this; }
                iterator end() const { return end_iter; }
           };

       // Entry point: packs the dimensions given as arguments with
       // core::make_tuple and returns the matching _ndindex range; the rank
       // equals the number of arguments.
       template<class... Types>
           _ndindex<sizeof...(Types)> ndindex(Types... args) {
               return _ndindex<sizeof...(Types)>(core::make_tuple(args...));
           }

       PROXY(pythonic::numpy, ndindex);

       // Arithmetic mean of all elements: sum(expr) divided by the element
       // count converted to `dtype`. `d` only selects `dtype`.
       template<class E, class dtype=double>
           auto
           mean(E const& expr, none_type axis=None, dtype d=dtype())
           -> decltype(sum(expr)/dtype(expr.size()))
           {
               auto const total = sum(expr);
               dtype const count = dtype(expr.size());
               return total / count;
           }

       PROXY(pythonic::numpy, mean);

       // Median of all elements of `arr`, computed on a sorted copy of its
       // buffer: the average of the two middle elements (which coincide for an
       // odd element count).
       //
       // An empty array yields a quiet NaN instead of reading out of bounds.
       // The scratch copy lives in a std::vector so it is released even if
       // copying or sorting throws.
       template<class T, size_t N>
           decltype(std::declval<T>()+1.) median(core::ndarray<T,N> const& arr) {
               typedef decltype(std::declval<T>()+1.) result_type;
               size_t const n = arr.size();
               if(n == 0)
                   return std::numeric_limits<result_type>::quiet_NaN();
               std::vector<T> sorted(arr.buffer, arr.buffer + n);
               std::sort(sorted.begin(), sorted.end());
               return (sorted[n/2]+sorted[(n-1)/2])/double(2);
           }

       PROXY(pythonic::numpy, median);

       // Copy of `expr` where +infinity becomes the largest finite value of
       // the element type, -infinity its negation, NaN becomes 0 and every
       // other element is kept.
       template<class E>
           typename core::numpy_expr_to_ndarray<E>::type
           nan_to_num(E const& expr) {
               typedef typename core::numpy_expr_to_ndarray<E>::type array_type;
               typedef typename array_type::dtype value_type;
               array_type cleaned(expr.shape, None);
               long const count = expr.size();
               for(long pos = 0; pos < count; ++pos) {
                   auto current = expr.at(pos);
                   if(pythonic::numpy_expr::ops::isposinf(current)) {
                       cleaned.at(pos) = std::numeric_limits<value_type>::max();
                   }
                   else if(pythonic::numpy_expr::ops::isneginf(current)) {
                       cleaned.at(pos) = -std::numeric_limits<value_type>::max();
                   }
                   else if(nt2::is_nan(current)) {
                       cleaned.at(pos) = 0;
                   }
                   else {
                       cleaned.at(pos) = current;
                   }
               }
               return cleaned;
           }

       PROXY(pythonic::numpy, nan_to_num);
       // Flat index of the smallest element of `expr`, ignoring NaNs.
       //
       // Raises ValueError("empty sequence") when `expr` is empty or holds
       // nothing but NaNs.
       template<class E>
           long nanargmin(E&& expr) {
               long sz = expr.size();
               if(not sz)
                   throw __builtin__::ValueError("empty sequence");
               // locate the first non-NaN element; the index and the value
               // examined must stay in step so the reported index is right
               long i = 0;
               while(i < sz and nt2::is_nan(expr.at(i)))
                   ++i;
               if(i == sz)
                   throw __builtin__::ValueError("empty sequence");

               auto res = expr.at(i);
               long index = i;
               for(++i; i < sz; ++i) {
                   auto e_i = expr.at(i);
                   if(e_i < res and not nt2::is_nan(e_i)) {
                       res = e_i;
                       index = i;
                   }
               }
               return index;
           }
       // Flat index of the largest element of `expr`, ignoring NaNs.
       //
       // Raises ValueError("empty sequence") when `expr` is empty or holds
       // nothing but NaNs.
       template<class E>
           long nanargmax(E&& expr) {
               long sz = expr.size();
               if(not sz)
                   throw __builtin__::ValueError("empty sequence");
               // locate the first non-NaN element; the bound is tested before
               // each access so an all-NaN input never reads past the end
               long i = 0;
               while(i < sz and nt2::is_nan(expr.at(i)))
                   ++i;
               if(i == sz)
                   throw __builtin__::ValueError("empty sequence");

               auto res = expr.at(i);
               long index = i;
               for(++i; i < sz; ++i) {
                   auto e_i = expr.at(i);
                   if(e_i > res and not nt2::is_nan(e_i)) {
                       res = e_i;
                       index = i;
                   }
               }
               return index;
           }
        PROXY(pythonic::numpy, nanargmax);

        PROXY(pythonic::numpy, nanargmin);

       // Product of every element of `expr`, accumulated in the element type
       // of the corresponding ndarray, starting from 1.
       template<class E>
           typename core::numpy_expr_to_ndarray<E>::type::dtype
           prod(E const& expr, none_type axis=None) {
               typedef typename core::numpy_expr_to_ndarray<E>::type::dtype value_type;
               value_type result = value_type(1);
               long const count = expr.size();
               for(long pos = 0; pos < count; ++pos)
                   result *= expr.at(pos);
               return result;
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError.
       template<class T>
            T prod( core::ndarray<T,1> const& array, long axis)
            {
                if(axis == 0)
                    return prod(array);
                throw __builtin__::ValueError("axis out of bounds");
            }

       // Product of an N-dimensional array along `axis`.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the sub-arrays along the first dimension are multiplied
       //            together with std::accumulate.
       // axis  > 0: every sub-array is reduced recursively along axis - 1 and
       //            stored in a result whose shape is the input shape with
       //            entry `axis` removed.
       template<class T, size_t N>
            typename core::ndarray<T,N>::value_type prod( core::ndarray<T,N> const& array, long axis)
            {
                if(axis<0 || axis >=long(N))
                    throw __builtin__::ValueError("axis out of bounds");
                auto shape = array.shape;
                if(axis==0)
                {
                    return std::accumulate(array.begin() + 1, array.end(), *array.begin(), std::multiplies<typename core::ndarray<T,N>::value_type>());
                }
                else
                {
                    // drop the reduced dimension itself; dropping the last one
                    // is only correct when axis == N-1
                    std::array<long, N-1> shp;
                    std::copy(shape.begin(), shape.begin() + axis, shp.begin());
                    std::copy(shape.begin() + axis + 1, shape.end(), shp.begin() + axis);
                    core::ndarray<T,N-1> prody(shp, None);
                    std::transform(array.begin(), array.end(), prody.begin(), [=](core::ndarray<T,N-1> const& other) {return prod(other, axis-1);});
                    return prody;
                }
            }

       ALIAS(prod, product);
       PROXY(pythonic::numpy, prod);
       PROXY(pythonic::numpy, product);

       // Smallest element of `expr` (flat scan).
       // Raises ValueError("empty sequence") when `expr` has no element.
       template<class E>
           auto min(E&& expr) -> typename std::remove_reference<decltype(expr.at(0))>::type {
               long const count = expr.size();
               if(count == 0)
                   throw __builtin__::ValueError("empty sequence");
               auto smallest = expr.at(0);
               long pos = 1;
               while(pos < count) {
                   auto candidate = expr.at(pos);
                   if(candidate < smallest)
                       smallest = candidate;
                   ++pos;
               }
               return smallest;
           }
       // Largest element of `expr` (flat scan).
       // Raises ValueError("empty sequence") when `expr` has no element.
       template<class E>
           auto max(E&& expr) -> typename std::remove_reference<decltype(expr.at(0))>::type {
               long const count = expr.size();
               if(count == 0)
                   throw __builtin__::ValueError("empty sequence");
               auto largest = expr.at(0);
               long pos = 1;
               while(pos < count) {
                   auto candidate = expr.at(pos);
                   if(candidate > largest)
                       largest = candidate;
                   ++pos;
               }
               return largest;
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError.
       template<class T>
           T min(core::ndarray<T,1> const& array, long axis) {
               if(axis == 0)
                   return min(array);
               throw __builtin__::ValueError("axis out of bounds");
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError.
       template<class T>
           T max(core::ndarray<T,1> const& array, long axis) {
               if(axis == 0)
                   return max(array);
               throw __builtin__::ValueError("axis out of bounds");
           }

       // Minimum of an N-dimensional array along `axis`.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the first sub-array seeds the result, which is then
       //            combined element-wise (std::min) with every following
       //            sub-array.
       // axis  > 0: every sub-array is reduced recursively along axis - 1 and
       //            stored in a result whose shape is the input shape with
       //            entry `axis` removed.
       template<class T, size_t N>
           typename core::ndarray<T,N>::value_type min(core::ndarray<T,N> const& array, long axis)
           {
               if(axis<0 || axis >=long(N))
                   throw __builtin__::ValueError("axis out of bounds");
               auto shape = array.shape;
               if(axis==0)
               {
                   // result shape = input shape without its first entry;
                   // `size` is the element count of one sub-array
                   std::array<long, N-1> shp;
                   size_t size = 1;
                   for(auto i= shape.begin() + 1, j = shp.begin(); i<shape.end(); ++i, ++j)
                        size*=(*j = *i);
                   core::ndarray<T,N-1> a(shp, None);
                   auto a_iter = a.buffer;
                   std::copy(array.buffer, array.buffer + size, a_iter);
                   for(auto i = array.begin() + 1; i<array.end(); ++i)
                   {
                       auto next_subarray = *i;  //we need this variable to keep this ndarray alive while iter is used
                       auto iter = next_subarray.buffer,
                            iter_end = next_subarray.buffer + next_subarray.size();
                       auto k = a_iter;
                       for(auto j = iter; j<iter_end; ++j, ++k)
                           *k=std::min(*k,*j);
                    }
                    return a;
               }
               else
               {
                   // drop the reduced dimension itself; dropping the last one
                   // is only correct when axis == N-1
                   std::array<long, N-1> shp;
                   std::copy(shape.begin(), shape.begin() + axis, shp.begin());
                   std::copy(shape.begin() + axis + 1, shape.end(), shp.begin() + axis);
                   core::ndarray<T,N-1> miny(shp, None);
                   std::transform(array.begin(), array.end(), miny.begin(), [=](core::ndarray<T,N-1> const& other) {return min(other, axis-1);});
                   return miny;
               }
           }
       // Maximum of an N-dimensional array along `axis`.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the first sub-array seeds the result, which is then
       //            combined element-wise (std::max) with every following
       //            sub-array.
       // axis  > 0: every sub-array is reduced recursively along axis - 1 and
       //            stored in a result whose shape is the input shape with
       //            entry `axis` removed.
       template<class T, size_t N>
           typename core::ndarray<T,N>::value_type max(core::ndarray<T,N> const& array, long axis)
           {
               if(axis<0 || axis >=long(N))
                   throw __builtin__::ValueError("axis out of bounds");
               auto shape = array.shape;
               if(axis==0)
               {
                   // result shape = input shape without its first entry;
                   // `size` is the element count of one sub-array
                   std::array<long, N-1> shp;
                   size_t size = 1;
                   for(auto i= shape.begin() + 1, j = shp.begin(); i<shape.end(); ++i, ++j)
                        size*=(*j = *i);
                   core::ndarray<T,N-1> a(shp, None);
                   auto a_iter = a.buffer;
                   std::copy(array.buffer, array.buffer + size, a_iter);
                   for(auto i = array.begin() + 1; i<array.end(); ++i)
                   {
                       auto next_subarray = *i;  //we need this variable to keep this ndarray alive while iter is used
                       auto iter = next_subarray.buffer,
                            iter_end = next_subarray.buffer + next_subarray.size();
                       auto k = a_iter;
                       for(auto j = iter; j<iter_end; ++j, ++k)
                           *k=std::max(*k,*j);
                    }
                    return a;
               }
               else
               {
                   // drop the reduced dimension itself; dropping the last one
                   // is only correct when axis == N-1
                   std::array<long, N-1> shp;
                   std::copy(shape.begin(), shape.begin() + axis, shp.begin());
                   std::copy(shape.begin() + axis + 1, shape.end(), shp.begin() + axis);
                   core::ndarray<T,N-1> maxy(shp, None);
                   std::transform(array.begin(), array.end(), maxy.begin(), [=](core::ndarray<T,N-1> const& other) {return max(other, axis-1);});
                   return maxy;
               }
           }


       PROXY(pythonic::numpy, min);
       PROXY(pythonic::numpy, max);

       // Smallest element of `expr`, ignoring NaNs.
       //
       // Raises ValueError("empty sequence") when `expr` has no element and
       // ValueError("nan sequence") when every element is NaN.
       template<class E>
           auto nanmin(E&& expr) -> typename std::remove_reference<decltype(expr.at(0))>::type {
               long n = expr.size();
               if(not n)
                   throw __builtin__::ValueError("empty sequence");
               // skip leading NaNs; the bound is tested before each access so
               // an all-NaN input never reads past the end
               long i = 0;
               while(i < n and nt2::is_nan(expr.at(i)))
                   ++i;
               if(i == n)
                   throw __builtin__::ValueError("nan sequence");

               auto res = expr.at(i);
               for(++i; i < n; ++i) {
                   auto e_i = expr.at(i);
                   if(e_i < res and not nt2::is_nan(e_i))
                       res = e_i;
               }
               return res;
           }

       // Largest element of `expr`, ignoring NaNs.
       //
       // Raises ValueError("empty sequence") when `expr` has no element and
       // ValueError("nan sequence") when every element is NaN.
       template<class E>
           auto nanmax(E&& expr) -> typename std::remove_reference<decltype(expr.at(0))>::type {
               long n = expr.size();
               if(not n)
                   throw __builtin__::ValueError("empty sequence");
               // skip leading NaNs; the bound is tested before each access so
               // an all-NaN input never reads past the end
               long i = 0;
               while(i < n and nt2::is_nan(expr.at(i)))
                   ++i;
               if(i == n)
                   throw __builtin__::ValueError("nan sequence");

               auto res = expr.at(i);
               for(++i; i < n; ++i) {
                   auto e_i = expr.at(i);
                   if(e_i > res and not nt2::is_nan(e_i))
                       res = e_i;
               }
               return res;
           }

       PROXY(pythonic::numpy, nanmin);
       PROXY(pythonic::numpy, nanmax);

       // True when every element of `expr` evaluates to true (flat scan);
       // an empty `expr` yields true.
       template<class E>
           bool all(E&& expr) {
               long const count = expr.size();
               long pos = 0;
               while(pos < count) {
                   if( not expr.at(pos) )
                       return false;
                   ++pos;
               }
               return true;
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError. The boolean outcome is returned
       // converted to the element type T.
       template<class T>
            T all( core::ndarray<T,1> const& array, long axis)
            {
                if(axis == 0)
                    return all(array);
                throw __builtin__::ValueError("axis out of bounds");
            }

       // Logical AND of an N-dimensional array along `axis`.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the first sub-array seeds the result, which is then
       //            combined element-wise (`and`) with every following
       //            sub-array.
       // axis  > 0: every sub-array is reduced recursively along axis - 1 and
       //            stored in a result whose shape is the input shape with
       //            entry `axis` removed.
       template<class T, size_t N>
            typename core::ndarray<T,N>::value_type all( core::ndarray<T,N> const& array, long axis)
            {
                if(axis<0 || axis >=long(N))
                    throw __builtin__::ValueError("axis out of bounds");
                auto shape = array.shape;
                if(axis==0)
                {
                    // result shape = input shape without its first entry;
                    // `size` is the element count of one sub-array
                    std::array<long, N-1> shp;
                    size_t size = 1;
                    for(auto i= shape.begin() + 1, j = shp.begin(); i<shape.end(); ++i, ++j)
                        size*=(*j = *i);
                    core::ndarray<T,N-1> a(shp, None);
                    auto a_iter = a.buffer;
                    std::copy(array.buffer, array.buffer + size, a_iter);
                    for(auto i = array.begin() + 1; i<array.end(); ++i)
                    {
                        auto next_subarray = *i;  //we need this variable to keep this ndarray alive while iter is used
                        auto iter = next_subarray.buffer,
                             iter_end = next_subarray.buffer + next_subarray.size();
                        auto k = a_iter;
                        for(auto j = iter; j<iter_end; ++j, ++k)
                            *k=*k and *j;
                    }
                    return a;
                }
                else
                {
                    // drop the reduced dimension itself; dropping the last one
                    // is only correct when axis == N-1
                    std::array<long, N-1> shp;
                    std::copy(shape.begin(), shape.begin() + axis, shp.begin());
                    std::copy(shape.begin() + axis + 1, shape.end(), shp.begin() + axis);
                    core::ndarray<T,N-1> ally(shp, None);
                    std::transform(array.begin(), array.end(), ally.begin(), [=](core::ndarray<T,N-1> const& other) {return all(other, axis-1);});
                    return ally;
                }
            }

        PROXY(pythonic::numpy, all);

        // True when `u` and `v` have the same number of elements and every
        // pair satisfies |u_i - v_i| <= atol + rtol * |v_i|. Any NaN on either
        // side, or a size mismatch, yields false.
        template<class U, class V>
            bool allclose(U&& u, V&& v, double rtol=1e-5, double atol=1e-8) {
                long const u_count = u.size();
                long const v_count = v.size();
                if( u_count != v_count )
                    return false;

                for(long pos = 0; pos < u_count; ++pos) {
                    auto rhs = v.at(pos);
                    auto lhs = u.at(pos);
                    if( nt2::is_nan(rhs) )
                        return false;
                    if( nt2::is_nan(lhs) )
                        return false;
                    if( std::abs(lhs-rhs) > (atol + rtol * std::abs(rhs)) )
                        return false;
                }
                return true;
            }

        PROXY(pythonic::numpy, allclose);

        // alltrue: forwards every argument unchanged to all() and returns its
        // result.
        template<class... Types>
            auto alltrue(Types&&... types) -> decltype(all(std::forward<Types>(types)...)) {
                return all(std::forward<Types>(types)...);
            }

        PROXY(pythonic::numpy, alltrue);

        // amax: forwards every argument unchanged to max() and returns its
        // result.
        template<class... Types>
            auto amax(Types&&... types) -> decltype(max(std::forward<Types>(types)...)) {
                return max(std::forward<Types>(types)...);
            }

        PROXY(pythonic::numpy, amax);

        // amin: forwards every argument unchanged to min() and returns its
        // result.
        template<class... Types>
            auto amin(Types&&... types) -> decltype(min(std::forward<Types>(types)...)) {
                return min(std::forward<Types>(types)...);
            }

        PROXY(pythonic::numpy, amin);

       // True when at least one element of `expr` evaluates to true (flat
       // scan); an empty `expr` yields false.
       template<class E>
           bool any(E&& expr) {
               long const count = expr.size();
               long pos = 0;
               while(pos < count) {
                   if( expr.at(pos) )
                       return true;
                   ++pos;
               }
               return false;
           }

       // One-dimensional input with an explicit axis: only axis 0 exists, any
       // other value raises ValueError. The boolean outcome is returned
       // converted to the element type T.
       template<class T>
            T any( core::ndarray<T,1> const& array, long axis)
            {
                if(axis == 0)
                    return any(array);
                throw __builtin__::ValueError("axis out of bounds");
            }

       // Logical OR of an N-dimensional array along `axis`.
       //
       // Raises ValueError when `axis` is outside [0, N).
       //
       // axis == 0: the first sub-array seeds the result, which is then
       //            combined element-wise (`or`) with every following
       //            sub-array.
       // axis  > 0: every sub-array is reduced recursively along axis - 1 and
       //            stored in a result whose shape is the input shape with
       //            entry `axis` removed.
       template<class T, size_t N>
            typename core::ndarray<T,N>::value_type any( core::ndarray<T,N> const& array, long axis)
            {
                if(axis<0 || axis >=long(N))
                    throw __builtin__::ValueError("axis out of bounds");
                auto shape = array.shape;
                if(axis==0)
                {
                    // result shape = input shape without its first entry;
                    // `size` is the element count of one sub-array
                    std::array<long, N-1> shp;
                    size_t size = 1;
                    for(auto i= shape.begin() + 1, j = shp.begin(); i<shape.end(); ++i, ++j)
                        size*=(*j = *i);
                    core::ndarray<T,N-1> a(shp, None);
                    auto a_iter = a.buffer;
                    std::copy(array.buffer, array.buffer + size, a_iter);
                    for(auto i = array.begin() + 1; i<array.end(); ++i)
                    {
                        auto next_subarray = *i;  //we need this variable to keep this ndarray alive while iter is used
                        auto iter = next_subarray.buffer,
                             iter_end = next_subarray.buffer + next_subarray.size();
                        auto k = a_iter;
                        for(auto j = iter; j<iter_end; ++j, ++k)
                            *k=*k or *j;
                    }
                    return a;
                }
                else
                {
                    // drop the reduced dimension itself; dropping the last one
                    // is only correct when axis == N-1
                    std::array<long, N-1> shp;
                    std::copy(shape.begin(), shape.begin() + axis, shp.begin());
                    std::copy(shape.begin() + axis + 1, shape.end(), shp.begin() + axis);
                    core::ndarray<T,N-1> anyy(shp, None);
                    std::transform(array.begin(), array.end(), anyy.begin(), [=](core::ndarray<T,N-1> const& other) {return any(other, axis-1);});
                    return anyy;
                }
            }

        PROXY(pythonic::numpy, any);
        ALIAS(any, sometrue);
        PROXY(pythonic::numpy, sometrue);


        // Permute the dimensions of `a`: dimension s of the result is
        // dimension l[s] of the input. `l` must point at N axis numbers; no
        // validation is done here.
        //
        // Every element of the source buffer is visited in order, its
        // coordinates are recovered from the flat position and it is written
        // to the matching position in the freshly allocated result.
        template<class T, unsigned long N, class... C>
            core::ndarray<T,N> _transpose(core::ndarray<T,N> const & a, long const l[N])
            {
                auto shape = a.shape;
                // shape of the result: input extents reordered by `l`
                std::array<long, N> shp;
                for(unsigned long i=0; i<N; ++i)
                    shp[i] = shape[l[i]];

                core::ndarray<T,N> new_array(shp, None);

                // Strides (in elements) of the result, last dimension
                // contiguous. The transform reads the value it wrote on the
                // previous step, so each stride is the next one times the
                // next extent.
                std::array<long, N> new_strides;
                new_strides[N-1] = 1;
                std::transform(new_strides.rbegin(), new_strides.rend() -1, shp.rbegin(), new_strides.rbegin() + 1, std::multiplies<long>());

                // Same computation for the input array.
                std::array<long, N> old_strides;
                old_strides[N-1] = 1;
                std::transform(old_strides.rbegin(), old_strides.rend() -1, shape.rbegin(), old_strides.rbegin() + 1, std::multiplies<long>());

                auto iter = a.buffer,
                     iter_end = a.buffer + a.size();
                for(long i=0; iter!=iter_end; ++iter, ++i) {
                    long offset = 0;
                    // coordinate of element i along input axis l[s], scaled by
                    // the stride of output axis s
                    for(unsigned long s=0; s<N; s++)
                        offset += ((i/old_strides[l[s]]) % shape[l[s]])*new_strides[s];
                    new_array.buffer[offset] = *iter;
                }

                return new_array;
            }

        // Transpose with the default permutation: the order of the dimensions
        // is reversed.
        template<class T, size_t N>
            core::ndarray<T,N> transpose(core::ndarray<T,N> const & a)
            {
                long reversed_axes[N];
                for(size_t k = 0; k < N; ++k)
                    reversed_axes[k] = long(N - 1 - k);
                return _transpose(a, reversed_axes);
            }
        // Transpose with an explicit permutation `t` of the N axes.
        //
        // The number of axes must match the rank of `a` (checked at compile
        // time). Every entry is validated, not just the last one: negative
        // entries count from the last axis, as in NumPy; an entry outside
        // [-N, N) or an axis listed twice raises ValueError.
        template<class T, size_t N, size_t M>
            core::ndarray<T,N> transpose(core::ndarray<T,N> const & a, std::array<long, M> const& t)
            {
                static_assert(N==M, "axes don't match array");

                long axes[N];
                bool seen[N] = {};
                for(size_t i = 0; i < N; ++i) {
                    long axis = t[i];
                    if(axis < 0)
                        axis += long(N);
                    if(axis < 0 || axis >= long(N))
                        throw __builtin__::ValueError("invalid axis for this array");
                    if(seen[axis])
                        throw __builtin__::ValueError("repeated axis in transpose");
                    seen[axis] = true;
                    axes[i] = axis;
                }
                return _transpose(a, axes);
            }

        NUMPY_EXPR_TO_NDARRAY0(transpose);
        PROXY(pythonic::numpy, transpose);

// NP_PROXY(name): pull `name` from both nt2 and pythonic::core into the
// current namespace, then hand it to PROXY (defined elsewhere) under
// pythonic::numpy.
#define NP_PROXY(name)\
        using nt2::name;\
        using pythonic::core::name;\
        PROXY(pythonic::numpy, name)
// NP_PROXY_ALIAS(name, alias): same as NP_PROXY, except that the first step
// goes through ALIAS(alias, name) instead of a using-declaration on nt2.
// Judging by the other ALIAS uses in this file, that exposes the existing
// callable `alias` under the new identifier `name` -- confirm against ALIAS.
#define NP_PROXY_ALIAS(name, alias)\
        ALIAS(alias, name)\
        using pythonic::core::name;\
        PROXY(pythonic::numpy, name)
// NP_PROXY_OP(name): same as NP_PROXY, but the first using-declaration takes
// `name` from pythonic::numpy_expr::ops rather than from nt2.
#define NP_PROXY_OP(name)\
        using pythonic::numpy_expr::ops::name;\
        using pythonic::core::name;\
        PROXY(pythonic::numpy, name)

        NP_PROXY(abs);

        NP_PROXY_ALIAS(absolute, nt2::abs);

        NP_PROXY_OP(add);

        NP_PROXY_ALIAS(angle_in_deg, pythonic::numpy_expr::ops::angle_in_deg);

        NP_PROXY_ALIAS(angle_in_rad, pythonic::numpy_expr::ops::angle_in_rad);

        /* Angle of each element of `t`, materialised as an ndarray.
         * `in_deg` selects angle_in_deg over angle_in_rad. */
        template<class T>
            auto angle(T const& t, bool in_deg) -> decltype(typename core::numpy_expr_to_ndarray<T>::type(angle_in_rad(typename core::numpy_expr_to_ndarray<T>::type(t)))) {
                typedef typename core::numpy_expr_to_ndarray<T>::type array_type;
                return in_deg ? array_type(angle_in_deg(array_type(t)))
                              : array_type(angle_in_rad(array_type(t)));
            }
        /* Single-argument form: forwards to angle(t, false), i.e. the
         * angle_in_rad variant. Disabled through enable_if when T satisfies
         * core::is_numpy_expr. */
        template<class T>
            auto angle(T const& t) -> typename std::enable_if<not core::is_numpy_expr<T>::value,decltype(angle(t,false))>::type {
                    return angle(t,false);
            }
        PROXY(pythonic::numpy, angle);

        /* Flattened concatenation of `nto` followed by `data`.
         * `data` is first converted to an ndarray; the result is always 1-D and
         * its element type is the type of (T + value_type of F). */
        template<class T, size_t N, class F>
            core::ndarray<
                typename std::remove_cv<
                    typename std::remove_reference<
                        decltype(
                                std::declval<T>()
                                +
                                std::declval<typename nested_container_value_type<F>::type>())
                        >::type
                    >::type,
                1> append(core::ndarray<T,N> const& nto, F const& data) {
                    typedef core::ndarray<
                        typename std::remove_cv<
                            typename std::remove_reference<
                                decltype(
                                        std::declval<T>()
                                        +
                                        std::declval<typename nested_container_value_type<F>::type>())
                                >::type
                            >::type,
                        1> result_type;

                    typename core::numpy_expr_to_ndarray<F>::type extra(data);
                    long total = nto.size() + extra.size();
                    result_type out(core::make_tuple(total), None);

                    // a single write cursor runs across both copy phases
                    size_t write = 0;
                    for(size_t read=0; read<nto.size(); ++read, ++write)
                        out.at(write) = nto.at(read);
                    for(size_t read=0; read<extra.size(); ++read, ++write)
                        out.at(write) = extra.at(read);
                    return out;
                }
        /* List overload: converts `to` to an ndarray and forwards to the
         * ndarray version of append. */
        template<class T, class F>
            core::ndarray<
                typename std::remove_cv<
                    typename std::remove_reference<
                        decltype(
                                std::declval<typename nested_container_value_type<core::list<T>>::type>()
                                +
                                std::declval<typename nested_container_value_type<F>::type>())
                        >::type
                    >::type,
                1> append(core::list<T> const& to, F const& data) {
                    return append(typename core::numpy_expr_to_ndarray<core::list<T>>::type(to), data);
                }

        PROXY(pythonic::numpy, append);

       /* Flat index of the smallest element of `expr`; on ties the first
        * occurrence wins. Throws ValueError when `expr` is empty. */
       template<class E>
           long argmin(E&& expr) {
               long count = expr.size();
               if(count == 0)
                   throw __builtin__::ValueError("empty sequence");
               long best_index = 0;
               auto best_value = expr.at(0);
               for(long candidate = 1; candidate < count; ++candidate) {
                   auto value = expr.at(candidate);
                   if(value < best_value) {
                       best_index = candidate;
                       best_value = value;
                   }
               }
               return best_index;
           }
       /* Flat index of the largest element of `expr`; on ties the first
        * occurrence wins. Throws ValueError when `expr` is empty. */
       template<class E>
           long argmax(E&& expr) {
               long count = expr.size();
               if(count == 0)
                   throw __builtin__::ValueError("empty sequence");
               long best_index = 0;
               auto best_value = expr.at(0);
               for(long candidate = 1; candidate < count; ++candidate) {
                   auto value = expr.at(candidate);
                   if(value > best_value) {
                       best_index = candidate;
                       best_value = value;
                   }
               }
               return best_index;
           }
        PROXY(pythonic::numpy, argmax);

        PROXY(pythonic::numpy, argmin);

        /* Indices that sort `a` along its last axis.
         * The output has the shape of `a`; each run of shape[N-1] consecutive
         * entries holds the permutation sorting the matching run of `a`. */
        template<class T, size_t N>
            core::ndarray<long, N> argsort(core::ndarray<T,N> const& a) {
                size_t row_length = a.shape[N-1];
                size_t total = a.size();
                core::ndarray<long, N> indices(a.shape, None);

                long * const stop = indices.buffer + total;
                long offset = 0; // flat position of the current row inside `a`
                for(long * row = indices.buffer; row != stop; row += row_length, offset += row_length)
                {
                    // start from the identity permutation 0, 1, 2, ...
                    std::iota(row, row + row_length, 0L);
                    // then order it by the values it refers to
                    std::sort(row, row + row_length,
                            [&a,offset](long lhs, long rhs) {return a.at(offset+lhs) < a.at(offset+rhs);});
                }
                return indices;
            }

        PROXY(pythonic::numpy, argsort);

        /* Coordinates of every truthy element of `expr`.
         * Returns a (count, N) array of longs, one row of N indices per hit,
         * in flat-index order. */
        template<class E>
            typename core::ndarray<long, 2>
            argwhere(E const& expr) {
                typedef typename core::ndarray<long, 2> out_type;
                constexpr long N = core::numpy_expr_to_ndarray<E>::N;
                long total = expr.size();
                auto dims = expr.shape;
                // sized for the worst case where every element matches
                long *buffer = new long[N * total];
                long *row = buffer;
                long hits = 0;
                for(long flat=0; flat< total; ++flat) {
                    if(not expr.at(flat))
                        continue;
                    // unravel the flat index, innermost axis first
                    long stride = 1;
                    for(long axis=N-1; axis>0; axis--) {
                        row[axis] = (flat/stride)%dims[axis];
                        stride*=dims[axis];
                    }
                    row[0] = flat/stride;
                    row+=N;
                    ++hits;
                }
                long shape[2] = { hits, N };
                return out_type(buffer, shape);
            }

        PROXY(pythonic::numpy, argwhere);

        /* Round `a` to `decimals` decimal places: scale by 10^decimals, apply
         * rint, then divide by the same power of ten. The return type is
         * whatever that expression yields. */
        template<class E>
            auto around(E const& a, long decimals=0) -> decltype(pythonic::core::rint(a * std::pow(typename core::numpy_expr_to_ndarray<E>::type::dtype(10),decimals)) / std::pow(typename core::numpy_expr_to_ndarray<E>::type::dtype(10), decimals)) {
                return pythonic::core::rint(a * std::pow(typename core::numpy_expr_to_ndarray<E>::type::dtype(10),decimals)) / std::pow(typename core::numpy_expr_to_ndarray<E>::type::dtype(10), decimals);
            }
        /* List overload: converts the list to an ndarray, then rounds it. */
        template<class T>
            typename core::numpy_expr_to_ndarray<core::list<T>>::type around(core::list<T> const& l, long decimals=0) {
                return around(typename core::numpy_expr_to_ndarray<core::list<T>>::type(l), decimals);
            }

        PROXY(pythonic::numpy, around);

        /* Text form of `a`, produced by streaming it with operator<<. */
        template<class T, size_t N>
            core::string array2string(core::ndarray<T,N> const& a) {
                std::ostringstream rendered;
                rendered << a;
                return core::string(rendered.str());
            }

        PROXY(pythonic::numpy, array2string);

        /* True when `u` and `v` have equal shapes and equal elements. */
        template<class U, class V>
            typename std::enable_if<has_shape<U>::value and has_shape<V>::value,bool>::type array_equal(U const& u, V const&v) {
                // differing shapes can never be equal
                if(not (u.shape == v.shape))
                    return false;
                for(long pos=0, count=u.size(); pos<count; ++pos) {
                    if(u.at(pos) != v.at(pos))
                        return false;
                }
                return true;
            }
        /* List overloads: every core::list argument is converted to an ndarray
         * and the comparison is forwarded to the shape-based array_equal. */
        // list on the left
        template<class U, class V>
            typename std::enable_if<has_shape<V>::value,bool>::type array_equal(core::list<U> const& u, V const&v) {
                return array_equal(typename core::numpy_expr_to_ndarray<core::list<U>>::type(u), v);
            }
        // list on the right
        template<class U, class V>
            typename std::enable_if<has_shape<U>::value,bool>::type array_equal(U const& u, core::list<V> const&v) {
                return array_equal(u, typename core::numpy_expr_to_ndarray<core::list<V>>::type(v));
            }
        // lists on both sides
        template<class U, class V>
            bool array_equal(core::list<U> const& u, core::list<V> const&v) {
                return array_equal(typename core::numpy_expr_to_ndarray<core::list<U>>::type(u), typename core::numpy_expr_to_ndarray<core::list<V>>::type(v));
            }

        PROXY(pythonic::numpy, array_equal);

        /* True when `u` and `v` are equal, or when the smaller one, repeated
         * end to end, reproduces the larger one element by element.
         *
         * Equal shapes are delegated to array_equal. Otherwise the operands are
         * ordered so that `u` is the smaller, and `v` must be a whole number of
         * copies of `u`.
         * An empty smaller operand used to trigger a modulo by zero; it is now
         * reported as not equivalent.
         * NOTE(review): numpy decides that case through shape broadcasting,
         * which this flat comparison does not model. */
        template<class U, class V>
            typename std::enable_if<has_shape<U>::value and has_shape<V>::value,bool>::type array_equiv(U const& u, V const&v) {
                if(u.shape == v.shape) {
                    return array_equal(u,v);
                }
                else if(u.size() > v.size()) {
                    return array_equiv(v,u);
                }

                long vs = v.size(),
                     us = u.size();
                // nothing to tile with, and `vs % us` would divide by zero
                if(us == 0)
                    return false;
                if(vs % us != 0)
                    return false;

                for(long vi = 0; vi < vs ; ) {
                    for(long ui=0;ui<us; ++ui,++vi)
                        if(u.at(ui) != v.at(vi))
                            return false;
                }
                return true;
            }
        /* List overloads: every core::list argument is converted to an ndarray
         * and the test is forwarded to the shape-based array_equiv. */
        // list on the left
        template<class U, class V>
            typename std::enable_if<has_shape<V>::value,bool>::type array_equiv(core::list<U> const& u, V const&v) {
                return array_equiv(typename core::numpy_expr_to_ndarray<core::list<U>>::type(u), v);
            }
        // list on the right
        template<class U, class V>
            typename std::enable_if<has_shape<U>::value,bool>::type array_equiv(U const& u, core::list<V> const&v) {
                return array_equiv(u, typename core::numpy_expr_to_ndarray<core::list<V>>::type(v));
            }
        // lists on both sides
        template<class U, class V>
            bool array_equiv(core::list<U> const& u, core::list<V> const&v) {
                return array_equiv(typename core::numpy_expr_to_ndarray<core::list<U>>::type(u), typename core::numpy_expr_to_ndarray<core::list<V>>::type(v));
            }

        PROXY(pythonic::numpy, array_equiv);

        /* Split `a` along its first axis into `nb_split` chunks of nearly equal
         * length: the leading chunks hold ceil(sz / nb_split) entries, the
         * remaining ones hold one entry less.
         * Throws ValueError when `nb_split` is not strictly positive (this used
         * to divide by zero). */
        template<class T, size_t N>
            core::list<core::ndarray<T,N>> array_split(core::ndarray<T,N> const& a, long nb_split) {
                if(nb_split <= 0)
                    throw __builtin__::ValueError("number sections must be larger than 0.");

                long sz = std::distance(a.begin(), a.end());
                long n = (sz + nb_split -1 ) / nb_split;   // length of a full chunk
                long end = n * nb_split;
                // each missing element turns one full chunk into a short one
                long nb_full_split = nb_split;
                if(end != sz) nb_full_split -= (end -sz);
                core::list<core::ndarray<T,N>> out(nb_split);
                long index = 0;
                for(long i=0;i<nb_full_split; ++i, index+=n)
                    out[i] = a[core::slice(index, index+n)];
                for(long i=nb_full_split;i<nb_split; ++i, index+=(n-1))
                    out[i] = a[core::slice(index, index + n - 1)];

                return out;
            }

        /* Split `a` along its first axis at the positions listed in
         * `split_mask`; yields one more chunk than there are positions, the
         * last one running to the end of `a`. */
        template<class T, size_t N, class I>
            typename std::enable_if<is_iterable<I>::value, core::list<core::ndarray<T,N>>>::type
            array_split(core::ndarray<T,N> const& a, I const& split_mask) {
                long total = std::distance(a.begin(), a.end());
                core::list<core::ndarray<T,N>> out(1+split_mask.size());
                auto slot = out.begin();
                long start = 0;
                for(auto stop: split_mask) {
                    *slot = a[core::slice(start, stop)];
                    ++slot;
                    start = stop;
                }
                // whatever remains after the last split point
                *slot = a[core::slice(start, total)];
                return out;
            }

        PROXY(pythonic::numpy, array_split);
        /* String form of `a`, obtained by streaming it with operator<<. */
        template<class T, size_t N>
            core::string array_str(core::ndarray<T,N> const& a) {
                std::ostringstream text;
                text << a;
                return core::string(text.str());
            }

        PROXY(pythonic::numpy, array_str);

        /* Implementation helper for asarray, selected on the (cv/ref-stripped)
         * type of the first argument.
         * Generic case: forward every argument to array(...). */
        template<class E>
            struct _asarray {
                template<class... Types>
                    auto operator()(Types&&... args) -> decltype(array(std::forward<Types>(args)...)) {
                        return array(std::forward<Types>(args)...);
                    }
            };
        /* ndarray case: the argument is returned as a core::ndarray<T,N>
         * without going through array(...). */
        template<class T, size_t N>
            struct _asarray<core::ndarray<T,N>> {
                template<class F>
                    core::ndarray<T,N> operator()(F&& a) {
                        return a;
                    }
            };

        /* Convert `e` to an array by dispatching on its type with cv-qualifiers
         * and references removed: see the _asarray helper. Extra arguments are
         * forwarded untouched. */
        template<class E, class... Types>
            auto asarray(E&& e, Types&&... args) -> decltype(_asarray<typename std::remove_cv<typename std::remove_reference<E>::type>::type>()(std::forward<E>(e), std::forward<Types>(args)...)) {
                return _asarray<typename std::remove_cv<typename std::remove_reference<E>::type>::type>()(std::forward<E>(e), std::forward<Types>(args)...);
            }

        PROXY(pythonic::numpy, asarray);

        /* Same as asarray, then every element of the result is checked with
         * std::isfinite. Throws ValueError on the first inf or NaN. */
        template<class... Types>
            auto asarray_chkfinite(Types&&... args) -> decltype(asarray(std::forward<Types>(args)...)) {
                auto checked = asarray(std::forward<Types>(args)...);
                auto cursor = checked.buffer;
                auto stop = checked.buffer + checked.size();
                while(cursor != stop) {
                    if(not std::isfinite(*cursor))
                        throw __builtin__::ValueError("array must not contain infs or NaNs");
                    ++cursor;
                }
                return checked;
            }

        PROXY(pythonic::numpy, asarray_chkfinite);

        ALIAS(asarray, ascontiguousarray)
        PROXY(pythonic::numpy, ascontiguousarray);

        /* The single element of a size-1 array.
         * Throws ValueError for any other size. */
        template<class E>
            auto asscalar(E const& expr) -> decltype(expr.at(0)) {
                if(expr.size() == 1)
                    return expr.at(0);
                throw __builtin__::ValueError("can only convert an array  of size 1 to a Python scalar");
            }

        PROXY(pythonic::numpy, asscalar);

        /* atleast_1d, scalar or complex input: a 1-D array of length 1 filled
         * with `t`. */
        template<class T>
            typename std::enable_if<std::is_scalar<T>::value or is_complex<T>::value, core::ndarray<T,1>>::type
            atleast_1d(T t) {
                return core::ndarray<T,1>(core::make_tuple(1L), t);
            }
        /* atleast_1d, any other input: plain conversion through asarray. */
        template<class T>
            auto atleast_1d(T const& t) -> typename std::enable_if< not(std::is_scalar<T>::value or is_complex<T>::value), typename core::numpy_expr_to_ndarray<T>::type > ::type {
                return asarray(t);
            }

        PROXY(pythonic::numpy, atleast_1d);

        /* atleast_2d, scalar or complex input: a 1x1 array filled with `t`. */
        template<class T>
            typename std::enable_if<std::is_scalar<T>::value or is_complex<T>::value, core::ndarray<T,2>>::type
            atleast_2d(T t) {
                return core::ndarray<T,2>(core::make_tuple(1L,1L), t);
            }
        /* atleast_2d, input whose ndarray type reports `value` < 2 (presumably
         * the rank -- confirm): converted with asarray, then reshaped to
         * (1, shape[0]). */
        template<class T>
            auto atleast_2d(T const& t)
            -> typename std::enable_if<
                    not(std::is_scalar<T>::value or is_complex<T>::value) and core::numpy_expr_to_ndarray<T>::type::value < 2,
                    core::ndarray<typename core::numpy_expr_to_ndarray<T>::type::dtype,2>
                > ::type
            {
                auto r = asarray(t);
                return r.reshape(core::make_tuple(1L, r.shape[0]));
            }

        /* atleast_2d, input whose ndarray type reports `value` >= 2: plain
         * conversion through asarray, no reshape. */
        template<class T>
            auto atleast_2d(T const& t)
            -> typename std::enable_if<
                    not(std::is_scalar<T>::value or is_complex<T>::value) and core::numpy_expr_to_ndarray<T>::type::value >= 2,
                    decltype(asarray(t))
               > ::type
            {
                return asarray(t);
            }

        PROXY(pythonic::numpy, atleast_2d);

        /* atleast_3d, scalar or complex input: a 1x1x1 array filled with `t`. */
        template<class T>
            typename std::enable_if<std::is_scalar<T>::value or is_complex<T>::value, core::ndarray<T,3>>::type
            atleast_3d(T t) {
                return core::ndarray<T,3>(core::make_tuple(1L,1L,1L), t);
            }
        /* atleast_3d for inputs below three dimensions: a shape of size 1
         * becomes (1, n, 1), anything else becomes (shape[0], shape[1], 1). */
        template<class T>
            auto atleast_3d(T const& t)
            -> typename std::enable_if<
                    not(std::is_scalar<T>::value or is_complex<T>::value) and (core::numpy_expr_to_ndarray<T>::type::value < 3),
                    core::ndarray<typename core::numpy_expr_to_ndarray<T>::type::dtype, 3>
                > ::type
            {
                auto converted = asarray(t);
                auto dims = converted.shape;
                if(dims.size() != 1)
                    return converted.reshape(core::make_tuple(dims[0], dims[1], 1L));
                return converted.reshape(core::make_tuple(1L, dims[0],1L));
            }

        /* atleast_3d, input whose ndarray type reports `value` >= 3: plain
         * conversion through asarray, no reshape. */
        template<class T>
            auto atleast_3d(T const& t)
            -> typename std::enable_if<
                    not(std::is_scalar<T>::value or is_complex<T>::value) and core::numpy_expr_to_ndarray<T>::type::value >= 3,
                    decltype(asarray(t))
               > ::type
            {
                return asarray(t);
            }

        PROXY(pythonic::numpy, atleast_3d);

        /* Unweighted average over the whole expression: sum(expr, None)
         * divided by the number of elements. */
        template<class E>
            auto average(E const & expr, none_type const& axis=None) -> decltype(sum(expr, axis)/1.) {
                return sum(expr, axis) / double(expr.size());
            }

        /* Unweighted average along `axis`: sum along that axis divided by the
         * extent of the axis. `axis` indexes the shape directly, with no range
         * check here. */
        template<class E>
            auto average(E const & expr, long axis) -> decltype(sum(expr, axis)/1.) {
                auto shape = expr.shape;
                return sum(expr, axis) / double(shape[axis]);
            }

        /* Weighted average over the whole expression: every element is scaled
         * by its weight divided by the mean weight, then the plain average of
         * the scaled values is taken. */
        template<class E, class W>
            auto average(E const & expr, none_type const& axis, W const& weights) -> decltype(average(expr * asarray(weights) / average(asarray(weights)))) {
                auto weight_array = asarray(weights);
                return average(expr * weight_array / average(weight_array));
            }

        PROXY(pythonic::numpy, average);

        namespace {

            /* Write the `buf_size` low-order bits of `a` into `buffer` as '0'/'1'
             * characters, most significant bit first, followed by a NUL.
             * `buffer` must hold at least buf_size + 1 characters; a negative
             * `buf_size` is treated as 0.
             * Returns `buffer` (the previous version returned a pointer one
             * position before it). */
            char *int2bin(long a, char *buffer, int buf_size) {
                if (buf_size < 0)
                    buf_size = 0;
                buffer[buf_size] = 0;
                // fill from the last position so that the lowest bit ends up rightmost
                for (int i = buf_size - 1; i >= 0; --i) {
                    buffer[i] = (a & 1) + '0';
                    a >>= 1;
                }
                return buffer;
            }

            /* Write `value` in the given `base` (2..16, uppercase digits) into
             * `result` as a NUL-terminated string; negative values get a leading
             * '-'. An unsupported base yields an empty string.
             * Returns `result`. The caller provides enough room for the digits,
             * the sign and the terminator. */
            char* itoa( int value, char* result, int base ) {
                // check that the base is valid
                if (base < 2 || base > 16) { *result = 0; return result; }

                char* out = result;
                // work on the unsigned magnitude: negating INT_MIN as an int is undefined
                unsigned int quotient = value < 0
                    ? 0u - static_cast<unsigned int>(value)
                    : static_cast<unsigned int>(value);
                unsigned int const ubase = static_cast<unsigned int>(base);

                // digits are produced least significant first
                do {
                    *out++ = "0123456789ABCDEF"[ quotient % ubase ];
                    quotient /= ubase;
                } while ( quotient );

                // Apply negative sign
                if ( value < 0) *out++ = '-';

                std::reverse( result, out );
                *out = 0;
                return result;
            }

        }

        /* String representation of `number` in `base`, left-padded with
         * `padding` zeros.
         *
         * - bases 2..36 are supported (digits 0-9 then A-Z); any other base
         *   yields the padding zeros only, as before;
         * - a negative `padding` is treated as 0 (it used to make the fill run
         *   backwards);
         * - the whole `long` range is converted, without narrowing to int;
         * - for negative numbers the '-' sign comes before the padding zeros;
         * - at least one digit is always produced, so 0 gives "0". */
        core::string base_repr(long number, long base=2, long padding=0) {
            static char const digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
            if(padding < 0)
                padding = 0;

            // the text is assembled back to front, then reversed
            std::vector<char> text;
            text.reserve(sizeof(number)*8 + 2 + padding);

            bool const valid_base = base >= 2 and base <= 36;
            if(valid_base) {
                // unsigned magnitude: safe even for the most negative long
                unsigned long magnitude = number < 0
                    ? 0UL - static_cast<unsigned long>(number)
                    : static_cast<unsigned long>(number);
                unsigned long const ubase = static_cast<unsigned long>(base);
                do {
                    text.push_back(digits[magnitude % ubase]);
                    magnitude /= ubase;
                } while(magnitude);
            }
            text.insert(text.end(), static_cast<size_t>(padding), '0');
            if(valid_base and number < 0)
                text.push_back('-');

            std::reverse(text.begin(), text.end());
            text.push_back('\0');
            return core::string(&text[0]);
        }

        PROXY(pythonic::numpy, base_repr);

        /* Binary representation of `number` without a fixed width: simply
         * base_repr(number, 2). The `width` parameter only selects this
         * overload and is otherwise unused. */
        core::string binary_repr(long number, none_type width=None) {
            return base_repr(number,2);
        }

        /* Binary representation of `number` on `width` characters.
         *
         * Non-negative numbers are zero-padded on the left up to `width`; when
         * the digits already need more than `width` characters they are
         * returned unpadded (the padding used to become negative).
         * Negative numbers are rendered as the `width` low-order bits of their
         * two's complement; a negative `width` is treated as 0. */
        core::string binary_repr(long number, long width) {
            if(number>=0) {
                core::string digits = binary_repr(number);
                long const missing = width - long(digits.size());
                return base_repr(number, 2, missing > 0 ? missing : 0L);
            }
            else {
                long const bits = width > 0 ? width : 0L;
                // vector storage is released even if building the string throws
                std::vector<char> mem(bits + 1);
                int2bin(number, &mem[0], bits);
                return core::string(&mem[0]);
            }
        }

        PROXY(pythonic::numpy, binary_repr);

        /* Count the occurrences of each value of `expr`.
         * The result has max(minlength, 1 + max(expr)) entries; entry k holds
         * the number of elements equal to k. Elements are used directly as
         * indices into the output. */
        template<class T, size_t N>
            core::ndarray<long,1> bincount(core::ndarray<T,N> const & expr, none_type weights=None, none<long> minlength = None) {
                long nbins = 0;
                if(minlength) nbins = (long)minlength;
                nbins = std::max(nbins, 1 + max(expr));
                core::ndarray<long, 1> out( core::make_tuple(nbins), 0L);
                for(long pos=0, count=expr.size(); pos < count; ++pos)
                    out[ expr.at(pos) ] += 1;
                return out;
            }

        /* Weighted bincount: entry k of the result is the sum of weights.at(i)
         * over every i with expr.at(i) == k. The result has
         * max(minlength, 1 + max(expr)) entries. */
        template<class T, size_t N, class E>
            core::ndarray<decltype(std::declval<long>()*std::declval<E>().at(0)),1> bincount(core::ndarray<T,N> const & expr, E const& weights, none<long> minlength = None) {
                typedef decltype(std::declval<long>()*std::declval<E>().at(0)) weight_type;
                long nbins = 0;
                if(minlength) nbins = (long)minlength;
                nbins = std::max(nbins, 1 + max(expr));
                core::ndarray<weight_type, 1> out( core::make_tuple(nbins), 0L);
                for(long pos=0, count=expr.size(); pos < count; ++pos)
                    out[ expr.at(pos) ] += weights.at(pos);
                return out;
            }

        PROXY(pythonic::numpy, bincount);

        /* Copy of `e` with every element limited to [a_min, a_max]: values
         * below a_min become a_min, values above a_max become a_max. */
        template<class E, class Mi, class Ma>
            typename core::numpy_expr_to_ndarray<E>::type clip(E const& e, Mi a_min, Ma a_max) {
                typename core::numpy_expr_to_ndarray<E>::type out(e.shape, None);
                long const count = e.size();
                for(long pos=0; pos<count; ++pos) {
                    auto value = e.at(pos);
                    if(value<a_min) {
                        value = a_min;
                    }
                    else if(value>a_max) {
                        value = a_max;
                    }
                    out.buffer[pos] = value;
                }
                return out;
            }

        PROXY(pythonic::numpy, clip);

        /* Join the arrays of `ai` along their first axis.
         * The first extent of the result is the sum of the inputs' first
         * extents; the remaining extents are taken from ai[0].
         *
         * The element count is the SUM of the input sizes. It used to be
         * computed as their product, which under-allocates the buffer (e.g. two
         * one-element arrays) and lets the copy run past its end. */
        template<class T, size_t N, size_t M>
            core::ndarray<T,N> concatenate(std::array<core::ndarray<T,N>, M> const & ai) {
                long n = 0;
                long shape[N];
                shape[0] = 0L;
                for(auto const& a : ai) {
                    shape[0] += a.shape[0];
                    n += a.size();
                }
                std::copy(ai[0].shape.begin() +1 , ai[0].shape.end(), &shape[1]);

                T* buffer = new T[n];
                T* iter = buffer;
                // inputs are laid out one after the other
                for(auto const& a : ai)
                    iter = std::copy(a.buffer, a.buffer + a.size(), iter);

                return core::ndarray<T,N>(buffer, shape);
            }
        PROXY(pythonic::numpy, concatenate);

        /* New array with the shape of `a` and a copy of its elements. */
        template<class T, size_t N>
            core::ndarray<T,N> copy(core::ndarray<T,N> const& a) {
                core::ndarray<T,N> duplicate(a.shape, None);
                std::copy(a.buffer, a.buffer + a.size(), duplicate.buffer);
                return duplicate;
            }
        NUMPY_EXPR_TO_NDARRAY0(copy);
        PROXY(pythonic::numpy, copy);

        /* Cumulative product over the flattened array.
         * The result is 1-D with as many entries as `expr`; `d` only selects
         * the output element type. The running product is computed with
         * std::multiplies<T>. */
        template<class T, size_t N, class dtype=T>
            core::ndarray<dtype,1> cumprod(core::ndarray<T,N> const& expr, dtype d = dtype()) {
                long total = expr.size();
                core::ndarray<dtype,1> products(core::make_tuple(total), None);
                auto first = expr.buffer;
                std::partial_sum(first, first + total, products.buffer, std::multiplies<T>());
                return products;
            }

        /* Cumulative product of a 1-D array along `axis`; the only valid axis
         * is 0, anything else throws ValueError.
         * NOTE(review): `d` is not forwarded -- the call below uses the flat
         * overload with its default dtype, so the computation runs in T and the
         * result is then converted to the declared return type. */
        template<class T, class dtype=T>
            core::ndarray<dtype,1> cumprod(core::ndarray<T,1> const& expr, long axis, dtype d = dtype()) {
                if(axis !=0)
                    throw __builtin__::ValueError("axis out of bounds");
                return cumprod(expr);
            }

        /* Cumulative product of an N-D array along `axis`.
         * Throws ValueError when `axis` is outside [0, N).
         *
         * axis == 0: the first sub-array is copied as is, then each following
         * sub-array is the previous result times the matching input sub-array.
         * Other axes: the operation is applied to every sub-array with
         * axis - 1.
         *
         * The seed copy now covers the whole first sub-array (product of the
         * extents after the first). It used to copy only shape[N-1] elements,
         * which is a full sub-array only when N == 2. An empty first axis is
         * returned untouched instead of running the transform on an empty
         * range. */
        template<class T, size_t N, class dtype=T>
            core::ndarray<dtype,N> cumprod(core::ndarray<T,N> const& expr, long axis, dtype d = dtype()) {
                if(axis<0 || axis >=long(N))
                    throw __builtin__::ValueError("axis out of bounds");

                auto shape = expr.shape;
                core::ndarray<dtype,N> cumprody(shape, None);
                if(axis==0) {
                    if(shape[0] > 0) {
                        // number of elements in one sub-array along the first axis
                        long slab_size = 1;
                        for(size_t dim = 1; dim < N; ++dim)
                            slab_size *= shape[dim];
                        std::copy(expr.buffer, expr.buffer + slab_size, cumprody.buffer);
                        std::transform(cumprody.begin(), cumprody.end()-1, expr.begin() + 1, cumprody.begin() + 1, std::multiplies<core::ndarray<T,N-1>>());
                    }
                }
                else {
                    std::transform(expr.begin(), expr.end(), cumprody.begin(), [=](core::ndarray<T,N-1> const& e) { return cumprod(e, axis-1, d); });
                }
                return cumprody;
            }

        ALIAS(cumprod, cumproduct)
        PROXY(pythonic::numpy, cumproduct);
        PROXY(pythonic::numpy, cumprod);

        /* Flattened copy of `a` without the element at flat position `index`.
         * A negative `index` counts from the end. Throws ValueError when the
         * index falls outside the array; it used to be clamped to the size,
         * which wrote one element past the end of the output.
         * NOTE(review): numpy raises IndexError here; ValueError is used because
         * it is the exception type this file already throws. */
        template<class T, size_t N>
            core::ndarray<T,1> delete_(core::ndarray<T,N> const& a, long index, none_type axis=None) {
                long n = a.size();
                if(index < 0)
                    index += n;
                if(index < 0 or index >= n)
                    throw __builtin__::ValueError("index out of bounds");

                core::ndarray<T,1> out(core::make_tuple(n-1), None);
                // elements before the hole, then elements after it
                auto tail = std::copy(a.buffer, a.buffer + index, out.buffer);
                std::copy(a.buffer + index + 1 , a.buffer + n, tail);
                return out;
            }

        /* Flattened copy of `in` without the elements at the flat positions
         * listed in `indices`. The positions are consumed in iteration order:
         * each one closes the run copied so far and the next run starts right
         * after it. */
        template<class T, size_t N, class I>
            typename std::enable_if<!std::is_scalar<I>::value, core::ndarray<T,1>>::type
            delete_(core::ndarray<T,N> const& in, I const& indices, none_type axis=None) {
                core::ndarray<T,1> out(core::make_tuple(long(in.size())-indices.size()), None);
                auto write = out.buffer;
                auto run_start = in.buffer;
                for(long hole : indices) {
                    auto run_stop = in.buffer + hole;
                    write = std::copy(run_start, run_stop, write);
                    run_start = run_stop + 1;
                }
                // trailing run after the last removed position
                std::copy(run_start, in.buffer + in.size(), write);
                return out;
            }

        PROXY(pythonic::numpy, delete_);

        /* Extract the k-th diagonal of a 2-D array as a 1-D array.
         * k >= 0 starts at column k of the first row, k < 0 at row -k of the
         * first column; the walk stops at the first border reached. */
        template<class T>
            core::ndarray<T,1> diag(core::ndarray<T,2> a, long k=0) {
                T* buffer = new T[std::max(a.shape[0], a.shape[1])];
                long shape[1] = {0};
                auto write = buffer;
                // starting cell of the requested diagonal
                int row = k>=0 ? 0 : -k;
                int col = k>=0 ? k : 0;
                for(; row< a.shape[0] and col < a.shape[1]; ++row, ++col, ++shape[0])
                    *write++ = a[row][col];
                return core::ndarray<T,1>(buffer, shape);
            }

        /* Build a square 2-D array, zero everywhere except on its k-th
         * diagonal, which holds the elements of `a`. The side length is
         * a.size() + |k|. */
        template<class T>
            core::ndarray<T,2> diag(core::ndarray<T,1> a, long k=0) {
                long n = a.size() + std::abs(k);
                core::ndarray<T,2> out(core::make_tuple(n,n), 0);
                bool const upper = k>=0;
                long row = upper ? 0 : -k;
                long col = upper ? k : 0;
                // the index starting at 0 is also the position inside `a`
                for(; row< n and col<n ;++row,++col)
                    out[row][col] = a.buffer[upper ? row : col];
                return out;
            }

        NUMPY_EXPR_TO_NDARRAY0(diag);
        PROXY(pythonic::numpy, diag);

        ALIAS(diag, diagflat);
        PROXY(pythonic::numpy, diagflat);

        ALIAS(diag, diagonal);
        PROXY(pythonic::numpy, diagonal);

        /* n-th discrete difference along the last axis.
         * One pass replaces every run of the last axis by the differences of
         * its consecutive elements (one element shorter); the pass is repeated
         * `n` times.
         *
         * n == 0 returns the input converted to an array; n < 0 throws
         * ValueError. Both used to recurse without end. An empty last axis
         * stays empty instead of getting a negative extent. */
        template<class E>
            typename core::numpy_expr_to_ndarray<E>::type
            diff(E const& expr, long n=1) {
                if(n < 0)
                    throw __builtin__::ValueError("order must be non-negative");
                if(n == 0)
                    return typename core::numpy_expr_to_ndarray<E>::type(expr);

                decltype(expr.shape) shape(expr.shape);
                if(shape[core::numpy_expr_to_ndarray<E>::N-1] > 0)
                    --shape[core::numpy_expr_to_ndarray<E>::N-1];

                typename core::numpy_expr_to_ndarray<E>::type out(shape, None);
                auto slice = expr.shape[core::numpy_expr_to_ndarray<E>::N-1];
                long j = 0; // write position in `out`
                for(long i = 0, total = expr.size(); i< total ; i+=slice) {
                    auto prev = expr.at(i);
                    for(long k = 1; k< slice ; ++k) {
                        auto nprev = expr.at(i+k);
                        out.at(j++) = nprev - prev;
                        prev = nprev;
                    }
                }
                if(n==1) return out;
                else return diff(out, n-1); // TODO: inplace modification to avoid n-1 allocations
            }

        PROXY(pythonic::numpy, diff);

        /* Index of the bin each element of `expr` falls into.
         * `b` holds the bin edges, either increasing or decreasing.
         *
         * Increasing edges: result i satisfies bins[i-1] <= x < bins[i], i.e.
         * the number of edges <= x. This needs upper_bound; the previous
         * lower_bound returned one less for values equal to an edge.
         * Decreasing edges: result i satisfies bins[i-1] > x >= bins[i].
         *
         * The direction is taken from the first and last edges, so a single
         * edge (or edges starting with a repeated value) is handled as
         * increasing. */
        template<class E, class F>
            core::ndarray< long, 1 >
            digitize(E const& expr, F const& b) {
                auto bins = asarray(b);
                long const nbins = bins.size();
                bool const is_decreasing = nbins > 1 and bins.at(0) > bins.at(nbins - 1);
                core::ndarray<long, 1> out(core::make_tuple(long(expr.size())), None);
                if(is_decreasing) {
                    // first edge that is not strictly greater than the value
                    for(long i = 0, n = expr.size(); i< n; ++i)
                        out.at(i) = std::lower_bound(bins.begin(), bins.end(), expr.at(i), operator_::proxy::gt()) - bins.begin();
                }
                else {
                    // first edge strictly greater than the value
                    for(long i = 0, n = expr.size(); i< n; ++i)
                        out.at(i) = std::upper_bound(bins.begin(), bins.end(), expr.at(i)) - bins.begin();
                }
                return out;
            }

        PROXY(pythonic::numpy, digitize);

        /* dot of two scalars (or complex numbers): their plain product. */
        template<class E, class F>
            typename std::enable_if<
                (std::is_scalar<E>::value or is_complex<E>::value) and (std::is_scalar<F>::value or is_complex<F>::value),
                decltype(std::declval<E>()*std::declval<F>())
            >::type
            dot(E const& e, F const& f)
            {
                return e*f;
            }

        /* dot of two 1-D arrays: sum of the element-wise product. */
        template<class E, class F>
            typename std::enable_if<
                core::is_array<E>::value and core::is_array<F>::value and
                core::numpy_expr_to_ndarray<E>::N == 1 and core::numpy_expr_to_ndarray<F>::N ==1,
                decltype(std::declval<typename core::numpy_expr_to_ndarray<E>::type::dtype>()*std::declval<typename core::numpy_expr_to_ndarray<F>::type::dtype>())
            >::type
            dot(E const& e, F const& f) {
                return sum(e*f);
            }

        /* dot of two lists of scalars (or complex numbers): both lists are
         * converted to arrays and the array version is used.
         * The constraint now checks both element types; it used to test E
         * twice and never F. */
        template<class E, class F>
            typename std::enable_if<
                (std::is_scalar<E>::value or is_complex<E>::value) and (std::is_scalar<F>::value or is_complex<F>::value),
                decltype(std::declval<E>()*std::declval<F>())
            >::type
            dot(core::list<E> const& e, core::list<F> const& f) {
                return dot(asarray(e), asarray(f));
            }

        /* dot of a 1-D array with a list of scalars: the list is converted to
         * an array first. */
        template<class E, class F>
            typename std::enable_if<
                (std::is_scalar<E>::value or is_complex<E>::value) and
                core::is_array<F>::value and core::numpy_expr_to_ndarray<F>::N ==1,
                decltype(std::declval<E>()*std::declval<typename core::numpy_expr_to_ndarray<F>::type::dtype>())
            >::type
            dot(F const& f,core::list<E> const& e) {
                return dot(f, asarray(e));
            }

        /* dot of a list of scalars with a 1-D array: the list is converted to
         * an array first. */
        template<class E, class F>
            typename std::enable_if<
                (std::is_scalar<E>::value or is_complex<E>::value) and
                core::is_array<F>::value and core::numpy_expr_to_ndarray<F>::N ==1,
                decltype(std::declval<E>()*std::declval<typename core::numpy_expr_to_ndarray<F>::type::dtype>())
            >::type
            dot(core::list<E> const& e, F const& f) {
                return dot(asarray(e), f);
            }

        PROXY(pythonic::numpy, dot);

        /* Differences between consecutive elements of expr, read in flat
         * order through expr.at(): out[i] = expr.at(i+1) - expr.at(i).
         * The result is one-dimensional and holds size-1 elements; an empty
         * or single-element input yields an empty result. */
        template<class E>
            core::ndarray<typename core::numpy_expr_to_ndarray<E>::type::dtype, 1>
            ediff1d(E const& expr)
            {
                long const size = expr.size();
                // no consecutive pair exists below two elements; never go negative
                long n = size > 0 ? size - 1 : 0;
                core::ndarray<typename core::numpy_expr_to_ndarray<E>::type::dtype, 1> out(core::make_tuple(n), None);
                if(n > 0) {
                    // only touch expr.at(0) once we know it exists
                    auto prev = expr.at(0);
                    for(long i=0; i< n ; ++i) {
                        auto next = expr.at(i+1);
                        out.at(i) = next - prev;
                        prev = next;
                    }
                }
                return out;
            }

        /* list overload: converts the list with asarray and forwards to the
         * generic ediff1d. */
        template<class E>
            auto ediff1d(core::list<E> const & expr) -> decltype(ediff1d(asarray(expr))) {
                return ediff1d(asarray(expr));
            }

        PROXY(pythonic::numpy, ediff1d);

        /* Build an N x M array filled with zeros, with ones on the k-th
         * diagonal.
         *   N : number of rows
         *   M : number of columns; a negative value means M = N
         *   k : diagonal offset; 0 is the main diagonal, positive values move
         *       towards the upper right, negative ones towards the lower left
         *   d : only carries the element type, it is forwarded to zeros()
         */
        template<class dtype = double>
            core::ndarray<dtype, 2> eye(long N, long M=-1, long k=0, dtype d=dtype())
            {
                if(M<0) M = N;
                core::ndarray<dtype, 2> out = zeros(core::make_tuple(N, M), d);
                // first cell of the requested diagonal; indices stay `long`
                // like N, M and k so that no value gets narrowed
                long i = k >= 0 ? 0 : -k;
                long j = k >= 0 ? k : 0;
                for(; i< N and j < M; ++i, ++j)
                    out[i][j] = dtype(1);
                return out;
            }

        PROXY(pythonic::numpy, eye);

        /* Returns a default-constructed core::finfo for the requested type.
         * The argument is only used to select dtype; its value is ignored. */
        template<class dtype=double>
            core::finfo<dtype> finfo(dtype d=dtype()) {
                return core::finfo<dtype>();
            }
        PROXY(pythonic::numpy, finfo)


            /* Flat positions of the elements of expr that evaluate to true,
             * in increasing order, as a one-dimensional array of long. */
            template<class E>
            core::ndarray<long, 1> flatnonzero(E const& expr) {
                long const total = expr.size();
                // worst case: every element is non zero
                long *storage = new long[total];
                long found = 0;
                for(long pos = 0; pos < total; ++pos) {
                    if(expr.at(pos)) {
                        storage[found] = pos;
                        ++found;
                    }
                }
                // the array is built from the raw storage, using only the filled prefix
                long shape[1] = { found };
                return core::ndarray<long, 1>(storage, shape);
            }
        /* list overload: converts the list with asarray and forwards to the
         * generic flatnonzero. */
        template<class E>
            auto flatnonzero(core::list<E> const & l)
            -> decltype(flatnonzero(asarray(l)))
            {
                return flatnonzero(asarray(l));
            }

        PROXY(pythonic::numpy, flatnonzero);

        /* Returns a new array with the same shape as `a` in which every item
         * obtained by iterating over the array has its content reversed.
         * Requires at least two dimensions. The result is a copy, the input
         * is left untouched. */
        template<class T, size_t N>
            core::ndarray<T,N> fliplr(core::ndarray<T,N> const& a) {
                static_assert(N>=2, "fliplr only works on array of dimension >= 2");
                core::ndarray<T,N> flipped(a.shape, None);
                // start from a plain copy of the input...
                std::copy_n(a.buffer, a.size(), flipped.buffer);
                // ...then reverse each row in place
                for(auto row : flipped)
                    std::reverse(row.begin(), row.end());
                return flipped;
            }

        PROXY(pythonic::numpy, fliplr); // does not return a view...

        /* Returns a new array with the same shape as `a` whose iteration
         * order is the reverse of the one of `a` (std::reverse_copy over
         * a.begin()..a.end()). The result is a copy, not a view. */
        template<class T, size_t N>
            core::ndarray<T,N> flipud(core::ndarray<T,N> const& a) {
                core::ndarray<T,N> out(a.shape, None);
                std::reverse_copy(a.begin(), a.end(), out.begin());
                return out;
            }
        PROXY(pythonic::numpy, flipud);

        /* Implementation detail of fromfunction: primary template, only
         * declared here. F is the callable, N the number of dimensions and
         * dtype the type of the coordinates passed to the callable. */
        template<class F, size_t N, class dtype>
            struct fromfunction_helper;

        /* One-dimensional case: out[i] = f(i) for each i in [0, shape[0]).
         * The coordinate is passed to f as a dtype; the last parameter is
         * unused. */
        template<class F, class dtype>
            struct fromfunction_helper<F,1,dtype> {
                core::ndarray<typename std::remove_cv<typename std::remove_reference<decltype(std::declval<F>()(dtype()))>::type>::type, 1>
                    operator()(F&& f, std::array<long,1> const& shape, dtype d = dtype()) {
                        typedef typename std::remove_cv<typename std::remove_reference<decltype(f(dtype()))>::type>::type value_type;
                        core::ndarray<value_type, 1> result(shape, None);
                        dtype const extent = result.shape[0];
                        for(dtype idx = 0; idx < extent; ++idx)
                            result[idx] = f(idx);
                        return result;
                    }
            };

        /* Two-dimensional case: out[i][j] = f(i, j) for each coordinate pair
         * of the requested shape. Coordinates are passed to f as dtype; the
         * last parameter is unused. */
        template<class F, class dtype>
            struct fromfunction_helper<F,2,dtype> {
                core::ndarray<typename std::remove_cv<typename std::remove_reference<decltype(std::declval<F>()(dtype(), dtype()))>::type>::type, 2>
                    operator()(F&& f, std::array<long,2> const& shape, dtype d = dtype()) {
                        typedef typename std::remove_cv<typename std::remove_reference<decltype(f(dtype(), dtype()))>::type>::type value_type;
                        core::ndarray<value_type, 2> result(shape, None);
                        dtype const rows = result.shape[0];
                        for(dtype row = 0; row < rows; ++row) {
                            dtype const cols = result.shape[1];
                            for(dtype col = 0; col < cols; ++col)
                                result[row][col] = f(row, col);
                        }
                        return result;
                    }
            };


        /* Build an array of the given shape by calling f on each coordinate.
         * The work is delegated to fromfunction_helper<F, N, dtype>; only the
         * type of `d` matters (it selects the coordinate type), its value is
         * not forwarded. */
        template<class F, size_t N, class dtype=double>
            auto fromfunction(F&& f, std::array<long, N> const& shape, dtype d = dtype())
            -> decltype(fromfunction_helper<F, N, dtype>()(std::forward<F>(f), shape)) {
                return fromfunction_helper<F, N, dtype>()(std::forward<F>(f), shape);
            }

        /* must specialize for higher order */
        PROXY(pythonic::numpy, fromfunction);

        /* Build a one-dimensional array from an iterable.
         *   iterable : source of the values; the element type of the result
         *              is the value_type of the iterable
         *   d        : unused
         *   count    : number of items to read; a negative value means
         *              "read until the end of the iterable"
         */
        template<class Iterable, class dtype=double>
            core::ndarray<typename std::remove_cv<typename std::remove_reference<Iterable>::type>::type::value_type, 1>
            fromiter(Iterable&& iterable, dtype d=dtype(), long count = -1)
            {
                typedef typename std::remove_cv<typename std::remove_reference<Iterable>::type>::type::value_type T;
                if(count >= 0) {
                    // known length: read exactly `count` items into a raw buffer
                    T* storage = new T[count];
                    std::copy_n(iterable.begin(), count, storage);
                    long shape [1] = { count };
                    return core::ndarray<T,1>(storage, shape);
                }
                // unknown length: gather everything in a list first
                core::list<T> collected(0);
                std::copy(iterable.begin(), iterable.end(), std::back_inserter(collected));
                return core::ndarray<T,1>(collected);
            }
        PROXY(pythonic::numpy, fromiter);

        /* Build a one-dimensional array from the content of a string.
         *   string : the data to decode
         *   d      : only carries the element type
         *   count  : number of items to read; a negative value means "as many
         *            as available"
         *   sep    : when non empty, the string is text: it is split on any
         *            character of sep and each piece is parsed with
         *            operator>>. When empty, the string is read as the raw
         *            memory of packed dtype values.
         */
        template<class dtype=double>
            core::ndarray<dtype,1> fromstring(core::string const& string, dtype d=dtype(), long count = -1 , core::string const& sep = "") {
                if(sep) {
                    core::list<dtype> res(0);
                    if(count<0) count = std::numeric_limits<long>::max();
                    else res.reserve(count);
                    size_t current;
                    size_t next = -1; // so that the first token starts at offset 0
                    long numsplit = 0;
                    do {
                        current = next + 1;
                        next = string.find_first_of( sep, current );
                        dtype item;
                        std::istringstream iss(string.substr( current, next - current ));
                        iss >> item;
                        res.push_back(item);
                    }
                    while (next != core::string::npos && ++numsplit<count);
                    return core::ndarray<dtype, 1>(res);
                }
                else {
                    // raw mode: the default count is the number of whole
                    // dtype items held by the string, not its byte length,
                    // otherwise the copy below reads past the end of the data
                    if(count <0) count = string.size() / sizeof(dtype);
                    long shape[1] = { count };
                    dtype* buffer = new dtype[shape[0]];
                    dtype const* tstring = reinterpret_cast<dtype const*>(string.data());
                    std::copy(tstring, tstring + shape[0], buffer);
                    return core::ndarray<dtype,1>(buffer, shape);
                }
            }

        PROXY(pythonic::numpy, fromstring);

        /* Square n x n array with ones on the main diagonal: shorthand for
         * eye(n, n, 0, d). */
        template<class dtype=double>
            auto identity(long n, dtype d=dtype()) -> decltype(eye(n,n,0,d)) {
                return eye(n,n,0,d);
            }
        PROXY(pythonic::numpy, identity);

        /* Build the grid of indices of an array of the given shape: the
         * result has shape (N, shape[0], ..., shape[N-1]) and out[d] holds,
         * for every position of the grid, its coordinate along axis d.
         *   shape : extent of each of the N axes
         *   d     : only carries the element type of the result
         */
        template<size_t N, class dtype=long>
            core::ndarray<dtype, N+1> indices(std::array<long, N> const& shape, dtype d = dtype()) {
                std::array<long, N+1> oshape;
                oshape[0] = N ;
                std::copy(shape.begin(), shape.end(), oshape.begin() + 1);
                core::ndarray<dtype, N+1> out(oshape, None);
                // one write cursor per axis, each walking through out[axis]
                dtype* iters[N];
                for(size_t n=0; n<N; ++n)
                    iters[n]=out[n].buffer;
                // decompose every flat position into its coordinates, from
                // the innermost axis to the outermost one
                for(long i=0, n=out.size()/N; i<n;++i) {
                    long mult = 1;
                    for(long axis=N-1; axis>0; axis--) {
                        *(iters[axis]++) = (i/mult)%shape[axis];
                        mult *= shape[axis];
                    }
                    *(iters[0]++) = i/mult;
                }
                return out;
            }

        PROXY(pythonic::numpy, indices);

        ALIAS(dot, inner); // only for scalar and 1D case
        PROXY(pythonic::numpy, inner);

        /* Insert values into a flattened copy of `in`.
         *   in      : source array, read in flat order
         *   indices : flat positions (relative to `in`) before which a value
         *             is inserted; the copy logic below walks `in` forward,
         *             so they are expected in non-decreasing order
         *   data    : values to insert, one per index
         *   axis    : only None is supported
         * Exactly min(#indices, #data) values are inserted: the output is
         * sized for that many, so the loop must not go further.
         */
        template<class T, size_t N, class I, class F>
            typename std::enable_if<is_iterable<I>::value and is_iterable<F>::value, core::ndarray<T,1>>::type
            insert(core::ndarray<T,N> const& in, I const& indices, F const& data, none_type axis=None)
            {
                long const inserted = std::min(indices.size(), data.size());
                core::ndarray<T,1> out(core::make_tuple(long(in.size()+inserted)), None);
                auto out_iter = out.buffer;
                auto in_iter = in.buffer;
                auto data_iter = data.begin();
                long done = 0;
                for(long index : indices) {
                    // stop once every slot reserved in `out` has been used
                    if(done == inserted)
                        break;
                    out_iter = std::copy(in_iter, in.buffer + index, out_iter);
                    *out_iter++ = *data_iter++;
                    in_iter = in.buffer + index;
                    ++done;
                }
                // tail of the input located after the last insertion point
                std::copy(in_iter, in.buffer + in.size(), out_iter);
                return out;
            }
        /* insert overload for iterable indices and a single (non iterable)
         * value: the value is wrapped in a one-element list and the call is
         * forwarded to the iterable/iterable overload. */
        template<class T, size_t N, class I, class F>
            typename std::enable_if<is_iterable<I>::value and not is_iterable<F>::value, core::ndarray<T,1>>::type
            insert(core::ndarray<T,N> const& in, I const& indices, F const& data, none_type axis=None)
            {
                return insert(in, indices, core::list<F>({data}), axis);
            }
        /* insert overload for a single (non iterable) index and iterable
         * data: the index is wrapped in a one-element list and the call is
         * forwarded to the iterable/iterable overload. The data is already
         * iterable and is passed through unchanged (a braced-init-list here
         * would prevent the deduction of the callee's template parameter). */
        template<class T, size_t N, class I, class F>
            typename std::enable_if<not is_iterable<I>::value and is_iterable<F>::value, core::ndarray<T,1>>::type
            insert(core::ndarray<T,N> const& in, I const& indices, F const& data, none_type axis=None)
            {
                return insert(in, core::list<I>({indices}), data, axis);
            }
        /* insert overload for a single index and a single value: both are
         * wrapped in one-element lists and the call is forwarded to the
         * iterable/iterable overload. */
        template<class T, size_t N, class I, class F>
            typename std::enable_if<not is_iterable<I>::value and not is_iterable<F>::value, core::ndarray<T,1>>::type
            insert(core::ndarray<T,N> const& in, I const& indices, F const& data, none_type axis=None)
            {
                return insert(in, core::list<I>({indices}), core::list<F>({data}), axis);
            }

        PROXY(pythonic::numpy, insert);

        /* Sorted, duplicate-free values that appear in both e and f, as a
         * one-dimensional array. The element type is the one obtained by
         * adding an element of e to an element of f. */
        template<class E, class F>
            core::ndarray<
                decltype(std::declval<typename core::numpy_expr_to_ndarray<E>::type::dtype>()
                        +
                        std::declval<typename core::numpy_expr_to_ndarray<F>::type::dtype>()),
                1>
                    intersect1d(E const& e, F const& f)
                    {
                        typedef decltype(std::declval<typename core::numpy_expr_to_ndarray<E>::type::dtype>()
                                +
                                std::declval<typename core::numpy_expr_to_ndarray<F>::type::dtype>()) T;
                        auto lhs = asarray(e);
                        auto rhs = asarray(f);
                        // every value of the first operand
                        std::set<T> candidates(lhs.buffer, lhs.buffer + lhs.size());
                        // values already written to the output
                        std::set<T> emitted;
                        core::list<T> common(0);
                        common.reserve(candidates.size());
                        long const total = rhs.size();
                        for(long pos = 0; pos < total; ++pos) {
                            auto const value = rhs.at(pos);
                            if(candidates.count(value) == 0)
                                continue;
                            // insert() reports whether the value is new
                            if(emitted.insert(value).second)
                                common.push_back(value);
                        }
                        std::sort(common.begin(), common.end());
                        return core::ndarray<T, 1> (common);
                    }

        PROXY(pythonic::numpy, intersect1d);

        /* Element-wise test for complex dtypes: true where the imaginary
         * part of the element is not zero. The result has the shape of the
         * input. */
        template<class E>
            typename std::enable_if<
                    is_complex<typename core::numpy_expr_to_ndarray<E>::type::dtype>::value,
                    core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N>
                        >::type
            iscomplex(E const& expr) {
                core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N> flags(expr.shape, None);
                long const total = expr.size();
                for(long pos = 0; pos < total; ++pos)
                    flags.at(pos) = (expr.at(pos).imag() != 0);
                return flags;
            }

        /* Overload for non complex dtypes: no element can have an imaginary
         * part, so the result is an array of the same shape built with the
         * value false. */
        template<class E>
            typename std::enable_if<
                    not is_complex<typename core::numpy_expr_to_ndarray<E>::type::dtype>::value,
                    core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N>
                        >::type
            iscomplex(E const& expr) {
                return core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N>(expr.shape, false); 
            }

        PROXY(pythonic::numpy, iscomplex);

        /* Element-wise test for complex dtypes: true where the imaginary
         * part of the element is zero. The result has the shape of the
         * input. */
        template<class E>
            typename std::enable_if<
                    is_complex<typename core::numpy_expr_to_ndarray<E>::type::dtype>::value,
                    core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N>
                        >::type
            isreal(E const& expr) {
                core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N> flags(expr.shape, None);
                long const total = expr.size();
                for(long pos = 0; pos < total; ++pos)
                    flags.at(pos) = (expr.at(pos).imag() == 0);
                return flags;
            }

        /* Overload for non complex dtypes: every element is real, so the
         * result is an array of the same shape built with the value true. */
        template<class E>
            typename std::enable_if<
                    not is_complex<typename core::numpy_expr_to_ndarray<E>::type::dtype>::value,
                    core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N>
                        >::type
            isreal(E const& expr) {
                return core::ndarray<bool, core::numpy_expr_to_ndarray<E>::N>(expr.shape, true); 
            }

        PROXY(pythonic::numpy, isreal);

        /* Compile-time test on the element type only: true when the dtype of
         * the expression is not complex. The values are never inspected. */
        template<class E>
            constexpr bool isrealobj(E const& expr) {
                return not is_complex<typename core::numpy_expr_to_ndarray<E>::type::dtype>::value ;
            }

        PROXY(pythonic::numpy, isrealobj);

        /* Compile-time test on the argument type: true for C++ scalar types,
         * complex types and core::string. The value is never inspected. */
        template<class E>
            constexpr bool isscalar(E const&) {
                return std::is_scalar<E>::value or is_complex<E>::value or std::is_same<E, core::string>::value;
            }
        PROXY(pythonic::numpy, isscalar);

        /* Tells whether the argument stands for a scalar type. Types are
         * passed around as instances, so this is the same test as isscalar. */
        template<class E>
            constexpr bool issctype(E const& expr) {
                return isscalar(expr); // types are represented as an instance of the type...
            }
        PROXY(pythonic::numpy, issctype);

        /* Strict-weak-ordering comparator over positions, used by lexsort.
         * `keys` is a sequence of sequences; two positions are compared on
         * the last key first, then on the previous one when they tie, and so
         * on down to the first key. Only a reference to the keys is kept:
         * they must outlive the comparator. */
        template<class K>
            struct lexcmp {
                K const& keys;
                lexcmp(K const& keys) : keys(keys) {
                }
                /* true when position i0 sorts strictly before position i1;
                 * const-qualified so that const comparators can be called */
                bool operator()(long i0, long i1) const {
                    for(long i= keys.size() -1; i>=0; --i) {
                        if(keys[i][i0] < keys[i][i1])
                            return true;
                        if(keys[i][i0] > keys[i][i1])
                            return false;
                    }
                    // equal on every key
                    return false;
                }
            };

        /* Indirect sort on several keys: returns the permutation of
         * [0, keys[0].size()) that orders the positions according to lexcmp,
         * i.e. with the last key as the primary one. */
        template<class T, size_t N>
            core::ndarray<long, 1> lexsort(std::array<T, N> const& keys) {
                long count = keys[0].size();
                core::ndarray<long, 1> order(core::make_tuple(count), None);
                auto first = order.buffer;
                auto last = first + count;
                // identity permutation...
                std::iota(first, last, 0L);
                // ...reordered by comparing the keys found at each position
                lexcmp<std::array<T, N>> by_keys(keys);
                std::sort(first, last, by_keys);
                return order;
            }
        PROXY(pythonic::numpy, lexsort)

            /* Coordinates of the elements of expr that evaluate to true.
             * Returns one array of long per dimension; the j-th entries of
             * these arrays are the coordinates of the j-th matching element,
             * taken in flat order. */
            template<class E>
            auto nonzero(E const& expr) -> std::array<core::ndarray<long,1>, core::numpy_expr_to_ndarray<E>::N>
            {
                constexpr long N = core::numpy_expr_to_ndarray<E>::N;
                typedef std::array<core::ndarray<long,1>, N> out_type;
                long sz = expr.size();
                auto eshape = expr.shape;
                // flat positions of the matching elements; a vector releases
                // its storage on return and only grows with the matches
                std::vector<long> hits;
                for(long i=0; i< sz; ++i)
                    if(expr.at(i))
                        hits.push_back(i);
                long real_sz = hits.size();
                out_type out;
                std::array<long, 1> shape{{real_sz}};
                for(long i=0; i<N; ++i)
                    out[i] = core::ndarray<long, 1>(shape, None);
                // turn every flat position into its coordinates, from the
                // innermost axis to the outermost one
                for(long j=0; j<real_sz; ++j) {
                    long const flat = hits[j];
                    long mult = 1;
                    for(long axis=N-1; axis>0; axis--) {
                        out[axis][j] = (flat/mult)%eshape[axis];
                        mult*=eshape[axis];
                    }
                    out[0][j] = flat/mult;
                }
                return out;
            }

        PROXY(pythonic::numpy, nonzero)

        /* case: array_like, array_like, array_like
         * The three operands are converted with asarray; the result has the
         * shape of the condition and holds, at each flat position, the
         * element of x where the condition is true and the element of y
         * otherwise. */
        template<class E, class X, class Y>
            typename std::enable_if<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value>::type>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value>::type>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<E>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    auto condition = asarray(e);
                    auto x_ = asarray(x);
                    auto y_ = asarray(y);
                    core::ndarray<decltype(std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::type>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::type>()), nested_container_depth<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value> out(condition.shape, None);
                    auto out_iter = out.buffer;
                    // NOTE(review): x_ and y_ are read at the flat positions of the condition; presumably they are expected to have its shape, no broadcasting is visible here
                    for(long i=0, n=condition.size(); i<n; ++i) {
                        if(condition.at(i))
                            *out_iter++ = x_.at(i);
                        else
                            *out_iter++ = y_.at(i);
                    }
                    return out;
                }

        /* case: array_like, value, array_like
         * The result has the shape of the condition and holds, at each flat
         * position, the value x where the condition is true and the element
         * of y otherwise. */
        template<class E, class X, class Y>
            typename std::enable_if<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<X>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value>::type>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<E>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    auto condition = asarray(e);
                    auto y_ = asarray(y);
                    core::ndarray<decltype(std::declval<X>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::type>()), nested_container_depth<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value> out(condition.shape, None);
                    auto out_iter = out.buffer;
                    for(long i=0, n=condition.size(); i<n; ++i) {
                        if(condition.at(i))
                            *out_iter++ = x;
                        else
                            *out_iter++ = y_.at(i);
                    }
                    return out;
                }

        /* case: array_like, array_like, value
         * The result has the shape of the condition and holds, at each flat
         * position, the element of x where the condition is true and the
         * value y otherwise. */
        template<class E, class X, class Y>
            typename std::enable_if<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<Y>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value>::type>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<E>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    auto condition = asarray(e);
                    auto x_ = asarray(x);
                    core::ndarray<decltype(std::declval<Y>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::type>()), nested_container_depth<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value> out(condition.shape, None);
                    auto out_iter = out.buffer;
                    for(long i=0, n=condition.size(); i<n; ++i) {
                        if(condition.at(i))
                            *out_iter++ = x_.at(i);
                        else
                            *out_iter++ = y;
                    }
                    return out;
                }

        /* case: array_like, value, value
         * The result has the shape of the condition and holds, at each flat
         * position, the value x where the condition is true and the value y
         * otherwise. */
        template<class E, class X, class Y>
            typename std::enable_if<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<Y>() + std::declval<X>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<E>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    auto condition = asarray(e);
                    core::ndarray<decltype(std::declval<Y>() + std::declval<X>()), nested_container_depth<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value> out(condition.shape, None);
                    auto out_iter = out.buffer;
                    for(long i=0, n=condition.size(); i<n; ++i) {
                        if(condition.at(i))
                            *out_iter++ = x;
                        else
                            *out_iter++ = y;
                    }
                    return out;
                }

        /* case: value, value, array_like
         * The condition is a single value: when it is true the result is an
         * array with the shape of y built with the value x, otherwise it is
         * a copy of y. */
        template<class E, class X, class Y>
            typename std::enable_if<!core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<X>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value>::type>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<Y>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    auto y_ = asarray(y);
                    if(e)
                        return core::ndarray<decltype(std::declval<X>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::type>()), nested_container_depth<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value>(y_.shape, x);
                    else
                        return copy(y_);
                }

        /* case: value, array_like, value
         * The condition is a single value: when it is true the result is a
         * copy of x, otherwise it is an array with the shape of x built with
         * the value y. */
        template<class E, class X, class Y>
            typename std::enable_if<!core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<Y>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value>::type>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<X>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    auto x_ = asarray(x);
                    if(e)
                        return copy(x_);
                    else
                        return core::ndarray<decltype(std::declval<Y>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::type>()), nested_container_depth<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value>(x_.shape, y);
                }

        /* case: value, array_like, array_like
         * The condition is a single value: the result is a copy of x when it
         * is true and a copy of y otherwise. */
        template<class E, class X, class Y>
            typename std::enable_if<!core::is_array_like<typename std::remove_reference<typename std::remove_cv<E>::type>::type>::value && 
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value &&
            core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value,
            core::ndarray<decltype(std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<Y>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<Y>::type>::type>::value>::type>() + std::declval<typename nested_container_value_type<typename std::remove_reference<typename std::remove_cv<X>::type>::type, !core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value>::type>()), nested_container_depth<typename std::conditional<core::is_array_like<typename std::remove_reference<typename std::remove_cv<X>::type>::type>::value, typename std::remove_reference<typename std::remove_cv<X>::type>::type, bool>::type>::value>
                >::type where(E const& e, X const& x, Y const& y) {
                    // NOTE(review): the declared dimension is the one of x; presumably x and y are expected to share their shape — confirm
                    if(e)
                        return copy(asarray(x));
                    else
                        return copy(asarray(y));
                }

        /* Single-argument form: same as nonzero(expr), i.e. one array of
         * coordinates per dimension. */
        template<class E>
            auto where(E const& expr) -> std::array<core::ndarray<long,1>, core::numpy_expr_to_ndarray<E>::N>
            {
                return nonzero(expr);
            }

        PROXY(pythonic::numpy, where)

        /* Unwrap a sequence of phases, read in flat order: whenever the gap
         * between the previous output and the current value exceeds
         * `discont`, the current value is shifted by a multiple of 2*pi.
         *   expr    : input phases
         *   discont : threshold on the gap; values below pi are raised to pi
         * The result is an array of double with the shape of the input; an
         * empty input yields an empty result.
         */
        template<class E>
            core::ndarray<double, core::numpy_expr_to_ndarray<E>::N> unwrap(E const& expr, double discont = pi)
            {
                discont = nt2::max(discont, pi);
                core::ndarray<double, core::numpy_expr_to_ndarray<E>::N> out(expr.shape, None);
                size_t const total = out.size();
                // nothing to seed the recurrence with on empty input
                if(total == 0)
                    return out;
                out.buffer[0] = expr.at(0);
                for(size_t i=1; i<total; ++i)
                {
                    auto val = expr.at(i);
                    // NOTE(review): the multiple of 2*pi is derived from gap/discont, truncated towards zero — confirm this is the intended correction for gaps larger than one period
                    if(nt2::abs(out.buffer[i-1] - val) > discont)
                        out.buffer[i] = val + 2*pi * int((out.buffer[i-1] - val) / (discont));
                    else
                        out.buffer[i] = val;
                }
                return out;
            }

        PROXY(pythonic::numpy, unwrap)

        /* Sorted, duplicate-free values of expr (read in flat order), as a
         * one-dimensional array. */
        template<class E>
            core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 1> unique(E const& expr) {
                typedef typename core::numpy_expr_to_ndarray<E>::T value_type;
                // an ordered set both sorts the values and drops duplicates
                std::set<value_type> distinct;
                size_t const total = expr.size();
                for(size_t pos = 0; pos < total; ++pos)
                    distinct.insert(expr.at(pos));
                return core::ndarray<value_type, 1>(distinct);
            }

        template<class E>
            std::tuple<core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 1>, core::ndarray<long, 1>> unique(E const& expr, bool return_index) {
                std::set<typename core::numpy_expr_to_ndarray<E>::T> res;
                std::vector<long> return_index_res;
                for(size_t i=0; i<expr.size(); ++i)
                {
                    std::pair<typename std::set<typename core::numpy_expr_to_ndarray<E>::T>::iterator, bool> pair = res.insert(expr.at(i));
                    if(pair.second)
                        return_index_res.push_back(i);
                }
                return std::make_tuple(core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 1>(res), core::ndarray<long, 1>(return_index_res));
            }

        template<class E>
            std::tuple<core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 1>, core::ndarray<long, 1>, core::ndarray<long, 1>> unique(E const& expr, bool return_index, bool return_inverse) {
                std::set<typename core::numpy_expr_to_ndarray<E>::T> res;
                std::vector<long> return_index_res;
                core::ndarray<long, 1> return_inverse_res({{expr.size()}}, None);
                for(int i=0; i<expr.size(); ++i)
                {
                    auto pair = res.insert(expr.at(i));
                    return_inverse_res[i] = std::distance(res.begin(), pair.first);
                    if(pair.second)
                        return_index_res.push_back(i);
                }
                return std::make_tuple(core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 1>(res), core::ndarray<long, 1>(return_index_res), return_inverse_res);
            }

        PROXY(pythonic::numpy, unique)

        /* Sorted union of the values of `e` and `f`, as a 1-D array.
         *
         * The element type is the type of `e_value + f_value`; both inputs are
         * poured into one std::set of that type, which removes duplicates and
         * orders the result.
         */
        template<class E, class F>
            core::ndarray<decltype(std::declval<typename core::numpy_expr_to_ndarray<E>::T>() + std::declval<typename core::numpy_expr_to_ndarray<F>::T>()), 1> union1d(E const& e, F const& f)
            {
                typedef decltype(std::declval<typename core::numpy_expr_to_ndarray<E>::T>() + std::declval<typename core::numpy_expr_to_ndarray<F>::T>()) merged_type;
                std::set<merged_type> merged;
                size_t pos = 0;
                while(pos < e.size())
                    merged.insert(e.at(pos++));
                pos = 0;
                while(pos < f.size())
                    merged.insert(f.at(pos++));
                return core::ndarray<merged_type, 1>(merged);
            }

        PROXY(pythonic::numpy, union1d)

        /* Upper triangle of a 2-D array.
         *
         * Returns a new array with the shape of `expr` where element (row, col) is
         * copied from `expr` when col - row >= k and set to 0 otherwise.
         */
        template<class T>
            core::ndarray<T,2> triu(core::ndarray<T,2> const& expr, int k = 0)
            {
                core::ndarray<T,2> out(expr.shape, None);
                auto const rows = expr.shape[0];
                auto const cols = expr.shape[1];
                for(long row=0; row<rows; ++row)
                {
                    // flat offset of the first element of this row
                    auto const base = row * cols;
                    for(long col=0; col<cols; ++col)
                    {
                        if(col - row < k)
                            out.buffer[base + col] = 0;
                        else
                            out.buffer[base + col] = expr.buffer[base + col];
                    }
                }
                return out;
            }
        NUMPY_EXPR_TO_NDARRAY0(triu)
        PROXY(pythonic::numpy, triu)

        /* Lower triangle of a 2-D array.
         *
         * Returns a new array with the shape of `expr` where element (row, col) is
         * copied from `expr` when col - row <= k and set to 0 otherwise.
         */
        template<class T>
            core::ndarray<T,2> tril(core::ndarray<T,2> const& expr, int k = 0)
            {
                core::ndarray<T,2> out(expr.shape, None);
                auto const rows = expr.shape[0];
                auto const cols = expr.shape[1];
                for(long row=0; row<rows; ++row)
                {
                    // flat offset of the first element of this row
                    auto const base = row * cols;
                    for(long col=0; col<cols; ++col)
                    {
                        if(col - row > k)
                            out.buffer[base + col] = 0;
                        else
                            out.buffer[base + col] = expr.buffer[base + col];
                    }
                }
                return out;
            }

        NUMPY_EXPR_TO_NDARRAY0(tril)
        PROXY(pythonic::numpy, tril)

        /* Strip leading and/or trailing zeros from a 1-D array.
         *
         * `trim` selects the sides to strip: it is searched for "f" (front) and
         * "b" (back). The result is a new array holding the surviving range.
         *
         * The back scan walks down from the end while the last kept element is
         * zero and never crosses `begin`, so arrays without trailing zeros, with
         * several trailing zeros, or made only of zeros are all handled (the
         * previous loop stopped at the first zero it met and ran below index 0
         * when no zero was found).
         */
        template<class T>
            core::ndarray<T,1> trim_zeros(core::ndarray<T,1> const& expr, core::string const& trim = "fb")
            {
                long begin = 0;
                long end = expr.size();
                if(trim.find("f") != std::string::npos)
                    begin = std::find_if(expr.buffer, expr.buffer + end, [](T i){return i!=0;}) - expr.buffer;
                if(trim.find("b") != std::string::npos)
                    // same "!= 0" test as the front scan, negated
                    while(end > begin && !(expr.buffer[end - 1] != 0))
                        --end;
                core::ndarray<T,1> out({{end - begin}}, None);
                std::copy(expr.buffer + begin, expr.buffer + end, out.buffer);
                return out;
            }
        NUMPY_EXPR_TO_NDARRAY0(trim_zeros)
        PROXY(pythonic::numpy, trim_zeros)

        /* N x M array with ones at and below the k-th diagonal, zeros elsewhere.
         *
         * M == -1 means "square" (M = N). Element (row, col) is set to 1 when
         * col - row <= k. The `d` argument is not read by the body.
         */
        template<class dtype = double>
        core::ndarray<dtype, 2> tri(int N, int M=-1, int k=0, dtype d=dtype())
        {
            int const cols = (M == -1) ? N : M;
            core::ndarray<dtype, 2> out({{N, cols}}, 0);
            for(long row=0; row<N; ++row)
            {
                // col - row grows with col, so the run of ones is a prefix of the row
                for(long col=0; col<cols && col - row <= k; ++col)
                    out.buffer[row * cols + col] = 1;
            }
            return out;
        }

        PROXY(pythonic::numpy, tri)

        /* Sum along a diagonal of a 2-D array.
         *
         * offset == 0 is the main diagonal, offset > 0 starts `offset` columns to
         * the right (elements (i, i + offset)), offset < 0 starts `-offset` rows
         * down (elements (i - offset, i)). An offset that leaves no element yields 0.
         *
         * The previous indexing (i * cols + i + offset) was only right for
         * offset >= 0; with a negative offset it started before the buffer.
         */
        template<class T>
            T trace(core::ndarray<T,2> const& expr, int offset=0)
            {
                long const rows = expr.shape[0];
                long const cols = expr.shape[1];
                long const first_row = offset < 0 ? -static_cast<long>(offset) : 0;
                long const first_col = offset > 0 ? offset : 0;
                // number of elements on the selected diagonal (may be <= 0)
                long const count = std::min(rows - first_row, cols - first_col);
                T res = 0;
                for(long i=0; i<count; ++i)
                    res += expr.buffer[(first_row + i) * cols + first_col + i];
                return res;
            }
        NUMPY_EXPR_TO_NDARRAY0(trace)
        PROXY(pythonic::numpy, trace)

        /* Repeat the flattened content of `expr` `reps` times.
         *
         * The output is built with a single extent, expr.size() * reps: the first
         * expr.size() slots are read from `expr`, every later slot copies the slot
         * one period earlier in the output.
         */
        template<class E>
            typename core::numpy_expr_to_ndarray<E>::type tile(E const& expr, int reps)
            {
                typename core::numpy_expr_to_ndarray<E>::type out({{expr.size() * reps}}, None);
                size_t const period = expr.size();
                size_t const total = expr.size() * reps;
                size_t pos = 0;
                for(; pos<period; ++pos)
                    out.buffer[pos] = expr.at(pos);
                for(; pos<total; ++pos)
                    out.buffer[pos] = out.buffer[pos - period];
                return out;
            }

        /* Tile `expr` according to a per-axis repetition count.
         *
         * The axes of `expr` are aligned with the *trailing* axes of `reps`
         * (missing leading axes count as extent 1), so output axis N-1-i has extent
         * reps[N-1-i] * expr.shape[E::value-1-i]. Each output element is then read
         * from the input element whose coordinates are the output coordinates taken
         * modulo the input extents.
         *
         * This replaces a version that paired reps with expr.shape from the wrong
         * end and filled the output by flat-index modulo, which is only correct for
         * 1-D inputs.
         *
         * NOTE(review): inputs with more axes than `reps` (E::value > N) are not
         * promoted; only their trailing N axes are considered. Also assumes
         * expr.at(k) addresses elements in row-major order, as the previous code
         * did - confirm.
         */
        template<class E, size_t N>
            core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, N> tile(E const& expr, std::array<long, N> const& reps)
            {
                std::array<long, N> shape;
                for(size_t i=0; i<N; ++i)
                    shape[N - i - 1] = reps[N - i - 1] * ((E::value > i)?expr.shape[E::value - i - 1]:1);
                core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, N> out(shape, None);
                for(size_t flat=0; flat<out.size(); ++flat)
                {
                    long rest = flat;   // output flat index still to be decomposed
                    long src = 0;       // matching flat index in the input
                    long stride = 1;    // input stride of the axis being processed
                    // walk the axes from the innermost one outwards
                    for(size_t i=0; i<N; ++i)
                    {
                        long const extent = shape[N - i - 1];
                        long const coord = rest % extent;
                        rest /= extent;
                        if(E::value > i)
                        {
                            long const dim = expr.shape[E::value - i - 1];
                            src += (coord % dim) * stride;
                            stride *= dim;
                        }
                    }
                    out.buffer[flat] = expr.at(src);
                }
                return out;
            }

        PROXY(pythonic::numpy, tile);

        /* Exchange two axes of an array.
         *
         * Builds the identity permutation of the N axes, swaps the two requested
         * entries and delegates to _transpose. Negative axes count from the end;
         * an axis outside [-N, N) raises ValueError instead of writing outside the
         * permutation table.
         */
        template<class T, size_t N>
            core::ndarray<T,N> swapaxes(core::ndarray<T,N> const & a, int axis1, int axis2)
            {
                long const ndim = N;
                long const first = axis1 < 0 ? axis1 + ndim : axis1;
                long const second = axis2 < 0 ? axis2 + ndim : axis2;
                if(first < 0 || first >= ndim || second < 0 || second >= ndim)
                    throw __builtin__::ValueError("bad axis argument to swapaxes");
                long t[N];
                for(unsigned long i = 0; i<N; ++i)
                    t[i] = i;
                std::swap(t[first], t[second]);
                return _transpose(a, t);
            }

        NUMPY_EXPR_TO_NDARRAY0(swapaxes);
        PROXY(pythonic::numpy, swapaxes);

        /* Gather elements of `expr` at the flat positions listed in `indices`.
         *
         * `indices` is first converted with asarray; that array is then overwritten
         * in place, each index being replaced by expr.at(index), and returned.
         * Note that the result therefore has the element type of the index array.
         */
        template<class F, class T, size_t N>
            typename core::numpy_expr_to_ndarray<F>::type take(core::ndarray<T,N> const & expr, F const& indices)
            {
                typename core::numpy_expr_to_ndarray<F>::type gathered = asarray(indices);
                size_t pos = 0;
                while(pos < gathered.size())
                {
                    gathered.buffer[pos] = expr.at(gathered.buffer[pos]);
                    ++pos;
                }
                return gathered;
            }
        NUMPY_EXPR_TO_NDARRAY0(take);
        PROXY(pythonic::numpy, take);

        /* Split an array into `nb_split` equal parts.
         *
         * Raises ValueError when `nb_split` is not strictly positive (this also
         * protects the modulo below from a division by zero) or when the element
         * count is not a multiple of `nb_split`; otherwise delegates to array_split.
         *
         * NOTE(review): divisibility is checked on the total element count
         * a.size(); confirm this matches the axis array_split actually cuts along.
         */
        template<class T, size_t N>
            core::list<core::ndarray<T,N>> split(core::ndarray<T,N> const& a, long nb_split) {
                if(nb_split <= 0)
                    throw __builtin__::ValueError("number sections must be larger than 0.");
                if(a.size()%nb_split != 0)
                    throw __builtin__::ValueError("array split does not result in an equal division");
                return  array_split(a, nb_split);
            }

        /* Split an array at the positions described by an iterable.
         *
         * Only enabled when is_iterable<I>::value holds; the work is entirely
         * delegated to array_split(a, split_mask), no divisibility check is done
         * here.
         */
        template<class T, size_t N, class I>
            typename std::enable_if<is_iterable<I>::value, core::list<core::ndarray<T,N>>>::type
            split(core::ndarray<T,N> const& a, I const& split_mask) {
                return array_split(a, split_mask);
            }

        PROXY(pythonic::numpy, split);

        /* Strict weak ordering on complex numbers: compare real parts first and
         * fall back to imaginary parts when the real parts are equal.
         *
         * Declared inline because this is a non-template function defined in a
         * header: without it, including the header from two translation units
         * produces duplicate definitions at link time.
         */
        inline bool comp_complex(proxy::complex const& i, proxy::complex const& j)
        {
            if(std::real(i) == std::real(j))
                return std::imag(i) < std::imag(j);
            else
                return std::real(i) < std::real(j);
        }

        /* Copy `expr` into a complex array and sort it along its last axis.
         *
         * The flat buffer is cut into consecutive runs of expr.shape[N-1] elements
         * and each run is sorted independently with comp_complex (real part first,
         * then imaginary part).
         */
        template<class T, size_t N>
            core::ndarray<proxy::complex,N> sort_complex(core::ndarray<T,N> const& expr)
            {
                core::ndarray<proxy::complex,N> out(expr.shape, None);
                std::copy(expr.buffer, expr.buffer + expr.size(), out.buffer);
                auto const run = expr.shape[N-1];
                auto const runs = expr.size()/expr.shape[N-1];
                for(int r=0; r<runs; ++r)
                {
                    auto const first = out.buffer + r * run;
                    std::sort(first, first + run, comp_complex);
                }
                return out;
            }

        NUMPY_EXPR_TO_NDARRAY0(sort_complex)
        PROXY(pythonic::numpy, sort_complex);

        /* Sorted copy of `expr` along `axis` (default: last axis).
         *
         * `axis` is first brought into [0, N). With `step` the number of elements
         * spanned by one index of the axis *before* `axis` (product of the extents
         * from `axis` to the end) and `stride` = step / shape[axis] the distance
         * between two consecutive elements of a lane, the loop visits
         * size / shape[axis] lanes. Each lane is gathered into a temporary vector,
         * sorted with std::sort, and scattered back at the same positions.
         */
        template<class T, size_t N>
            core::ndarray<T,N> sort(core::ndarray<T,N> const& expr, long axis=-1)
            {
                while(axis<0)
                    axis += N;
                axis %= N;
                core::ndarray<T,N> out(expr.shape, None);
                std::copy(expr.buffer, expr.buffer + expr.size(), out.buffer);
                long const step = std::accumulate(expr.shape.begin() + axis, expr.shape.end(), 1L, std::multiplies<long>());
                auto const stride = step/expr.shape[axis];
                for(size_t i=0; i<expr.size()/expr.shape[axis]*step; i+=step)
                {
                    // start of the lane: block offset, plus a column shift once
                    // `i` has wrapped past the end of the buffer
                    T* const lane = out.buffer + (i%expr.size() + i/expr.size());

                    // gather
                    std::vector<T> to_sort;
                    for(T* cursor = lane; cursor != lane + step; cursor += stride)
                        to_sort.push_back(*cursor);

                    std::sort(to_sort.begin(), to_sort.end());

                    // scatter
                    T* cursor = lane;
                    for(auto it = to_sort.begin(); it != to_sort.end(); ++it)
                    {
                        *cursor = *it;
                        cursor += stride;
                    }
                }
                return out;
            }

        NUMPY_EXPR_TO_NDARRAY0(sort)
        PROXY(pythonic::numpy, sort);

        /* Element-wise choice among several arrays.
         *
         * The output has the shape of choicelist[0] and starts filled with
         * `_default`. For every flat position, the first condition array holding a
         * true value at that position decides which choice array supplies the
         * element; positions where no condition holds keep `_default`.
         */
        template<class T, size_t N, class U>
            core::ndarray<T,N> select(core::list<core::ndarray<U,N>> const& condlist, core::list<core::ndarray<T,N>> const& choicelist, T _default = 0)
            {
                core::ndarray<T,N> out(choicelist[0].shape, _default);
                for(size_t pos=0; pos<out.size(); ++pos)
                {
                    // index of the first condition that holds at `pos`, if any
                    int hit = 0;
                    while(hit<condlist.size() && !condlist[hit].buffer[pos])
                        ++hit;
                    if(hit<condlist.size())
                        out.buffer[pos] = choicelist[hit].buffer[pos];
                }
                return out;
            }

        /* select() overload for conditions that are not plain arrays yet.
         *
         * Every condition is converted with asarray, then the call is forwarded to
         * the array-based select. `_default` is now forwarded as well; it used to
         * be dropped, so unselected positions always received 0.
         */
        template<class T, size_t N, class U>
            auto select(core::list<U> const& condlist, core::list<core::ndarray<T,N>> const& choicelist, T _default = 0) 
                -> decltype(select(std::declval<core::list<typename core::numpy_expr_to_ndarray<U>::type>>(), choicelist, _default))
            {
                core::list<typename core::numpy_expr_to_ndarray<U>::type> condition(condlist.size());
                for(int i=0; i<condlist.size(); ++i)
                    condition[i] = asarray(condlist[i]);
                return select(condition, choicelist, _default);
            }

        /* select() overload for choices that are not plain arrays yet.
         *
         * Every choice is converted with asarray, then the call is forwarded to
         * select(condlist, choice).
         *
         * NOTE(review): `_default` is typed after the *condition* element type T
         * and is not passed on to the forwarded call, so unselected positions get
         * the callee's own default. Simply adding it as a third argument looks
         * like it would make the call ambiguous between this overload and the
         * array/array one (the third parameter then takes part in partial
         * ordering) - confirm before changing.
         */
        template<class T, size_t N, class U>
            auto select(core::list<core::ndarray<T,N>> const& condlist, core::list<U> const& choicelist, T _default = 0) 
                -> decltype(select(condlist, std::declval<core::list<typename core::numpy_expr_to_ndarray<U>::type>>(), _default))
            {
                // materialise every choice as an array
                core::list<typename core::numpy_expr_to_ndarray<U>::type> choice(choicelist.size());
                for(int i=0; i<choicelist.size(); ++i)
                    choice[i] = asarray(choicelist[i]);
                return select(condlist, choice);
            }

        /* select() overload where neither conditions nor choices are plain arrays.
         *
         * Both lists are converted element by element with asarray, then the call
         * is forwarded to the array-based select. `_default` is now forwarded as
         * well; it used to be dropped, so unselected positions always received 0.
         */
        template<class V, class U>
            auto select(core::list<V> const& condlist, core::list<U> const& choicelist, typename core::numpy_expr_to_ndarray<U>::T _default = 0) 
                -> decltype(select(std::declval<core::list<typename core::numpy_expr_to_ndarray<V>::type>>(), std::declval<core::list<typename core::numpy_expr_to_ndarray<U>::type>>(), _default))
            {
                core::list<typename core::numpy_expr_to_ndarray<U>::type> choice(choicelist.size());
                core::list<typename core::numpy_expr_to_ndarray<V>::type> condition(condlist.size());
                for(int i=0; i<choicelist.size(); ++i)
                    choice[i] = asarray(choicelist[i]);
                for(int i=0; i<condlist.size(); ++i)
                    condition[i] = asarray(condlist[i]);
                return select(condition, choice, _default);
            }

        NUMPY_EXPR_TO_NDARRAY0(select)
        PROXY(pythonic::numpy, select);

        /* Rotate an array by k quarter turns in the plane of its first two axes.
         *
         * k is reduced to a number of turns in {0, 1, 2, 3}; the reduction is done
         * with a non-negative modulo so that negative k work too (in C++ `k % 4`
         * is negative for negative k, which used to send k = -2 and k = -3 to the
         * wrong branch). 0 turns returns a copy; 1 and 3 turns swap the first two
         * extents of the output.
         */
        template<class T, size_t N>
            core::ndarray<T,N> rot90(core::ndarray<T,N> const& expr, int k=1)
            {
                int const turns = ((k % 4) + 4) % 4;
                if(turns==0)
                    return copy(expr);
                std::array<long, N> shape;
                std::copy(expr.shape.begin(), expr.shape.end(), shape.begin());
                if(turns!=2)
                    std::swap(shape[0], shape[1]);
                core::ndarray<T,N> out(shape, None);
                switch(turns)
                {
                    case 1:
                        for(int i=0; i<out.shape[1]; ++i)
                            for(int j=0; j<out.shape[0]; ++j)
                                out[out.shape[0]-1-j][i] = expr[i][j];
                        break;
                    case 2:
                        for(int i=0; i<out.shape[1]; ++i)
                            for(int j=0; j<out.shape[0]; ++j)
                                out[out.shape[0]-1-j][out.shape[1]-1-i] = expr[j][i];
                        break;
                    default: // 3 quarter turns
                        for(int i=0; i<out.shape[1]; ++i)
                            for(int j=0; j<out.shape[0]; ++j)
                                out[j][out.shape[1]-1-i] = expr[i][j];
                        break;
                }
                return out;
            }

        NUMPY_EXPR_TO_NDARRAY0(rot90)
        PROXY(pythonic::numpy, rot90);

        /* Number of dimensions of `expr`, read from the static member E::value;
         * the argument itself is never inspected.
         */
        template<class E>
        size_t rank(E const& expr)
        {
            return static_cast<size_t>(E::value);
        }

        PROXY(pythonic::numpy, rank);

        /* Insertion positions of several values in the 1-D array `a`.
         *
         * `v` is converted with asarray; for each of its elements the result holds
         * the offset returned by std::lower_bound (first character of `side` is
         * 'l') or std::upper_bound ('r') over the buffer of `a`. Any other first
         * character raises ValueError.
         */
        template<class E, class T>
        typename std::enable_if<core::is_array_like<E>::value, core::ndarray<long, core::numpy_expr_to_ndarray<E>::N>>::type searchsorted(core::ndarray<T,1> const& a, E const& v, core::string side = "left")
        {
            core::ndarray<T,core::numpy_expr_to_ndarray<E>::N> to_search = asarray(v);
            core::ndarray<long, core::numpy_expr_to_ndarray<E>::N> out(to_search.shape, None);
            bool const leftmost = (side[0]=='l');
            if(!leftmost && !(side[0]=='r'))
               throw __builtin__::ValueError("'" + side + "' is an invalid value for keyword 'side'");
            auto const first = a.buffer;
            auto const last = a.buffer + a.size();
            for(int i=0; i<out.size(); ++i)
            {
                auto const where = leftmost
                    ? std::lower_bound(first, last, to_search.at(i))
                    : std::upper_bound(first, last, to_search.at(i));
                out.at(i) = where - first;
            }
            return out;
        }

        /* Insertion position of a single value in the 1-D array `a`.
         *
         * Returns the offset given by std::lower_bound when `side` starts with
         * 'l', by std::upper_bound when it starts with 'r', and raises ValueError
         * for anything else.
         */
        template<class T>
        long searchsorted(core::ndarray<T,1> const& a, T const& v, core::string side = "left")
        {
            auto const first = a.buffer;
            auto const last = a.buffer + a.size();
            if(side[0]=='l')
                return std::lower_bound(first, last, v) - first;
            if(side[0]=='r')
                return std::upper_bound(first, last, v) - first;
            throw __builtin__::ValueError("'" + side + "' is an invalid value for keyword 'side'");
        }

        NUMPY_EXPR_TO_NDARRAY0(searchsorted)
        PROXY(pythonic::numpy, searchsorted);

        /* Circularly shift the flattened content of an array.
         *
         * Element k of the input ends up at flat position (k + shift) modulo the
         * element count; the shape is preserved. Negative shifts roll the other
         * way.
         *
         * The shift is normalised with a single modulo (instead of repeated
         * additions) and an empty array is returned as is, which avoids the
         * modulo-by-zero / endless loop of the previous version.
         */
        template<class T, size_t N>
        core::ndarray<T,N> roll(core::ndarray<T,N> const& expr, int shift)
        {
            core::ndarray<T,N> out(expr.shape, None);
            long const count = expr.size();
            if(count == 0)
                return out;
            // bring the shift into [0, count)
            long offset = shift % count;
            if(offset < 0)
                offset += count;
            for(long i=offset; i<count; ++i)
                out.at(i) = expr.at(i - offset);
            // the tail of the input wraps around to the front
            for(long i=0; i<offset; ++i)
                out.at(i) = expr.at(i + count - offset);

            return out;
        }
        NUMPY_EXPR_TO_NDARRAY0(roll)
        PROXY(pythonic::numpy, roll);

        /* Move axis `axis` so that it ends up in front of position `start`, the
         * other axes keeping their relative order.
         *
         * Negative `axis` / `start` count from the end. `axis` must lie in [0, N)
         * and `start` in [0, N] after normalisation, otherwise ValueError is
         * raised. When the axis is already in place a copy is returned.
         *
         * The previous version returned a plain copy whenever start >= axis, so
         * rolling an axis towards the back (e.g. rollaxis(a, 0, N)) had no effect,
         * and negative arguments indexed outside the permutation table.
         */
        template<class T, size_t N>
            core::ndarray<T,N> rollaxis(core::ndarray<T,N> const & a, int axis, int start=0)
            {
                long const ndim = N;
                long source = axis;
                long target = start;
                if(source < 0)
                    source += ndim;
                if(target < 0)
                    target += ndim;
                if(source < 0 || source >= ndim)
                    throw __builtin__::ValueError("rollaxis: axis out of bounds");
                if(target < 0 || target > ndim)
                    throw __builtin__::ValueError("rollaxis: start out of bounds");
                // taking `source` out shifts every later slot one step to the left
                if(source < target)
                    --target;
                if(source == target)
                    return copy(a);
                // permutation: all other axes in order, with `source` spliced in at `target`
                long t[N];
                long slot = 0;
                for(long other = 0; other < ndim; ++other)
                {
                    if(other == source)
                        continue;
                    if(slot == target)
                        t[slot++] = source;
                    t[slot++] = other;
                }
                if(slot == target)
                    t[slot++] = source;
                return _transpose(a, t);
            }

        NUMPY_EXPR_TO_NDARRAY0(rollaxis);
        PROXY(pythonic::numpy, rollaxis);

        /* 1-D array of `new_shape` elements filled from the flattened `expr`,
         * starting over from its first element whenever the input is exhausted.
         */
        template<class T, size_t N>
        core::ndarray<T,1> resize(core::ndarray<T,N> const& expr, int new_shape)
        {
            core::ndarray<T,1> out({{new_shape}}, None);
            int dst = 0;
            while(dst < new_shape)
            {
                out.at(dst) = expr.at(dst % expr.size());
                ++dst;
            }
            return out;
        }

        /* Array of shape `new_shape` filled from the flattened `expr`, starting
         * over from its first element whenever the input is exhausted.
         */
        template<class T, size_t N, size_t M>
        core::ndarray<T,M> resize(core::ndarray<T,N> const& expr, std::array<long, M> const& new_shape)
        {
            core::ndarray<T,M> out(new_shape, None);
            size_t dst = 0;
            while(dst < out.size())
            {
                out.at(dst) = expr.at(dst % expr.size());
                ++dst;
            }
            return out;
        }

        NUMPY_EXPR_TO_NDARRAY0(resize);
        PROXY(pythonic::numpy, resize);

        /* Repeat every element of the flattened `expr` `repeats` times in a row:
         * [a, b, c] with repeats == 2 gives [a, a, b, b, c, c].
         *
         * Output slot i therefore comes from input element i / repeats (the
         * previous code divided by expr.size(), which is only right when the
         * element count happens to equal `repeats`).
         */
        template<class T, size_t N>
        core::ndarray<T,1> repeat(core::ndarray<T,N> const& expr, int repeats)
        {
            core::ndarray<T,1> out({{expr.size() * repeats}}, None);
            // with repeats == 0 the output is empty and the division is never reached
            for(long i=0; i<out.size(); ++i)
                out.at(i) = expr.at(i / repeats);
            return out;
        }

        NUMPY_EXPR_TO_NDARRAY0(repeat);
        PROXY(pythonic::numpy, repeat);

        /* Flattened view of `expr`: delegates to reshape with a single extent equal
         * to the element count.
         */
        template<class T, size_t N>
        core::ndarray<T,1> ravel(core::ndarray<T,N> const& expr)
        {
            return reshape(expr, static_cast<long>(expr.size()));
        }

        NUMPY_EXPR_TO_NDARRAY0(ravel);
        PROXY(pythonic::numpy, ravel);

        /* In-place masked assignment.
         *
         * For every flat position i where mask.at(i) is true, expr.at(i) receives
         * values.at(i % values.size()): the value is picked by position in `expr`,
         * not by how many masked elements were met so far. Returns None.
         */
        template<class T, size_t N, class E,class F>
        none_type putmask(core::ndarray<T,N> & expr, E const& mask, F const& values)
        {
            size_t pos = 0;
            while(pos < expr.size())
            {
                if(mask.at(pos))
                    expr.at(pos) = values.at(pos%values.size());
                ++pos;
            }
            return None;
        }
        PROXY(pythonic::numpy, putmask);

        /* In-place masked assignment that consumes `values` in order.
         *
         * The n-th position where mask is true receives values.at(n modulo
         * values.size()): when there are more masked positions than values, the
         * values are cycled from the start (the previous code kept repeating the
         * first placed value instead). With an empty `values` nothing is written.
         * Returns None.
         */
        template<class T, size_t N, class E,class F>
        none_type place(core::ndarray<T,N> & expr, E const& mask, F const& values)
        {
            long const available = values.size();
            if(available == 0)
                return None;
            long next = 0; // position in `values` of the next value to place
            for(size_t i=0; i<expr.size(); ++i)
            {
                if(mask.at(i))
                {
                    expr.at(i) = values.at(next);
                    if(++next == available)
                        next = 0;
                }
            }
            return None;
        }
        PROXY(pythonic::numpy, place);

        /* In-place scatter: expr.at(ind[i]) = v[i % v.size()] for every index.
         *
         * Only enabled for array-like index containers. An index that is negative
         * or not smaller than expr.size() raises ValueError; assignments made
         * before the faulty index are kept. Returns None.
         */
        template<class F, class T, size_t N, class E>
            typename std::enable_if<core::is_array_like<F>::value, none_type>::type put(core::ndarray<T,N> & expr, F const& ind, E const& v)
            {
                size_t pos = 0;
                while(pos < ind.size())
                {
                    auto target = ind.at(pos);
                    if(target <0 || target>=expr.size())
                        throw __builtin__::ValueError("indice out of bound");
                    expr.at(target) = v.at(pos%v.size());
                    ++pos;
                }
                return None;
            }

        /* In-place assignment of a single element: expr.at(ind) = v.
         *
         * Raises ValueError when `ind` is negative or not smaller than
         * expr.size(). Returns None.
         */
        template<class T, size_t N>
            none_type put(core::ndarray<T,N> & expr, int ind, T const& v)
            {
                bool const in_range = !(ind <0) && !(ind>=expr.size());
                if(!in_range)
                    throw __builtin__::ValueError("indice out of bound");
                expr.at(ind) = v;
                return None;
            }

        NUMPY_EXPR_TO_NDARRAY0(put);
        PROXY(pythonic::numpy, put);

        /* Peak-to-peak range along an axis: max(expr, axis) - min(expr, axis).
         * The return type is whatever that subtraction yields.
         */
        template<class E>
        auto ptp(E const& expr, long axis) -> decltype(max(expr, axis) - min(expr, axis))
        {
            return max(expr, axis) - min(expr, axis);
        }

        /* Peak-to-peak range over the whole input: max(expr) - min(expr).
         * The return type is whatever that subtraction yields.
         */
        template<class E>
        auto ptp(E const& expr) -> decltype(max(expr) - min(expr))
        {
            return max(expr) - min(expr);
        }

        PROXY(pythonic::numpy, ptp);

        /* Outer product of the flattened inputs.
         *
         * The result has shape (a.size(), b.size()) and element (i, j) is
         * a.at(i) * b.at(j); its element type is the one of `a`.
         */
        template<class E, class F>
        core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 2> outer(E const& a, F const& b)
        {
            core::ndarray<typename core::numpy_expr_to_ndarray<E>::T, 2> out({{a.size(), b.size()}}, None);
            // rows are written one after the other, so the flat output index
            // simply advances by one at every product
            size_t slot = 0;
            for(size_t i=0; i<a.size(); ++i)
                for(size_t j=0; j<b.size(); ++j)
                {
                    out.buffer[slot] = a.at(i) * b.at(j);
                    ++slot;
                }
            return out;
        }

        PROXY(pythonic::numpy, outer);

        /* Scalar frexp: split `val` into a significand and a power-of-two
         * exponent using std::frexp, returned as the tuple (significand, exponent).
         * The significand is stored back into a T.
         */
        template<class T>
        typename std::enable_if<std::is_scalar<T>::value, std::tuple<T,int>>::type
        frexp(T val) {
            int exponent;
            T const mantissa = std::frexp(val, &exponent);
            return std::tuple<T,int>(mantissa, exponent);
        }
        /* Element-wise frexp for non-scalar, non-complex inputs.
         *
         * Returns two arrays with the shape of `arr`: the significands and the
         * integer exponents produced by std::frexp on every element.
         */
        template<class E>
            typename std::enable_if<
                not std::is_scalar<E>::value and not is_complex<E>::value,
                std::tuple<
                    core::ndarray<typename core::numpy_expr_to_ndarray<E>::type::dtype, core::numpy_expr_to_ndarray<E>::N>,
                    core::ndarray<int, core::numpy_expr_to_ndarray<E>::N>
                >
            >::type
            frexp(E const& arr) {
                typedef core::ndarray<typename core::numpy_expr_to_ndarray<E>::type::dtype, core::numpy_expr_to_ndarray<E>::N> significand_array;
                typedef core::ndarray<int, core::numpy_expr_to_ndarray<E>::N> exponent_array;
                significand_array significands(arr.shape, None);
                exponent_array exps(arr.shape, None);
                long const count = arr.size();
                long pos = 0;
                while(pos < count)
                {
                    // std::frexp writes the exponent straight into the exponent buffer
                    significands.buffer[pos] = std::frexp(arr.at(pos), exps.buffer + pos);
                    ++pos;
                }
                return std::make_tuple(significands, exps);
            }
        PROXY(pythonic::numpy, frexp);

        /* Element-wise functions and operators exposed through helper macros.
         *
         * The NP_PROXY, NP_PROXY_ALIAS, NP_PROXY_OP, ALIAS and PROXY macros are
         * defined outside this part of the file; presumably NP_PROXY(f) exposes an
         * existing function under the same name, NP_PROXY_ALIAS(name, target)
         * exposes `target` under `name`, and NP_PROXY_OP(name) exposes a named
         * operator - TODO confirm against the macro definitions.
         * Entries are kept in alphabetical order of the exposed name.
         */
        NP_PROXY_ALIAS(arccos, nt2::acos);

        NP_PROXY_ALIAS(arccosh, nt2::acosh);

        NP_PROXY_ALIAS(arcsin, nt2::asin);

        NP_PROXY_ALIAS(arcsinh, nt2::asinh);

        NP_PROXY_ALIAS(arctan, nt2::atan);

        NP_PROXY_ALIAS(arctan2, nt2::atan2);

        NP_PROXY_ALIAS(arctanh, nt2::atanh);

        NP_PROXY_OP(bitwise_and);

        NP_PROXY_OP(bitwise_not);

        NP_PROXY_OP(bitwise_or);

        NP_PROXY_OP(bitwise_xor);

        NP_PROXY(ceil);

        NP_PROXY(conj);

        NP_PROXY_ALIAS(conjugate, nt2::conj);

        NP_PROXY(copysign);

        NP_PROXY(cos);

        NP_PROXY(cosh);

        // deg2rad and radians (further down) both map to nt2::inrad
        NP_PROXY_ALIAS(deg2rad, nt2::inrad);

        // degrees and rad2deg (further down) both map to nt2::indeg
        NP_PROXY_ALIAS(degrees, nt2::indeg);

        NP_PROXY_OP(divide);

        NP_PROXY_ALIAS(empty_like, pythonic::numpy_expr::ops::empty_like);

        NP_PROXY_OP(equal);

        NP_PROXY(exp);

        NP_PROXY(expm1);

        NP_PROXY_ALIAS(fabs, nt2::abs);

        NP_PROXY(floor);

        NP_PROXY_ALIAS(floor_divide, nt2::divfloor);

        NP_PROXY_ALIAS(fmax, nt2::max);

        NP_PROXY_ALIAS(fmin, nt2::min);

        NP_PROXY_ALIAS(fmod, nt2::mod);

        // NP_PROXY(frexp); // TODO
        // (frexp currently has hand-written scalar and array overloads plus its
        // own PROXY line right before this macro list)

        NP_PROXY_OP(greater);

        NP_PROXY_OP(greater_equal);

        NP_PROXY(hypot);

        // invert shares its implementation with bitwise_not
        NP_PROXY_ALIAS(invert, pythonic::numpy_expr::ops::bitwise_not); 

        NP_PROXY_ALIAS(isfinite, nt2::is_finite);

        NP_PROXY_ALIAS(isinf, nt2::is_inf);

        NP_PROXY_ALIAS(isnan, nt2::is_nan);

        NP_PROXY_ALIAS(isneginf, pythonic::numpy_expr::ops::isneginf); 

        NP_PROXY_ALIAS(isposinf, pythonic::numpy_expr::ops::isposinf); 

        NP_PROXY(ldexp);

        NP_PROXY_OP(left_shift);

        NP_PROXY_OP(less);

        NP_PROXY_OP(less_equal);

        NP_PROXY(log10);

        NP_PROXY(log1p);

        NP_PROXY(log2);

        NP_PROXY_ALIAS(logaddexp, pythonic::numpy_expr::ops::logaddexp);

        NP_PROXY_ALIAS(logaddexp2, pythonic::numpy_expr::ops::logaddexp2);

        NP_PROXY_OP(logical_and);

        NP_PROXY_OP(logical_not);

        NP_PROXY_OP(logical_or);

        NP_PROXY_OP(logical_xor);

        // maximum/minimum use the same nt2 functions as fmax/fmin above
        NP_PROXY_ALIAS(maximum, nt2::max);

        NP_PROXY_ALIAS(minimum, nt2::min);

        NP_PROXY(mod);

        NP_PROXY_OP(multiply);

        NP_PROXY_OP(negative);

        NP_PROXY(nextafter);

        NP_PROXY_OP(not_equal);

        NP_PROXY_ALIAS(ones_like, pythonic::numpy_expr::ops::ones_like);

        NP_PROXY_ALIAS(power, nt2::pow);

        NP_PROXY_ALIAS(rad2deg, nt2::indeg);

        NP_PROXY_ALIAS(radians, nt2::inrad);

        NP_PROXY_ALIAS(reciprocal, nt2::rec);

        NP_PROXY(remainder);

        NP_PROXY_OP(right_shift);

        NP_PROXY_ALIAS(rint, nt2::iround)

        // round and round_ are declared as aliases of rint, each with its own proxy
        ALIAS(rint, round);
        PROXY(pythonic::numpy, round);

        ALIAS(rint, round_);
        PROXY(pythonic::numpy, round_);

        NP_PROXY(sign);

        NP_PROXY_ALIAS(signbit, nt2::bitofsign)

        NP_PROXY(sin);

        NP_PROXY(sinh);

        NP_PROXY_ALIAS(spacing, nt2::eps)

        NP_PROXY(sqrt);

        NP_PROXY_ALIAS(square, pythonic::numpy_expr::ops::square);

        NP_PROXY_ALIAS(subtract, pythonic::numpy_expr::ops::subtract);

        NP_PROXY(tan);

        NP_PROXY(tanh);

        // true_divide shares its implementation with the divide operator
        NP_PROXY_ALIAS(true_divide, pythonic::numpy_expr::ops::divide); 

        NP_PROXY(trunc);

        // fix is declared as an alias of trunc, with its own proxy
        ALIAS(trunc, fix);
        PROXY(pythonic::numpy, fix);

        NP_PROXY_ALIAS(zeros_like, pythonic::numpy_expr::ops::zeros_like);

// The helper macros are local to this header and are removed once used.
// NOTE(review): NP_PROXY_ALIAS and NP_PROXY_OP are not #undef'd here - confirm
// whether they are meant to stay visible to includers.
#undef NP_PROXY
#undef NAMED_OPERATOR
#undef NAMED_UOPERATOR
    }

    /* Overloads of the builtin sum() for lazy numpy expressions. */
    namespace __builtin__ {
        /* sum() of a unary numpy expression: the expression is converted with
         * numpy::asarray and handed to numpy::sum with 0 as second argument
         * (presumably the axis - TODO confirm against numpy::sum).
         */
        template<class Op, class Arg0>
            auto sum(core::numpy_uexpr<Op, Arg0> const& e) -> decltype(numpy::sum(numpy::asarray(e), 0))
            {
                return numpy::sum(numpy::asarray(e), 0);
            }

        /* Same as above for a binary numpy expression. */
        template<class Op, class Arg0, class Arg1>
            auto sum(core::numpy_expr<Op, Arg0, Arg1> const& e) -> decltype(numpy::sum(numpy::asarray(e), 0))
            {
                return numpy::sum(numpy::asarray(e), 0);
            }
        PROXY(pythonic::__builtin__,sum);
    }
}

#endif
