# -*- coding: utf-8 -*-
'''

ADAM Job object for managing the processing chain of an ADAM calculation.

.. moduleauthor:: Cedric Bacour, Ivan Price


'''


import os
import netCDF4
import numpy as np
from datetime import datetime
import time
import logging
import random
import re


# import our config containing paths to data files and output directories
# as well as all parameters of calculation models
import  adam_config


# import ADAM specific modules, these will reference the config imported above
import adam_io
import process_reflectance
import process_brdf
import show
import analysis




class adam_job():

    def __init__(self, extent=None, month_index=0,  job_output_dir=None, cfg=None,
                 sza=None, vza=None, phi=None, lmbd=None, sza_max_limit=None, vza_max_limit=None, beyond_za_limit=None,
                 brdf_graph_type=None, job_type=None, create_working_dir=True, spectral_domains=None):
        ''' Initialisation for the class.

        All parameters are optional, and normally populated by the validate_input method'''

        # the job needs the cfg property which it uses to source default values for
        # many different types of operations
        self.cfg = cfg
        if cfg is None: self.cfg = adam_config.adam_config()

        # geographic extent
        self.extent = extent
        # month number (0->11)
        self.month_index = month_index
        
        # a job needs a working directory
        if job_output_dir is None:
            self.job_output_dir = self.__get_job_output_dir__(create_working_dir)

        # set observation / illumination geometries, with defaults coming from the config
        self.sza = sza
        if sza is None: self.sza = self.cfg.sza_std
        self.phi = phi
        if phi is None: self.phi = np.array([ self.cfg.phi_std ])
        self.vza = vza
        if vza is None: self.vza = np.array([ self.cfg.vza_std ])

        # values to use when sza or vza are beyond limits
        self.sza_max_limit = sza_max_limit
        if sza_max_limit is None: self.sza_max_limit = self.cfg.sza_max_limit
        self.vza_max_limit = vza_max_limit
        if vza_max_limit is None: self.vza_max_limit = self.cfg.vza_max_limit
        self.beyond_za_limit = beyond_za_limit
        if beyond_za_limit is None: self.beyond_za_limit = self.cfg.beyond_za_limit
        

        # wavelength(s)
        self.lmbd = lmbd
        if lmbd is None: self.lmbd = self.cfg.lmbd
        self.nlmbd = len(self.lmbd)

        # spectral domain for when using wavelength ranges
        self.spectral_domains = spectral_domains
        if self.spectral_domains is None:
            self.spectral_domains = self.cfg.spectral_domains
        
        # various housekeeping properties
        self.start_time = time.time()
        # property to store the ongoing status of a job
        self.running_status = []
        # holds the outputs of a job including graphs and downloads
        self.outputs = []
        # properties storing the type of job for the benefit of the validation function
        self.job_type = job_type
        self.brdf_graph_type = brdf_graph_type
        # initialise the dictonary holding the output data
        self.data = {}






    def __get_job_output_dir__(self, create=True):
        ''' Internal method return (and optionally create) a suitable output path for the output produced for this job.
        
            **Kwargs:**
               state (bool): Current state to be in.
        
            **Returns:**
               string: the full path to the job directory
        
        '''
        
        date_string = '%s.%s' % (datetime.strftime(datetime.now(), '%Y%m%d%H%M%S'), datetime.now().microsecond)
        job_output_dir = '%s/%s' % (self.cfg.output_root_dir, date_string)
        # ensure the dir doesn't already exist
        while os.path.exists(job_output_dir):
            date_string = '%s.%s' % (datetime.strftime(datetime.now(), '%Y%m%d%H%M%S'), datetime.now().microsecond)
            job_output_dir = '%s/%s' % (self.cfg.output_root_dir, date_string)

        if create:
            os.mkdir(job_output_dir)

        return job_output_dir

    def __set_extent__(self, extent):
        ''' Internal method ensure minx is less than maxx, miny is less than maxy,
        and the max dimensions of requests according to the config are respected.
        
        Job extent property is modified in-place.
        
        **Args:**
           extent (dict): Dictionary with keys: 'minx','miny','maxx','maxy'
    
        **Returns:**
           extent (dict): Dictionary with keys: 'minx','miny','maxx','maxy'
               
        '''
        # get input
        minx,miny,maxx,maxy = extent['minx'], extent['miny'], extent['maxx'], extent['maxy']

        # ensure mins and maxes are correct
        if maxx < minx:
            maxx, minx = minx, maxx
        if maxy < miny:
            maxy, miny = miny, maxy

        if minx == maxx or miny==maxy:
            raise Exception('minx and maxx or miny and maxy are the same')
            
        # assert request extent limit, only if operation is NOT download
        if ((maxx - minx) * (maxy - miny)) > self.cfg.limit_square_degrees and 'download' not in self.job_type:
            logging.exception('error extent too large')
            raise Exception('Requested extent is too large for non-download request. x*y limit is %s' %
                             self.cfg.limit_square_degrees)

        extent['minx'], extent['miny'], extent['maxx'], extent['maxy'] = minx,miny,maxx,maxy
        self.extent = extent
        return extent

    def validate_input(self, request_dict):
        ''' Method to validate and populate critical properties of this job instance.

        This is the preferred way to prepare an ADAM Job instance for a calculation.
        The input dictionary parameter is normally the CGI dictionary passed by an HTTP client.
        
        **Args:**
            request_dict(dict):  The request dictonary
        
        **Returns:**
            bool.  The result of the validation.
            
        An example request_dict follows::
            
            request_dict = {
                'fieldOperationType': 'brdf',
                'fieldSpectralDomain': '850-860 nm',
                'fieldBRType': 'polar',
                'fieldSunZenith': 45,
                'fieldViewZenith': 0,
                'fieldRelAzimuth': 0,
                'fieldMonth': 'jan',

                'fieldCorner1Lat'   : 63.2,
                'fieldCorner1Lon'   : 27.2,
                'fieldCorner2Lat'   : 63.3,
                'fieldCorner2Lon'   : 27.4
            }
        
        Further examples can be obtained by using the ADAM Web Interface and inspecting
        the 'Debug' tab, where the request_dict is displayed for each request.


        '''

        # set the job type from the dictonary
        self.job_type = request_dict['fieldOperationType']


        # determine the month from the 'fieldMonth' parameter
        months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
        try:
            self.month_index = months.index(request_dict['fieldMonth'].lower())
        except:
            self.month_index = 0

        # validate / clean the spatial extent
        extent = {}
        try:
            extent['minx'], extent['miny'], extent['maxx'], extent['maxy'] = map(float,
                                                                                [request_dict['fieldCorner1Lon'],
                                                                                 request_dict['fieldCorner1Lat'],
                                                                                request_dict['fieldCorner2Lon'],
                                                                                request_dict['fieldCorner2Lat'] ]
                                                                             )
            self.__set_extent__(extent)
        except:
            logging.exception('error validating / cleaning extent')
            raise


        # set some job-type specific properties
        if 'brdf' in self.job_type:
            try:
                spectral_domains = request_dict.get('fieldSpectralDomain', None)
                if spectral_domains == None:
                    self.spectral_domains = self.cfg.spectral_domains
                else:
                    # remove any text charactes (like 'nm')
                    spectral_domains = re.sub('[a-z]| *', '', spectral_domains)
                    spectral_domains = spectral_domains.split('-')
                    spectral_domains = map(int, spectral_domains)
                    self.spectral_domains = np.array([ spectral_domains ])

                # the lmbd is a 1-D array of integers representing all the wavelengths given
                # in the spectral_domains variable
                self.lmbd = np.linspace(self.spectral_domains[0][0],
                                        self.spectral_domains[-1][1],
                                        self.spectral_domains[-1][1] - self.spectral_domains[0][0] + 1)

                self.nlmbd = len(self.lmbd)

                # phi and vza are always (maybe length=1) arrays of float(s)
                self.phi = float(request_dict['fieldRelAzimuth'])
                self.phi = np.array([ self.phi ])
                self.vza        = float(request_dict['fieldViewZenith'])
                self.vza        = np.array([self.vza])

                self.sza = float(request_dict['fieldSunZenith'])

                # set overlimit variables
                if 'fieldSunZenithMaxLimit' in request_dict.keys():
                    self.sza_max_limit = float(request_dict['fieldSunZenithMaxLimit'])
                if 'fieldViewZenithMaxLimit' in request_dict.keys():
                    self.vza_max_limit = float(request_dict['fieldViewZenithMaxLimit'])
                if 'fieldBeyondZALimit' in request_dict.keys():
                    self.beyond_za_limit = request_dict['fieldBeyondZALimit'].lower().strip()
                    
            except:
                logging.error('Error validating parameters for BRDF')
                raise

            # default BRDF graph is principle plane
            self.brdf_graph_type = 'principal'
            if request_dict.has_key('fieldBRType'):
                self.brdf_graph_type = request_dict['fieldBRType'].lower()


        if 'time' in self.job_type:
            self.vza        = float(request_dict['fieldViewZenith'])
            self.vza        = np.array([self.vza])

            if 'fieldSunZenithMaxLimit' in request_dict.keys():
                self.sza_max_limit = float(request_dict['fieldSunZenithMaxLimit'])
            if 'fieldViewZenithMaxLimit' in request_dict.keys():
                self.vza_max_limit = float(request_dict['fieldViewZenithMaxLimit'])
            if 'fieldBeyondZALimit' in request_dict.keys():
                self.beyond_za_limit = float(request_dict['fieldBeyondZALimit'])


        if 'spectrum' in self.job_type:
            try:
                self.vza = np.array([ float(request_dict['fieldViewZenith']) ])
                self.phi = np.array([ float(request_dict['fieldRelAzimuth']) ])
                self.sza = float(request_dict['fieldSunZenith'])

                if 'fieldSunZenithMaxLimit' in request_dict.keys():
                    self.sza_max_limit = float(request_dict['fieldSunZenithMaxLimit'])
                if 'fieldViewZenithMaxLimit' in request_dict.keys():
                    self.vza_max_limit = float(request_dict['fieldViewZenithMaxLimit'])
                if 'fieldBeyondZALimit' in request_dict.keys():
                    self.beyond_za_limit = float(request_dict['fieldBeyondZALimit'])

            except:
                logging.exception('Error validating parameters for spectrum graph')
                raise


        return True


    def get_month_name(self, month_index=None):
        ''' Utility method to return the name of the month for this job, or optionally from an aribtary index.
        
        Used by the graphing functions.
        
        **Kwargs:**
           month_index (int): The month index (from 0 to 11)
        
        **Returns:**
           str.  The name of the month.

        '''
        if month_index is None:
            month_index = self.month_index
        months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Oct','Nov','Dec']
        
        return months[month_index]

    def set_status(self, status_text):
        ''' Method to update the status log for a potentially long-running job, provides for job-specific logging.
        
        **Args:**
            status_text (str):  the incoming status text
        
        **Returns:**
           None
           
        '''
        self.status = status_text
        # update ongoing status
        self.running_status.append('%s: %s' % (time.time(), status_text))
        # log to debug also
        logging.debug('ADAM Job: %s: %s' % (self.job_output_dir.split('/')[-1], self.status))


    def add_output(self, output_type, filename, case=None, title=None):
        ''' Method to add a reference to an output (e.g. a graph / netcdf output) to this job for
        recuperation later.
        
        This is used by the web client to present a list of outputs (in the form of Tabs in the
        user interface) resulting from the job.
        
        **Args:**
            output_type (str):  The type ('graph'/'download')
            filename (str): the name of the file to be found in the job output directory
        **Kwargs:**
            case (str): 'pixels' or 'stats', used by the web client
            title (str): title of output, used by the web client
    
        **Returns:**
           None

        '''

        # convert // and /// to /
        self.job_output_dir = re.sub( '/+','/', self.job_output_dir)
        # get the filename and job directory of the output, minus all other path info
        abs_filename = filename
        filename = '/'.join(filename.split('/')[-2:])
        # build the path to the file from the perspective of a http client
        http_root = self.cfg.http_root
        # build the http url with the http path and the bare filename
        http_url = '%s/%s' % (http_root, filename)

        # add the output to the job
        self.outputs.append({'type': output_type,
                             'filename': abs_filename,
                             'http_url': http_url,
                             'case': case,
                             'title': title})


    def build_filename(self, output_type, extension, month_index=None, lats=None, lons=None, rand=''):
        ''' Method to build the filename of an output according to a predefined standard. 
        
        Responsible for the bizarre name given to ADAM downloads. Includes a random component
        to avoid duplicate filenames in the same job.
        
        **Args:**
            output_type (str):  The type ('netcdf'/'BRDF_polar'..)
            extension (str): the filename extension
        **Kwargs:**
            month_indexes (list): list of integer month indexes contained in the output
            lats (numpy.array): list of latitudes represented in the output
            lons (numpy.array): list of longitudes represented in the output
            rand (string): random text used to make a unique filename if there is already a filename
                           with the desired name in the working directory.
    
        **Returns:**
            str: the new filename to be used
           
        '''

        if month_index is None:
            month_index = self.data['month_index']
        if lats is None:
            lats = self.data['latitude']
        if lons is None:
            lons = self.data['longitude']

        lats = np.array(lats)
        lons = np.array(lons)
        # if we were given the random string then pad it out with some hyphens
        if rand != '':
            rand = '--%s' % rand
            
        filename = 'ADAM_V01_%s_M%02d_Lon_%.02f_%.02f_Lat_%.02f_%.02f%s.%s' % (output_type,
                                                                             month_index + 1,
                                                                             lons.min(),
                                                                             lons.max(),
                                                                             lats.min(),
                                                                             lats.max(),
                                                                             rand,
                                                                             extension)
        filename = '%s/%s' % (self.job_output_dir, filename)
        filename = re.sub( '/+','/', filename)

        # if this filename already exists, run this sub again, but this time with
        # a random number following the desired filename
        while os.path.exists(filename):
            filename = self.build_filename(output_type, extension, month_index=month_index,
                                           rand=random.randint(0,100))


        return filename




    def is_pixel_request(self):
        ''' Boolean method dictating whether we allow this job to be performed at the pixel level or not '''

        num_pixels = self.data['ref_land'].shape[0] * self.data['ref_land'].shape[1]

        # check if the total number of pixels exceeds the maximum defined in the config
        if num_pixels <= self.cfg.max_pts_analysis:
            return True

        return False




    def load_data(self):
        ''' Fetch the data for this job and store it as the job.data dictionary.

            The data is obtained from :func:`adam_io.get_netcdf_data` for the job's extent,
            month index and config. According to the original notes the mask indices
            distinguishing land from ocean from snow are populated as part of this.

            **Returns:**
               the loaded data (the same object now held in ``self.data``)

        '''
        loaded = adam_io.get_netcdf_data(self.extent, self.month_index, self.cfg)
        self.data = loaded

        return loaded




    def process_reflectance(self,  do_compute_error=True):
        ''' Convenience method computing the reflectance and storing it under the standard variable names.

        The work is delegated to :func:`process_reflectance.main`; its two results are stored in
        ``self.data['reflectance']`` and ``self.data['reflectance_err_land']``.

        **Kwargs:**
           do_compute_error (bool): Whether to include the error computation or not

        **Returns:**
           tuple: Tuple of 2 values:  (reflectance, reflectance error)

        '''
        reflectance, reflectance_err = process_reflectance.main(self, do_compute_error=do_compute_error)

        self.data['reflectance'] = reflectance
        self.data['reflectance_err_land'] = reflectance_err

        return reflectance, reflectance_err


    def calculate_ndvi(self):
        ''' Compute the NDVI data array for this job and store it in ``self.data['NDVI']``.

        The NDVI array is used by the :func:`process_brdf` method on an ADAM job.

        Actual work is performed by the :func:`process_reflectance.calculate_ndvi` function, its parameters
        are detailed there. It is given the job's reflectance together with the wavelengths
        (``cfg.lmbd``) and MODIS wavebands (``cfg.modis_wavebands``) from the config.

        **Returns:**
           the NDVI value that was stored

        '''
        reflectance = self.data['reflectance']
        config = self.cfg

        ndvi = process_reflectance.calculate_ndvi(reflectance,
                                                  config,
                                                  lmbd=config.lmbd,
                                                  domains=config.modis_wavebands)
        self.data['NDVI'] = ndvi

        return ndvi

    def calculate_stats(self, input_array):
        ''' Compute statistics for the array passed in and keep them on the job.

        Results are inserted into the 'stats' key of the data dictionary.

        Actual work is performed by the :func:`analysis.get_stats` function, its parameters
        are detailed there; it receives the input array and the job's data dictionary.

        **Args:**
           input_array (numpy.array):  The input array to be analysed

        **Returns:**
           None

        '''
        stats = analysis.get_stats(input_array, self.data)
        self.data['stats'] = stats


    def define_vza_hotspot(self):
        ''' Replace the job's vza sampling with one refined around the sza (higher resolution near sza).

        Actual work is performed by the :func:`process_brdf.define_vza_hotspot` function, its parameters
        are detailed there; it is called with the job's sza and ``cfg.vza_values`` and its
        result becomes ``self.vza``.

        **Returns:**
           None

        '''
        refined_vza = process_brdf.define_vza_hotspot(self.sza, self.cfg.vza_values)
        self.vza = refined_vza



    def process_brdf(self, do_compute_error=True, do_spectral_averaging=False):
        '''Convenience method to calculate the BRDF and populate the standard variable names.

        Further information is available in the :func:`process_brdf.main` function, which performs the work
        for this method.

        The NDVI is calculated first if it is not yet present in the job data. When spectral
        averaging is requested, the averaged reflectance is computed once (over the job's
        spectral domains), cached under ``self.data['reflectance_averaged']`` and used as input
        instead of the full reflectance.

        **Kwargs:**
            do_compute_error (bool): Whether to include the error computation or not
            do_spectral_averaging (bool): Whether to average the results across all the input spectral bands

        **Returns:**
            tuple: Tuple of 2 values:  (brdf, brdf error), also stored in
                   ``self.data['BRDF']`` and ``self.data['BRDF_err_land']``

        '''

        # membership tests use 'in': dict.has_key() no longer exists on Python 3
        if 'NDVI' not in self.data: self.calculate_ndvi()

        ref_data = self.data['reflectance']
        if do_spectral_averaging:
            if 'reflectance_averaged' not in self.data:
                self.data['reflectance_averaged'] = process_reflectance.spectral_selection(
                                                        self.data['reflectance'],
                                                        self.cfg.lmbd,
                                                        domains=self.spectral_domains
                                                    )
            ref_data = self.data['reflectance_averaged']

        self.data['BRDF'], self.data['BRDF_err_land'] = process_brdf.main(
                                                          ref_data,
                                                          self.data,
                                                          self.cfg,
                                                          self.sza,
                                                          self.lmbd,
                                                          self.vza,
                                                          self.phi,
                                                          sza_max_limit=self.sza_max_limit,
                                                          vza_max_limit=self.vza_max_limit,
                                                          beyond_za_limit=self.beyond_za_limit,
                                                          do_compute_error=do_compute_error,
                                                          do_spectral_averaging=do_spectral_averaging
                                                    )

        return self.data['BRDF'], self.data['BRDF_err_land']








    def graph_brdf_time(self):
        ''' Method to output time series (1 per month) of BRDF reflectance.

        This method drives the chain of calculating the BRDF for each month before finally calling
        the :func:`show.main_brdf_time` function to do the actual graphing work.

        For every month the data is loaded, the reflectance and spectrally averaged BRDF are
        computed and a netcdf is saved. The monthly BRDF arrays are collected in
        ``self.time_series`` and the matching surface indices in ``self.time_series_idxs``.
        If the number of pixels is within ``cfg.max_pts_analysis`` a 'pixels' graph is drawn;
        otherwise per-surface monthly statistics (mean/std/min/max and the maximum pixel
        count 'npts') are stored in ``self.data['stats']`` and a 'stats' graph is drawn.

        **Returns:**
            None

        '''

        # initialise what will be the array of monthly BRDF data
        self.time_series = []
        self.time_series_idxs = []

        # loop through every month of the year:
        for month_index in range(12):
            # get the data
            self.data = adam_io.get_netcdf_data(self.extent, month_index, self.cfg)

            # calculate the BRDF
            self.set_status('get reflectance for month index: %s' % month_index)
            self.process_reflectance()
            self.process_brdf(do_spectral_averaging=True)

            # save this netcdf to a file, this will be added to the list of generated products
            # for recuperation by the client
            self.save_netcdf()

            # append the data to our array
            self.time_series.append(self.data['BRDF'])

            # the land/snow/ocean indexes potentially change between months so they need
            # to be stored in an array also; land and snow are presented together
            self.time_series_idxs.append({'land' : np.concatenate((self.data['idx_land'], self.data['idx_snow'])),
                                          'ocean': self.data['idx_ocean'],
                                          'snow' : np.array([])
                                        })

        # now all the months are calculated, count the number of pixels this calculation was for
        # (taken from the data of the last month processed)
        num_pixels = self.data['reflectance'].shape[0] * self.data['reflectance'].shape[1]

        # if there are only a few pixels selected we do pixel analysis
        if num_pixels <= self.cfg.max_pts_analysis:
            self.set_status('Perform BRDF pixel analysis for time series')
            show.main_brdf_time(self, case = 'pixels')
            return

        # otherwise do stats
        self.set_status('Perform BRDF stats analysis for time series')
        stats = {}
        self.data['stats'] = stats

        for surface in ['land', 'ocean', 'snow']:
            # each stat holds one value per month, i.e.
            # stats[surface]['mean'] = (value for) [jan, feb, mar, apr, ...]
            # 'npts' keeps track of the maximum number of pixels for this surface
            # that occur over all the months (used for the legend text)
            surface_stats = {'mean': [], 'std': [], 'min': [], 'max': [], 'npts': 0}
            stats[surface] = surface_stats

            # stats are calculated within each month's BRDF values
            for month_index in range(12):
                # get the indices of this surface as we treat each type separately
                surface_indexs = self.time_series_idxs[month_index][surface]

                if len(surface_indexs) > 0:
                    # select this surface's values once and derive every stat from them
                    values = self.time_series[month_index].ravel()[surface_indexs]
                    surface_stats['mean'].append(values.mean())
                    surface_stats['std'].append(values.std())
                    surface_stats['min'].append(values.min())
                    surface_stats['max'].append(values.max())
                else:
                    # no pixel of this surface this month: insert a NaN so the graph
                    # will skip this month gracefully (np.nan works on every numpy
                    # version, the np.NaN alias does not)
                    surface_stats['mean'].append(np.nan)
                    surface_stats['std'].append(np.nan)
                    surface_stats['min'].append(np.nan)
                    surface_stats['max'].append(np.nan)

                surface_stats['npts'] = max(surface_stats['npts'], len(surface_indexs))

        # draw the graph(s)
        show.main_brdf_time(self, case = 'stats')


    def graph_principal_plane(self, case='stats', indices=None, title=None, graph_error=False):
        ''' Produce a principal plane graph for this job.

        Actual work is performed by :func:`show.main_brdf_principal_plane`, more documentation can
        be obtained there.

        Per the original notes, requesting pixel graphs for multiple pixels generates multiple
        graphs, and the graph(s) are added to the 'outputs' array on the job.

        **Kwargs:**
            case (str): 'stats' or 'pixels' indicating the type of graph to draw; any value
                        other than 'stats' is treated as 'pixels'
            indices (numpy.array): array of pixel indexes to be graphed (pixel graphs only)
            title (str): graph title (pixel graphs only)
            graph_error (bool): whether to draw the error bars on the graph (pixel graphs only)

        **Returns:**
            None

        '''
        # anything that is not a 'stats' request produces 'pixel' graphs, one graph per pixel
        if case != 'stats':
            show.main_brdf_principal_plane(self, case='pixels', indices=indices, title=title, graph_error=graph_error)
            return

        # 'stats' style graphs show mean/max/min values
        show.main_brdf_principal_plane(self, case='stats')




    def graph_polar_plot(self, case='stats', indices=None, title=None, three_d=False):
        ''' Produce a polar plot graph for this job.

        Actual work is performed by :func:`show.main_brdf_polar_plot`, more documentation can be obtained there.

        Per the original notes, requesting pixel graphs for multiple pixels generates multiple
        graphs, and the graph(s) are added to the 'outputs' array on the job.

        **Kwargs:**
            case (str): 'stats' or 'pixels' indicating the type of graph to draw; any value
                        other than 'stats' is treated as 'pixels'
            indices (numpy.array): array of pixel indexes to be graphed (pixel graphs only)
            title (str): graph title
            three_d (bool): whether to use 3d  instead of 2D mode

        **Returns:**
            None
        '''

        # options shared by both kinds of plot
        plot_options = {'case': 'stats', 'title': title, 'three_d': three_d}

        # pixel plots additionally need to know which pixels to draw
        if case != 'stats':
            plot_options['case'] = 'pixels'
            plot_options['indices'] = indices

        show.main_brdf_polar_plot(self, **plot_options)


    def graph_main_ref_spectra(self, case='stats', indices=None, title=None):
        ''' Produce a spectrum graph for this job.

        Actual work is performed by :func:`show.main_ref_spectra`, more documentation can be obtained there.
        All arguments are forwarded to it unchanged.

        Per the original notes, requesting pixel graphs for multiple pixels generates multiple
        graphs, and the graph(s) are added to the 'outputs' array on the job.

        **Kwargs:**
            case (str): 'stats' or 'pixels' indicating the type of graph to draw
            indices (numpy.array): array of pixel indexes to be graphed
            title (str): graph title

        **Returns:**
            None

        '''
        graph_options = {'case': case, 'indices': indices, 'title': title}
        show.main_ref_spectra(self, **graph_options)


    def map_ref_averaged_spectra(self, spectral_domain_index=0, width=None, output_dir=None):
        ''' Produce a map (png or jpg) and a world file providing georeferencing info for the job.

        Actual work is performed by :func:`show.map_pil`, more documentation can be obtained there.

        According to the original notes PIL is used rather than Python graphing so that the
        image has no margin, for more accurate georeferencing.

        **Kwargs:**
            spectral_domain_index (int): The index of the spectral domain in the 'reflectance_averaged'
                                         array when dealing with jobs over multiple spectral domains.

            width (int): width of map in pixels
            output_dir (str): path to directory to write map to

        **Returns:**
            None

        '''
        map_options = {'width': width, 'output_dir': output_dir}
        show.map_pil(self, spectral_domain_index, **map_options)

    def map_raw_values(self, spectral_domain_index=0, width=None, output_dir=None):
        ''' Produces a tif map holding the raw (non coloured) data values.

        This is a thin wrapper around :func:`show.map_raw_values`, where the actual work is
        done and where more documentation can be found.

        The resulting TIFs carry raw reflectance values as pixel values (as opposed to
        stretched RGB values) and can later be used for colour composite mapping.

        **Kwargs:**
            spectral_domain_index (int): index of the spectral domain in the
                                         'reflectance_averaged' array, for jobs covering
                                         multiple spectral domains.

            width (int): width of the map in pixels
            output_dir (str): absolute path of the directory the map is written to

        **Returns:**
            None

        '''
        # the spectral domain index stays positional, the rest is forwarded by keyword
        map_options = {'width': width, 'output_dir': output_dir}
        show.map_raw_values(self, spectral_domain_index, **map_options)


    def save_netcdf(self, output_filename=None):
        ''' Writes a netcdf file containing all the extracted and calculated data of the job.

        The raw input netcdf (``self.cfg.netcdf_datafile``) is opened read-only; each of its
        variables that also exists in ``self.data`` is recreated in the output file, followed
        by the calculated variables present on the job (reflectance, NDVI, BRDF, ...).

        Both netcdf files are closed even when writing fails. The file is only added to the
        'outputs' array on the job (as 'netcdf_output') once it has been written and closed.

        **Kwargs:**
            output_filename (str): absolute name of filename to write to; when omitted the
                                   name is requested from :func:`build_filename`.

        **Returns:**
            None
        '''

        # if we were not given an explicit output filename we request one from our utility method
        if output_filename is None:
            output_filename = self.build_filename('netcdf', 'nc')

        # initialise the output netcdf
        output_dataset = netCDF4.Dataset(output_filename, 'w', format='NETCDF4')
        try:
            # open the input raw data netcdf readonly
            source_dataset = netCDF4.Dataset(self.cfg.netcdf_datafile, 'r')
            try:
                self._write_netcdf_contents(output_dataset, source_dataset)
            finally:
                source_dataset.close()

            # flush everything to disk before closing
            output_dataset.sync()
        finally:
            output_dataset.close()

        # add the netcdf to the list of outputs of this job
        self.add_output('netcdf_output', output_filename)


    def _write_netcdf_contents(self, output_dataset, source_dataset):
        ''' Fills the opened output netcdf with the dimensions, attributes and variables of the job.

        **Args:**
            output_dataset (netCDF4.Dataset): dataset opened for writing
            source_dataset (netCDF4.Dataset): raw input dataset opened for reading

        **Returns:**
            None
        '''
        data = self.data

        # create the dimensions in the output dataset
        # (only one month is written, not len(self.data['month_index']))
        output_dataset.createDimension('month',     1)
        output_dataset.createDimension('band',      len(source_dataset.dimensions['band']))
        output_dataset.createDimension('latitude',  len(data['latitude']))
        output_dataset.createDimension('longitude', len(data['longitude']))

        # set the indexes of the land/ocean/snow pixels as attributes in the netcdf
        # note these are redundant now as we also include the mask as a variable (variable: 'mask')
        for attr_name in ('idx_land', 'idx_ocean', 'idx_snow'):
            output_dataset.setncattr(attr_name, data[attr_name])

        # set more attributes
        output_dataset.setncattr('wavelength_min', self.lmbd.min())
        output_dataset.setncattr('wavelength_max', self.lmbd.max())

        for attr_name in ('phi', 'sza', 'vza'):
            if hasattr(self, attr_name):
                output_dataset.setncattr(attr_name, getattr(self, attr_name))

        # NOTE: a disabled (fully commented-out) experiment that reprojected the reflectance
        # to a sinusoidal GeoTIFF through GDAL was dropped from this spot as dead code.

        self._copy_source_variables(output_dataset, source_dataset)

        if 'ref_land_covar' in data:
            # NOTE(review): 28 is hard-coded; presumably the number of covariance terms
            # stored per pixel -- confirm against the producer of 'ref_land_covar'
            output_dataset.createDimension('covariance_matrix', 28)
            self._write_data_variable(output_dataset, 'ref_land_covar', data['ref_land_covar'],
                                      ('longitude', 'latitude', 'covariance_matrix'))

        if 'reflectance' in data:
            self._write_reflectance_variables(output_dataset)

        if 'reflectance_err_land' in data:
            self._write_data_variable(output_dataset, 'reflectance_err_land',
                                      data['reflectance_err_land'],
                                      ('longitude', 'latitude', 'spectra'))

        # this dimension is only required for the BRDF (when averaged) and reflectance_averaged
        # variables, so is not present for all jobs
        if hasattr(self, 'spectral_domains'):
            output_dataset.createDimension('spectral_domains', len(self.spectral_domains))
            new_var = output_dataset.createVariable('spectral_domains', 'S', 'spectral_domains',
                                                    zlib=True)
            # we cannot use slicing when assigning the string variable
            for i, spectral_domain in enumerate(self.spectral_domains):
                new_var[i] = str(spectral_domain)

        if 'reflectance_averaged' in data:
            self._write_data_variable(output_dataset, 'reflectance_averaged',
                                      data['reflectance_averaged'],
                                      ('longitude', 'latitude', 'spectral_domains'))

        if 'NDVI' in data:
            self._write_data_variable(output_dataset, 'NDVI', data['NDVI'],
                                      ('longitude', 'latitude'))

        self._write_brdf_variables(output_dataset)


    def _copy_source_variables(self, output_dataset, source_dataset):
        ''' Recreates every source netcdf variable that also exists in the job data.

        The variable definition (type, dimensions, attributes) is taken from the source
        dataset, the values from ``self.data`` (except for the static 'land_cover_classes').
        '''
        for var_name in source_dataset.variables:

            # skip variables that don't exist in our job
            if var_name not in self.data:
                continue

            source_var = source_dataset.variables[var_name]

            # plain trace message: logging.exception() is reserved for exception handlers
            logging.debug('create: %s dimensions: %s', var_name, [source_var.dimensions])

            # create the variable in the output file
            output_var = output_dataset.createVariable(var_name,
                                                       source_var.dtype,
                                                       source_var.dimensions,
                                                       zlib=True)

            # for each attribute in this variable, transfer it to the output file
            for attribute in source_var.ncattrs():
                output_var.setncattr(attribute, source_var.getncattr(attribute))

            # land_cover_classes is special as it's static, and doesn't depend on / change
            # with this job object like the others
            if var_name == 'land_cover_classes':
                output_var[:] = source_var[:]
                continue

            output_var[:] = self.data[var_name][:]

            if var_name in ('latitude', 'longitude'):
                written = output_var[:]
                output_var.setncattr('valid_min', written.min())
                output_var.setncattr('valid_max', written.max())


    def _write_reflectance_variables(self, output_dataset):
        ''' Writes the mask, the 'spectra', 'vza' and 'phi' axes and the reflectance itself. '''
        reflectance = self.data['reflectance']

        # for convenience.. save the land / ocean / snow mask in the shape of the reflectance matrix
        # TODO: this does not take into account multiple month calculations, and the mask can
        #       change over months !
        mask_shape = reflectance[:, :, 0].shape
        land_water_snow = np.empty(mask_shape)
        land_water_snow[:] = -1
        # the idx_* arrays are used as indexes into the flattened mask
        land_water_snow = land_water_snow.flatten()
        land_water_snow[self.data['idx_land']]  = 0
        land_water_snow[self.data['idx_ocean']] = 1
        land_water_snow[self.data['idx_snow']]  = 2
        land_water_snow = land_water_snow.reshape(mask_shape)
        # the builtin int is what the removed 'np.int' alias used to refer to
        new_var = output_dataset.createVariable('mask', int, ('longitude', 'latitude'), zlib=True)
        new_var.setncattr('legend', '0=land, 1=ocean, 2=snow, -1=undef')
        new_var[:] = land_water_snow

        # create the 'spectra' dimension and associated variable
        output_dataset.createDimension('spectra', reflectance.shape[-1])
        new_var = output_dataset.createVariable('spectra', 'f4', 'spectra', zlib=True)
        new_var[:] = self.cfg.lmbd[:]
        new_var.setncattr('valid_min', self.cfg.lmbd.min())
        new_var.setncattr('valid_max', self.cfg.lmbd.max())

        # create the 'vza' and 'phi' dimensions and associated variables
        for axis_name, axis_values in (('vza', self.vza), ('phi', self.phi)):
            output_dataset.createDimension(axis_name, len(axis_values))
            new_var = output_dataset.createVariable(axis_name, axis_values.dtype, axis_name,
                                                    zlib=True)
            new_var[:] = axis_values[:]
            new_var.setncattr('valid_min', axis_values.min())
            new_var.setncattr('valid_max', axis_values.max())

        self._write_data_variable(output_dataset, 'reflectance', reflectance,
                                  ('longitude', 'latitude', 'spectra'))


    def _write_brdf_variables(self, output_dataset):
        ''' Writes 'BRDF' and 'BRDF_err_land' (whichever exist), both on the same dimensions. '''
        brdf_names = [name for name in ('BRDF', 'BRDF_err_land') if name in self.data]
        if not brdf_names:
            return

        # the brdf variable can be either in the shape of the spectra, or in the spectral domains
        # (i.e. when averaged).. we need to determine which one it is for this job.
        # the shape could be long,lat,spect_domains,vza,phi OR long,lat,nlmbd,vza,phi
        brdf_spec_dimension = 'spectral_domains'
        n_spectral = self.data[brdf_names[0]].shape[2]
        if n_spectral != len(self.spectral_domains):
            # if we're here BRDF was not averaged, so we need to create a dimension
            # for each of the wavelengths that was used in the calculation (lmbd)
            output_dataset.createDimension('wavelengths', n_spectral)
            new_var = output_dataset.createVariable('wavelengths', 'f4', 'wavelengths', zlib=True)
            new_var[:] = self.lmbd
            brdf_spec_dimension = 'wavelengths'

        # now we know the dimension exists we can write the variable(s);
        # the 'vza' and 'phi' dimensions are the ones created along with 'reflectance'
        dimensions = ('longitude', 'latitude', brdf_spec_dimension, 'vza', 'phi')
        for name in brdf_names:
            self._write_data_variable(output_dataset, name, self.data[name], dimensions)


    @staticmethod
    def _write_data_variable(output_dataset, name, values, dimensions):
        ''' Creates a compressed variable, fills it and stores its valid_min / valid_max.

        **Args:**
            output_dataset (netCDF4.Dataset): dataset opened for writing
            name (str): name of the variable to create
            values (numpy.array): values to write, the variable takes over their dtype
            dimensions (tuple): names of the (already existing) dimensions of the variable

        **Returns:**
            the newly created netCDF4 variable
        '''
        new_var = output_dataset.createVariable(name, values.dtype, dimensions, zlib=True)
        new_var[:] = values[:]
        # the range is taken from the values as read back from the file, ignoring NaNs
        written = new_var[:]
        new_var.setncattr('valid_min', np.nanmin(written))
        new_var.setncattr('valid_max', np.nanmax(written))
        return new_var



