#!/usr/bin/env python
import netCDF4_classic as netCDF4
import numpy as NP

# Number of records along the unlimited dimension that nc3tonc4 reads and
# writes per slice when copying a variable that has an unlimited dimension.
nchunk = 1238400

def _unpack_short(ncvar, idata, mval):
    """Return idata unpacked to floats using ncvar's scale_factor/add_offset.

    If ncvar has a 'missing_value' attribute, elements of idata equal to it
    are replaced by mval in the result.
    """
    tmpdata = (ncvar.scale_factor*idata.astype('f')+ncvar.add_offset).astype('f')
    if hasattr(ncvar,'missing_value'):
        tmpdata = NP.where(idata == ncvar.missing_value, mval, tmpdata)
    return tmpdata


def nc3tonc4(filename3,filename4,unpackshort=True,zlib=True,complevel=6,shuffle=True,fletcher32=False,clobber=False,lsd_dict=None):
    """convert a netcdf 3 file (filename3) to a netcdf 4 file (filename4)
    (in NETCDF4_CLASSIC format).

    Dimensions, global attributes, variables, variable attributes and data
    are copied from filename3 to filename4.

    If unpackshort=True, variables that have both a scale_factor and an
    add_offset attribute are unpacked to 4-byte floats in the netcdf 4 file;
    those two attributes are then dropped and any missing_value attribute
    (and matching data values) is replaced by 1.e30.

    If lsd_dict is not None, variables whose names are keys of the dict are
    truncated to the decimal place given by the corresponding value
    (passed as least_significant_digit).  This improves compression by
    making it 'lossy'.

    The zlib, complevel, shuffle and fletcher32 keywords are passed to
    createVariable and control how the compression is done.  clobber is
    passed to the output Dataset constructor.

    Variables that use the unlimited dimension are copied in slices of
    nchunk records along their first axis; all others are copied at once.
    Both files are closed even if an error occurs during the copy."""

    mval = 1.e30 # missing value if unpackshort=True
    ncfile3 = netCDF4.Dataset(filename3,'r')
    try:
        ncfile4 = netCDF4.Dataset(filename4,'w',clobber=clobber)
        try:
            # create dimensions. Check for unlimited dim.
            unlimdimname = None
            unlimdim = None
            print('copying dimensions ..')
            for dimname,dim in ncfile3.dimensions.items():
                if dim.isunlimited():
                    unlimdimname = dimname
                    unlimdim = dim
                    ncfile4.createDimension(dimname,None)
                else:
                    ncfile4.createDimension(dimname,len(dim))
            # create global attributes.
            print('copying global attributes ..')
            for attname in ncfile3.ncattrs():
                setattr(ncfile4,attname,getattr(ncfile3,attname))
            # create variables.
            for varname,ncvar in ncfile3.variables.items():
                print('copying variable %s' % (varname,))
                # quantize data?
                if lsd_dict is not None and varname in lsd_dict:
                    lsd = lsd_dict[varname]
                    print('truncating to least_significant_digit = %s' % (lsd,))
                else:
                    lsd = None # no quantization.
                # unpack short integers to floats?
                dounpackshort = bool(unpackshort and hasattr(ncvar,'scale_factor') and hasattr(ncvar,'add_offset'))
                if dounpackshort:
                    datatype = 'f4'
                    print('unpacking short integers to floats ...')
                else:
                    datatype = ncvar.dtype
                # is there an unlimited dimension?
                hasunlimdim = unlimdim is not None and unlimdimname in ncvar.dimensions
                var = ncfile4.createVariable(varname,datatype,ncvar.dimensions,least_significant_digit=lsd,zlib=zlib,complevel=complevel,shuffle=shuffle,fletcher32=fletcher32)
                # fill variable attributes.
                for attname in ncvar.ncattrs():
                    # packing attributes no longer apply once data is unpacked.
                    if dounpackshort and attname in ['add_offset','scale_factor']: continue
                    if dounpackshort and attname == 'missing_value':
                        setattr(var,attname,mval)
                    else:
                        setattr(var,attname,getattr(ncvar,attname))
                # fill variables with data.
                if hasunlimdim: # has an unlim dim, loop over unlim dim index.
                    nrecs = len(unlimdim)
                    # use the same (guarded) step for the loop and slice end.
                    step = max(nchunk, 1)
                    for n in range(0, nrecs, step):
                        nmax = min(n+step, nrecs)
                        idata = ncvar[n:nmax]
                        if dounpackshort:
                            tmpdata = _unpack_short(ncvar, idata, mval)
                        else:
                            tmpdata = idata
                        var[n:nmax] = tmpdata
                        ncfile4.sync() # flush data to disk.
                else: # no unlim dim or 1-d variable, just copy all data at once.
                    idata = ncvar[:]
                    if dounpackshort:
                        tmpdata = _unpack_short(ncvar, idata, mval)
                    else:
                        tmpdata = idata
                    var[:] = tmpdata
                ncfile4.sync()
        finally:
            ncfile4.close()
    finally:
        ncfile3.close()

if __name__ == '__main__':
    # Command-line driver: parse the options, then call nc3tonc4 on the
    # source and destination file names given as positional arguments.

    import sys
    import getopt
    import os

    usage = """
 Convert a netCDF 3 file to netCDF 4 format (NETCDF4_CLASSIC), optionally
 unpacking variables packed as short integers (with scale_factor and add_offset)
 to floats, and adding zlib compression (with the HDF5 shuffle filter and fletcher32 checksum).
 Data may also be quantized (truncated) to a specified precision to improve compression.

 usage: %s [-h] [-o] [--zlib=(0|1)] [--complevel=(1-9)] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--unpackshort=(0|1)] [--quantize=var1=n1,var2=n2,..] netcdf3filename netcdf4filename
 -h -- Print usage message.
 -o -- Overwite destination file (default is to raise an error if output file already exists).
 --zlib=(0|1) -- Activate (or disable) zlib compression (default is activate).
 --complevel=(1-9) -- Set zlib compression level (6 is default).
 --shuffle=(0|1) -- Activate (or disable) the shuffle filter (active by default).
 --fletcher32=(0|1) -- Activate (or disable) the fletcher32 checksum (not
     active by default).
 --unpackshort=(0|1) -- Unpack short integer variables to float variables
     using scale_factor and add_offset netCDF variable attributes (active by default).
 --quantize=(comma separated list of "variable name=integer" pairs) --
     Truncate the data in the specified variables to a given decimal precision.
     For example, 'speed=2, height=-2, temp=0' will cause the variable
     'speed' to be truncated to a precision of 0.01, 'height' to a precision of 100
     and 'temp' to 1. This can significantly improve compression. The default
     is not to quantize any of the variables.
\n""" % os.path.basename(sys.argv[0])

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'ho',
                                    ['zlib=',
                                     'complevel=',
                                     'shuffle=',
                                     'fletcher32=',
                                     'unpackshort=',
                                     'quantize='
                                     ])
    except getopt.GetoptError:
        # sys.exc_info() keeps this working on interpreters that predate
        # the "except ... as ..." syntax.
        value = sys.exc_info()[1]
        print("Error parsing the options. The error was: %s" % (value,))
        sys.stderr.write(usage)
        sys.exit(1)

    # default options
    overwritefile = 0
    complevel = 6
    zlib = 1
    shuffle = 1
    fletcher32 = 0
    unpackshort = 1
    quantize = None

    # Get the options
    try:
        for name, arg in opts:
            if name == '-h':
                sys.stderr.write(usage)
                sys.exit(0)
            elif name == '-o':
                overwritefile = 1
            elif name == '--zlib':
                zlib = int(arg)
            elif name == '--complevel':
                # must be an integer, like the function's default of 6.
                complevel = int(arg)
            elif name == '--shuffle':
                shuffle = int(arg)
            elif name == '--fletcher32':
                fletcher32 = int(arg)
            elif name == '--unpackshort':
                unpackshort = int(arg)
            elif name == '--quantize':
                quantize = arg
            else:
                print("%s : Unrecognized option" % (name,))
                sys.stderr.write(usage)
                sys.exit(1)
    except ValueError:
        # a non-integer value was given to an integer-valued option.
        print("Option values must be integers. The error was: %s" % (sys.exc_info()[1],))
        sys.stderr.write(usage)
        sys.exit(1)

    # if we pass a number of files different from 2, abort
    if len(pargs) != 2:
        print("You need to pass both source and destination!.")
        sys.stderr.write(usage)
        sys.exit(1)

    # Catch the files passed as the last arguments
    filename3 = pargs[0]
    filename4 = pargs[1]

    # Parse the quantize option, create a dictionary from key/value pairs.
    if quantize is not None:
        lsd_dict = {}
        for p in quantize.split(','):
            kv = p.split('=')
            try:
                if len(kv) != 2:
                    raise ValueError("expected 'variable name=integer'")
                # strip whitespace so that 'speed=2, height=-2' (as shown
                # in the usage text) yields the key 'height', not ' height'.
                lsd_dict[kv[0].strip()] = int(kv[1].strip())
            except ValueError:
                print("Invalid --quantize entry '%s'. The error was: %s" % (p, sys.exc_info()[1]))
                sys.stderr.write(usage)
                sys.exit(1)
    else:
        lsd_dict=None

    # copy the data from filename3 to filename4.
    nc3tonc4(filename3,filename4,unpackshort=unpackshort,zlib=zlib,complevel=complevel,shuffle=shuffle,fletcher32=fletcher32,clobber=overwritefile,lsd_dict=lsd_dict)
