#!/usr/bin/python

# Metarace : Cycle Race Abstractions
# Copyright (C) 2012  Nathan Fraser
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Update the metarace shared namebank database from a dump file.

Dump file is direct from IMG poo:

0	First
1	Last
2	State
3	Country
4	DOB
5	Gender
6	Member Types
7	Member Number
8	Club
9	Club Code (ignored)

"""

from __future__ import print_function

import os
import sys
import metarace
import shelve
import random
import datetime
from metarace import strops
from metarace import ucsv
from contextlib import nested, closing
# Dump file column number -> position of that value in a namebank row.
COLMAP = {
    0: 1,   # First
    1: 2,   # Last
    2: 5,   # State
    3: 8,   # Country
    4: 6,   # DOB
    5: 7,   # Gender
    6: 4,   # Member Types
    7: 0,   # Member Number
    8: 3,   # Club
}

# First three characters of the lower-cased state field -> abbreviation.
STATEMAP = {
    u'vic': u'VIC',
    u'new': u'NSW',
    u'tas': u'TAS',
    u'sou': u'SA',
    u'wes': u'WA',
    u'nor': u'NT',
    u'a.c': u'ACT',
    u'aus': u'ACT',
    u'que': u'QLD',
}

# Every category value returned by cleancat(), reported at end of run.
catchecka = set()

def cleandate(indate):
    """Return indate re-formatted as 'YYYY-MM-DD', or None if invalid.

    indate is expected in the form '%d-%b-%Y', for example
    '05-Mar-1987'. A value that cannot be parsed (including an empty
    string or a non-string) is reported with a warning on stdout and
    None is returned.
    """
    try:
        dt = datetime.datetime.strptime(indate, '%d-%b-%Y')
        return dt.strftime('%Y-%m-%d')
    except (ValueError, TypeError):
        # ValueError: text does not match the format or is not a real
        # calendar date; TypeError: indate is not a string at all.
        # Anything else (eg KeyboardInterrupt) is allowed to propagate.
        print('Warning: Invalid DoB ' + repr(indate))
        return None

# TODO: Ignore license except for N/C, Gold, Masters and determine CAT
#       by season age
def cleancat(cat, gender):
    """Return the category code for member type cat and gender.

    Matching is done on the lower-cased member type. A member type
    that matches none of the known patterns is returned unchanged.
    Every value returned is also recorded in the module-level set
    catchecka.
    """
    lowered = cat.lower()
    code = cat      # unrecognised member types pass through untouched
    if u'kidz' in lowered:
        code = u'KIDZ'
    elif u'ride' in lowered or u'platinum' in lowered:
        code = u'N/C'
    elif u'elite' in lowered:
        code = u'Elite' + gender.upper()
    elif u'mas' in lowered:
        code = gender.upper() + lowered.upper()
    elif u'jnr' in lowered:
        # Juniors: U19, U17 and U15 share one pattern, the younger
        # groups are handled individually below.
        for age in (u'19', u'17', u'15'):
            if (u'u' + age) in lowered:
                code = u'J' + gender.upper() + age
                break
        else:
            if u'u13' in lowered:
                sex = gender.upper()
                under13 = {u'M': u'JB13', u'W': u'JG13'}
                code = under13.get(sex, u'J13' + sex)
            elif u'u11' in lowered:
                code = u'J11'
            elif u'u9' in lowered:
                code = u'J9'
            # any other junior member type is left unchanged
    elif lowered == u'u23':
        code = gender.upper() + u'23'

    catchecka.add(code)
    return code

# Script body: load the dump file named on the command line into the
# shared namebank shelf, then rebuild the name index shelf from scratch.

if len(sys.argv) < 2:
    print (u'Usage: ' + sys.argv[0] + u' namebank_file.csv [-r]')
    sys.exit(1)

if not os.path.isfile(sys.argv[1]):
    print (u'Error: ' + sys.argv[1] + u' not a file.')
    sys.exit(1)
sfile = os.path.abspath(sys.argv[1])
# Default is to update the existing namebank ('c'); -r requests a
# new, empty database ('n').
dbflag = 'c'
if len(sys.argv) > 2 and sys.argv[2] == '-r':
    dbflag = 'n'

metarace.init(withgtk=False)

with nested(closing(shelve.open(os.path.join(metarace.DEFAULTS_PATH,
                                             u'namebank'),
                                flag=dbflag)),
            closing(shelve.open(os.path.join(metarace.DEFAULTS_PATH,
                                             u'nameindx'),
                                flag='n'))
           ) as (nb, idx):
    print('Opened namebank: ' + str(len(nb)) + ' entries.')
    licset = set()      # namebank keys written so far during this run
    with open(sfile) as f:
        print('Reading names from ' + sfile + '...')
        cr = ucsv.UnicodeReader(f)
        rcount = 0
        for row in cr:
            ir = [cell.translate(strops.PRINT_UTRANS).strip() for cell in row]
            # Skip headers and anything without a numeric member number.
            if not (len(ir) > 9 and ir[7].isdigit()):
                continue

            key = ir[7].encode('ascii','ignore')
            if key in licset:
                # Member number already seen in this file: store the row
                # under a random 'dNNNNN' key instead of overwriting.
                olr = nb[key]
                oldname = u' '.join([olr[1],olr[2],olr[4]])
                key = 'd' + str(random.randint(50000,80000))
                # Re-draw until the key is unused so that an unrelated
                # record is never overwritten.
                while key in licset or key in nb:
                    key = 'd' + str(random.randint(50000,80000))
                print(u'Duplicate License: ' + olr[0] + u' ' + oldname
                       + u' assigned to: ' + key)
            licset.add(key)
            rcount += 1
            if key in nb:
                nr = nb[key]
            else:
                nr = [key, u'', u'', u'', u'', u'', u'', u'', u'']

            # Clean up input fields
            ir[0] = ir[0].title()       # titlecase first name
            ir[1] = ir[1].upper()       # Last
            ir[2] = STATEMAP.get(ir[2].lower()[0:3], ir[2].upper())
            ir[4] = cleandate(ir[4])    # None when DoB is invalid
            # Anything not starting with 'm' (including blank) is 'W'.
            ir[5] = u'M' if ir[5][:1].lower() == u'm' else u'W'
            # Gender must be patched BEFORE the category is derived.
            ir[6] = cleancat(ir[6], ir[5])

            # Copy to dest row. Empty strings and the None returned for
            # an invalid DoB are skipped so stored values are not cleared.
            # Column 7 lands in nr[0], so a duplicate row keeps its
            # original member number there; the index pass below relies
            # on that to find duplicates.
            for i in range(9):
                if ir[i]:
                    nr[COLMAP[i]] = ir[i]
            nb[key] = nr
    nb.sync()
    print(u'Category Set: ' + repr(sorted(catchecka)))
    print(u'Closing namebank: {0} entries.'.format(len(nb)))
    print(u'Re-creating index...')
    tid = {}            # bucket -> list of namebank keys
    nblen = len(nb)
    cnt = 0
    indexlics = {}      # license no -> first namebank key seen
    licbuckets = {}     # license no -> all keys sharing that license no
    for r in nb:
        rec = nb[r]     # fetch the record from the shelf only once
        rno = rec[0].encode('ascii','ignore')
        if rno not in indexlics:
            indexlics[rno] = r          # assign lookup to license no
        else:
            # this is a duplicate, so start a new dupe bucket if needed
            if rno not in licbuckets:
                licbuckets[rno] = [indexlics[rno]]
            licbuckets[rno].append(r)

        # Index each record under 3 and 4 character prefixes of the
        # search form of fields 1 and 2.
        for llen in [3, 4]:
            for nm in [rec[1], rec[2]]:
                bucket = strops.search_name(nm)[0:llen]
                t = tid.setdefault(bucket, [])
                if r not in t:
                    t.append(r)
        cnt += 1
        if cnt % 100 == 0:
            print(u'Scanning {0}/{1}            '.format(cnt,nblen),
                    end='\r',file=sys.stderr)
    # add all the duplicate buckets to index
    tid.update(licbuckets)
    dupecnt = len(licbuckets)
    maxlen = 0
    total = 0
    cnt = 0
    maxb = u''
    for i in tid:
        blen = len(tid[i])
        idx[i] = tid[i]
        if blen > 0:
            if blen > maxlen:
                maxlen = blen
                maxb = i
            total += blen
            cnt += 1
    # Integer average; an empty index reports 0 rather than dividing
    # by zero.
    avg = total // cnt if cnt else 0
    print(u'Added {0} Duplicate license nos.'.format(dupecnt))
    print(u'Wrote {0} buckets, Max: {1} ({2} ids), Avg: {3} ids.'.format(
               cnt, repr(maxb), maxlen, avg))

print(u'Done.')
