#!/usr/bin/python

# Metarace : Cycle Race Abstractions
# Copyright (C) 2012  Nathan Fraser
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Update the metarace shared namebank database from a dump file.

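The dump file is given as the first command line argument. Unless the
optional second argument -r is given, in which case the namebank is
re-created empty first, rows are merged into the existing entries:
non-empty fields in the dump replace stored values and empty fields
leave them untouched. Rows whose first column is not a numeric ID are
ignored. The name search index is always rebuilt.

The dump file should have the same columns as the namebank DB: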

	ID	License no (aka member no)
	First	First name
	Last	Last name
	Club	Full affiliated club name (blank for internationals)
	Cat	License category (some work needed here!)
	State	State or country of origin as three letter abbreviation
	DOB	Date of birth in ISO format
	Gender	Male or Female
	Refid	Optional fixed transponder id

"""

from __future__ import print_function

import os
import sys
import metarace
import shelve
from metarace import strops
from metarace import ucsv
from contextlib import nested, closing

# --- Command line handling -------------------------------------------
# A dump file name is mandatory; bail out with a usage hint without it.
if len(sys.argv) <= 1:
    print(u'Usage: ' + sys.argv[0] + u' namebank_file.csv [-r]')
    sys.exit(1)

# The first argument has to name an existing regular file.
if os.path.isfile(sys.argv[1]):
    sfile = os.path.abspath(sys.argv[1])
else:
    print(u'Error: ' + sys.argv[1] + u' not a file.')
    sys.exit(1)

# Shelve open flag for the namebank: 'c' opens read/write and creates
# the database if needed, 'n' always starts from a new, empty database.
# The latter is selected by passing -r as the second argument.
dbflag = 'n' if sys.argv[2:3] == ['-r'] else 'c'

# Initialise the metarace library without its GTK parts.
metarace.init(withgtk=False)

# Open the namebank with the requested flag and always start the search
# index from scratch (flag 'n').  The with statements are nested instead
# of being combined through contextlib.nested() so that the namebank is
# still closed if opening the index raises.
with closing(shelve.open(os.path.join(metarace.DEFAULTS_PATH, u'namebank'),
                         flag=dbflag)) as nb:
    with closing(shelve.open(os.path.join(metarace.DEFAULTS_PATH,
                                          u'nameindx'),
                             flag='n')) as idx:
        print('Opened namebank: ' + str(len(nb)) + ' entries.')

        # --- Pass 1: merge rows of the dump file into the namebank ---
        with open(sfile) as f:
            print('Reading names from ' + sfile + '...')
            for row in ucsv.UnicodeReader(f):
                ir = [cell.translate(strops.PRINT_UTRANS).strip()
                      for cell in row]
                # Only rows starting with an all-digit ID are processed;
                # anything else (headers, blank lines) is skipped.
                if not ir or not ir[0].isdigit():
                    continue
                key = ir[0].encode('ascii', 'ignore')
                if key in nb:
                    nr = nb[key]
                else:
                    # New entry: the ID followed by eight empty fields.
                    nr = [key] + [u''] * 8
                # Columns 1..8, limited to those present in this row.
                for i in range(1, min(len(ir), 9)):
                    val = ir[i]
                    # Clean up input fields
                    if i in (2, 4, 5, 7):
                        val = val.upper()
                    elif i == 1:
                        val = val.title()   # titlecase first name
                    elif i == 8:
                        val = val.lower()   # lowercase RFIDs
                    # Copy to dest row; an empty input field never
                    # overwrites a stored value.
                    if val != '':
                        nr[i] = val
                # Assign back so the shelf stores the modified list.
                nb[key] = nr
        nb.sync()
        print(u'Closing namebank: ' + unicode(len(nb)) + u' entries.')

        # --- Pass 2: rebuild the name prefix index in memory ---
        print(u'Re-creating index...')
        tid = {}
        nblen = len(nb)
        cnt = 0
        for r in nb:
            # Fetch the record once; every nb[r] is a database read.
            rec = nb[r]
            # Search keys for the entries at positions 1 and 2 (first and
            # last name in the column layout of the dump file).
            skeys = [strops.search_name(rec[1]), strops.search_name(rec[2])]
            for llen in (3, 4):
                for sk in skeys:
                    t = tid.setdefault(sk[0:llen], [])
                    # Each ID is visited in exactly one outer iteration,
                    # so if it is already in this bucket it must be the
                    # last element; no need to scan the whole list.
                    if not t or t[-1] != r:
                        t.append(r)
            cnt += 1
            if cnt % 100 == 0:
                print(u'Scanning {0}/{1}            '.format(cnt, nblen),
                      end='\r', file=sys.stderr)

        # --- Pass 3: write the buckets to the index and report stats ---
        longest = 0     # size of the largest bucket
        total = 0       # sum of all bucket sizes
        nbuckets = 0    # number of buckets written
        maxb = u''      # key of the largest bucket
        for bucket, ids in tid.items():
            idx[bucket] = ids
            # A bucket only exists once an ID was appended to it, so it
            # is never empty here.
            blen = len(ids)
            if blen > longest:
                longest = blen
                maxb = bucket
            total += blen
            nbuckets += 1
        # Integer average; guard against an empty namebank, which would
        # otherwise raise ZeroDivisionError.
        avg = total // nbuckets if nbuckets else 0
        print(u'Wrote {0} buckets, Max: {1} ({2} ids), Avg: {3} ids.'.format(
              nbuckets, repr(maxb), longest, avg))

print(u'Done.')
