#! /bin/bash

#
# Collect final output files into a single location, and optionally
# delete intermediate files and directories created during the entire
# hotspot process.
#
# For final output, merge overlapping peaks and map z-scores,
# densities and p-values to them.
#

# Run parameters.  The right-hand sides are placeholder tokens;
# presumably a driver substitutes real values before this script is
# executed -- TODO confirm how (and whether) the values are quoted.

# Input tags file.  The project name is its basename without a trailing
# .bam / .bed.starch suffix.
tags=_TAGS_
# Whitespace-separated list of FDR values; one set of final peak files
# is written per value.
fdrs=_FDRS_
# Directory holding the intermediate per-project results
# (e.g. <project>-both-passes).
outdir=_OUTDIR_
# Value embedded after "zgt" in the merged-hotspot input file names.
thresh=_THRESH_
# Value embedded after "merge" in the merged-hotspot input file names.
mrgDist=_MERGE_DIST_
# Value embedded after "wgt" in the merged-hotspot input file names.
minSize=_MINSIZE_
# "T" enables removal of intermediate files and directories at the end.
clean=_CLEAN_
# Directory holding the random-library files and their pass directories.
randir=_RANDIR_
# Genome label used in the random-library file names.
gnom=_GENOME_
# "T" selects <project>.lib.filter.txt as the tag library during
# cleanup; any other value selects <project>.lib.filter.nodup.txt.
dupok=_DUPOK_

# Script label, printed as a banner and used as a prefix in messages.
thisscr="run_final"
echo
echo "$thisscr"

# Print the project name for a tags file: the basename of $1 with a
# trailing ".bam" and then a trailing ".bed.starch" removed.  The
# suffixes are matched literally (the dot is a dot, not "any
# character") and the path may contain whitespace.
# Arguments: $1 - path to the tags file
# Outputs:   project name on stdout
proj_name_from_tags() {
    local name
    name=$(basename -- "$1")
    name=${name%.bam}
    name=${name%.bed.starch}
    printf '%s\n' "$name"
}

proj=$(proj_name_from_tags "$tags")

# Inputs produced by the two-pass hotspot run.
indir=${outdir}/${proj}-both-passes
inpv=${indir}/${proj}.hotspot.twopass.pval.txt
inuh=${indir}/${proj}.hotspot.twopass.zscore.wig
inuhm=${indir}/${proj}.twopass.merge${mrgDist}.wgt${minSize}.zgt${thresh}.wig
inuhmpv=${indir}/${proj}.twopass.merge${mrgDist}.wgt${minSize}.zgt${thresh}.pval.txt

# Assemble the final output directory from the two-pass results.
#
# Reads:   thisscr, proj, indir, inpv, inuh, inuhm, inuhmpv, fdrs
# Writes:  <proj>-final/ and the files inside it.
# External tools: awk, bedops, bedmap, paste.
#
# All four unthresholded inputs must exist and be non-empty; an empty
# or unset path counts as missing.  Paths are quoted so that names
# containing whitespace are handled.
if [[ ! -s "$inpv" || ! -s "$inuh" || ! -s "$inuhm" || ! -s "$inuhmpv" ]]; then
    echo "$thisscr: one of the following files is missing; skipping final directory creation."
    echo "$inpv"
    echo "$inuh"
    echo "$inuhm"
    echo "$inuhmpv"
else
    # NOTE(review): the output directory is relative to the current
    # working directory, not to $outdir -- confirm this is intended.
    outd=${proj}-final
    mkdir -p -- "$outd"
    outuh=${outd}/${proj}.hot.bed
    outuhp=${outd}/${proj}.hot.pval.txt

    # Merged hotspots: copy the p-value file as is, and turn the wig
    # into five tab-separated columns (first line dropped, "." in
    # column 4, the original fourth field in column 5).
    cp -- "$inuhmpv" "$outuhp"
    awk 'NR>1{print $1"\t"$2"\t"$3"\t.\t"$4}' "$inuhm" \
        > "$outuh"

    # Intentionally unquoted: fdrs is a whitespace-separated list.
    for fdr in $fdrs
    do
        echo "fdr $fdr"
        inp=${indir}/${proj}.hotspot.twopass.fdr${fdr}.merge.pks.bed
        inh=${indir}/${proj}.hotspot.twopass.fdr${fdr}.merge.wig
        if [[ ! -s "$inp" || ! -s "$inh" ]]; then
            echo "$thisscr: one of the following files is missing; skipping final output."
            echo "$inp"
            echo "$inh"
            continue
        fi
        outh=${outd}/${proj}.fdr${fdr}.hot.bed
        outb=${outd}/${proj}.fdr${fdr}.pks.bed
        outz=${outd}/${proj}.fdr${fdr}.pks.zscore.txt
        outpv=${outd}/${proj}.fdr${fdr}.pks.pval.txt
        outdns=${outd}/${proj}.fdr${fdr}.pks.dens.txt

        # FDR-thresholded hotspots, same five-column layout as above.
        awk 'NR>1{print $1"\t"$2"\t"$3"\t.\t"$4}' "$inh" \
            > "$outh"
        # Merge overlapping peaks.
        bedops -m "$inp" \
            > "$outb"
        # Per merged peak: maximum score among the overlapping hotspots.
        bedmap --max "$outb" "$outh" \
            > "$outz"
        # Per merged peak: maximum score among the original peaks
        # (presumably the tag density -- TODO confirm).
        bedmap --max "$outb" "$inp" \
            > "$outdns"
        # Per merged peak: minimum score, in scientific notation, over
        # the z-score wig (first line dropped) pasted line by line with
        # the p-value file.  Presumably the paste puts the p-value in
        # the column bedmap scores on -- TODO confirm.
        awk 'NR>1' "$inuh" \
            | paste - "$inpv" \
            | bedmap --min --sci "$outb" - \
            > "$outpv"
    done
fi

## Clean up

# Print a tag count rounded to the nearest 100000: the rounded number
# of hundred-thousands followed by a literal "00000".  Counts below
# 50000 therefore print "000000"; this form is kept on purpose because
# the result is used to build file and directory names.
# Arguments: $1 - tag count (an empty value is treated as 0)
# Outputs:   rounded count on stdout
round_tag_count() {
    printf '%s00000\n' "$(( ($1 + 50000) / 100000 ))"
}

# When clean is "T", remove the intermediate library files and the
# pass1 / pass2 / both-passes directories of the project and of the
# matching random library, plus the project's peaks directory.
if [[ "$clean" == "T" ]]; then
    # Refuse to build rm targets from empty path components; checked
    # before anything is deleted so a misconfiguration removes nothing.
    : "${outdir:?outdir is empty, refusing to clean up}"
    : "${proj:?proj is empty, refusing to clean up}"
    : "${randir:?randir is empty, refusing to clean up}"

    if [[ "$dupok" == "T" ]]; then
        taglib=${outdir}/${proj}.lib.filter.txt
    else
        taglib=${outdir}/${proj}.lib.filter.nodup.txt
    fi

    # The tag count read from <library>.counts selects the random
    # library whose files are removed below.
    ntag=$(cat "$taglib.counts")
    ntagr=$(round_tag_count "$ntag")

    # NOTE(review): the .bed.starch entry has no $randir/ prefix, unlike
    # the other random-library paths, so it resolves against the current
    # working directory.  Kept as is -- confirm whether that is intended.
    libs=("$taglib" "${ntagr}-ran.${gnom}.bed.starch")
    if [[ "$dupok" == "T" ]]; then
        libs+=("${randir}/${ntagr}-ran.${gnom}.lib.filter.txt")
    else
        libs+=("${randir}/${ntagr}-ran.${gnom}.lib.filter.nodup.txt")
    fi
    rm -f -- "${libs[@]}"

    for dir in "${outdir}/${proj}" "${randir}/${ntagr}-ran"
    do
        rm -rf -- "${dir}-pass1" "${dir}-pass2" "${dir}-both-passes"
    done
    rm -rf -- "${outdir}/${proj}-peaks"
fi

