#!/usr/bin/env Rscript
# Takes as input a filename for a file generated by ngCGH.
# This file has six columns: chromosome, start, end, normal counts, tumor counts, and log2 ratio
# chr1    5       735386  1000    1426    0.688225
# chr1    735412  799400  1000    1311    0.566918
# chr1    799400  852122  1000    1282    0.534647
# chr1    852128  870231  1000    1103    0.317683
# chr1    870231  877312  1000    1194    0.432053

# Segment one ngCGH log2-ratio file with DNAcopy and write a centred .seg file.
#
# Steps:
#   1. Read the tab-delimited, header-less table at `fname`.
#   2. Drop rows whose chromosome name (column 1) contains "random".
#   3. Build a CNA object from the log2 ratios (column 6), placing each marker
#      at the floored midpoint of its bin (columns 2 and 3).
#   4. Run DNAcopy's smooth.CNA and segment on it.
#   5. Shift every segment mean so that the mode of the marker-weighted density
#      of segment means sits at zero; the shift is reported via message().
#   6. Write the segment table to "<sample>.seg" in the current working
#      directory, where <sample> is basename(fname) with ".cgh.txt" removed.
#
# Args:
#   fname: single character string, path to an ngCGH output file.
#
# Returns:
#   NULL, invisibly. Called for the side effect of writing the .seg file.
#
# Errors:
#   Stops if `fname` is not a single non-NA string, if the file does not
#   exist, if the table has fewer than six columns or a non-numeric sixth
#   column, if no rows remain after filtering, or if DNAcopy is not installed.
cgh2seg <- function(fname) {
  if (!is.character(fname) || length(fname) != 1L || is.na(fname)) {
    stop("'fname' must be a single file path", call. = FALSE)
  }
  message(fname)
  if (!file.exists(fname)) {
    stop("input file does not exist: ", fname, call. = FALSE)
  }

  # Validate the table before loading DNAcopy so bad inputs fail fast and
  # with a message that names the actual problem.
  dat <- read.delim(fname, header = FALSE)
  if (ncol(dat) < 6L) {
    stop(
      "expected at least 6 tab-separated columns in ", fname,
      ", found ", ncol(dat),
      call. = FALSE
    )
  }
  if (!is.numeric(dat[[6]])) {
    stop("column 6 (log2 ratio) of ", fname, " is not numeric", call. = FALSE)
  }

  # Exclude unplaced "random" contigs from segmentation.
  dat <- dat[!grepl("random", dat[[1]]), , drop = FALSE]
  if (nrow(dat) == 0L) {
    stop("no rows left in ", fname, " after removing 'random' contigs",
         call. = FALSE)
  }

  # library() errors when the package is missing; require() would only
  # return FALSE and let the failure surface later as a missing function.
  library(DNAcopy)

  cna <- CNA(
    genomdat = dat[[6]],
    chrom = as.character(dat[[1]]),
    maploc = floor(rowMeans(dat[, 2:3]))
  )
  cna <- smooth.CNA(cna)
  segs <- segment(cna, min.width = 4, undo.splits = "sdundo", verbose = 2)

  # Weight each segment mean by its marker count, then take the location of
  # the density peak as the centre to subtract.
  per_marker_means <- rep(segs$output$seg.mean, times = segs$output$num.mark)
  dens <- density(per_marker_means)
  centre <- dens$x[which.max(dens$y)]
  segs$output$seg.mean <- segs$output$seg.mean - centre
  message("Centrality parameter: ", centre)

  # Replace the first output column with the sample name derived from the
  # input file name.
  sampname <- gsub('\\.cgh\\.txt', '', basename(fname))
  segs$output[, 1] <- rep(sampname, nrow(segs$output))

  write.table(
    segs$output,
    file = paste0(sampname, ".seg"),
    col.names = TRUE,
    row.names = FALSE,
    quote = FALSE,
    sep = "\t"
  )
  invisible(NULL)
}
# Command-line entry point: the first trailing argument is the path to the
# ngCGH output file. Indexing an empty argument vector yields NA, so a missing
# argument is reported as a usage error instead of being passed on.
fname <- commandArgs(trailingOnly = TRUE)[1]
if (is.na(fname) || !nzchar(fname)) {
  stop("usage: Rscript <this script> <ngCGH output file>", call. = FALSE)
}
cgh2seg(fname)
