#!/bin/bash
#######################################################################
#################### HEADER, CHANGE VALUES HERE #######################
#######################################################################
starters_file="/usr/share/doc/mapsembler2/sample_example/fragments.fa"
read_sets="/usr/share/doc/mapsembler2/sample_example/reads.fa.gz" # FOR instance: "read_set1.fa.gz read_set2.fq.gz"
prefix_index="index" # all files used for indexing will be written will start with this prefix
prefix_results="res" # all final files will be written will start with this prefix
k=31 # size of kmers
c=2 # minimal coverage
d=1 # estimated number of error per read (used by kissreads only)
g=10000000 # estimated genome size. Used only to control kissnp2 memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM.
t=0
PATH_TOOLS="/usr/bin/" # path were executables kissnp2 and kissreads are. Leave blank if they are located in a directory located in the PATH environnement variable
#######################################################################
#################### END HEADER                 #######################
#######################################################################

function help {
echo "run_read2SNPs.sh, a pipelining kissnp2 and kissreads for calling SNPs from NGS reads without the need of a reference genome"
echo "Usage: ./run_read2SNPs.sh OPT"
echo -e "\tOPT:"
echo -e "\t \t -s: file containing starters (fasta)"
echo -e "\t \t -r list of reads separated by space, surrounded by the '\"' character. Note that reads may be in fasta or fastq format, gzipped or not. Example: -r \"data_sample/reads_sequence1.fasta   data_sample/reads_sequence2.fasta.gz\"."
echo -e "\t \t -t: kind of assembly: 1=unitig (fasta), 2=contig (fasta), 3=unitig (graph), 4=unitig(graph)"
echo -e "\t\t -p prefix. All out files will start with this prefix. Example: -p my_prefix"
echo -e "\t\t -k value. Set the length of used kmers. Must fit the compiled value. Default=31. Example -k 31"
echo -e "\t\t -c value. Set the minimal coverage: Used by kissnp2 (don't use kmers with lower coverage) and kissreads (read coherency threshold). Default=4. Example -c 4"
echo -e "\t\t -d value. Set the number of authorized substitutions used while mapping reads on found SNPs (kissreads). Default=1. Example: -d 1"
echo -e "\t\t -g value. Estimated genome size. Used only to control kissnp2 memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM. Default=10 million. Example: -d 10000000"
echo -e "\t\t -h: Prints this message and exist"
echo "Any further question: read the readme file or contact us: pierre.peterlongo@inria.fr"
}


#######################################################################
#################### GET OPTIONS                #######################
#######################################################################
while getopts ":s:r:t:p:k:c:d:g:h" opt; do
case $opt in

h)
help
exit
;;
s)
echo "use starter file: $OPTARG" >&2
starters_file=$OPTARG
;;
r)
echo "use read set: $OPTARG" >&2
read_sets=$OPTARG
;;

p)
echo "use prefix=$OPTARG" >&2
prefix_index=$OPTARG
prefix_results=$OPTARG
;;

k)
echo "use k=$OPTARG" >&2
k=$OPTARG
;;

t)
echo "use t=$OPTARG" >&2
t=$OPTARG
;;

c)
echo "use c=$OPTARG" >&2
c=$OPTARG
;;

d)
echo "use d=$OPTARG" >&2
d=$OPTARG
;;

g)
echo "use g=$OPTARG" >&2
g=$OPTARG
;;

\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;

:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
#######################################################################
#################### END GET OPTIONS            #######################
#######################################################################

if [ $t -eq 0 ]
then
echo "-t is mandatory, please fix t in value 1, 2, 3, or 4"
exit
fi


#######################################################################
#################### OPTIONS SUMMARY            #######################
#######################################################################
MY_PATH="`( cd \"$MY_PATH\" && pwd )`"  # absolutized and normalized
if [ -z "$MY_PATH" ] ; then
# error; for some reason, the path is not accessible
# to the script (e.g. permissions re-evaled after suid)
exit 1  # fail
fi
echo -e "\tRunning read2SNPs, in directory "$MY_PATH" with following parameters:"
echo -e "\t\t fasta file containing starters="$starters_file
echo -e "\t\t read_sets="$read_sets
echo -e "\t\t prefix_index="$prefix_index
echo -e "\t\t prefix_results="$prefix_results
echo -e "\t\t k="$k
echo -e "\t\t t="$t
echo -e "\t\t c="$c
echo -e "\t\t d="$d
echo -e "\t\t g="$g
echo -e -n "\t starting date="
date
echo
#######################################################################
#################### END OPTIONS SUMMARY        #######################
#######################################################################






######### CHECK THE k PARITY ##########
rest=$(( $k % 2 ))
if [ $rest -eq 0 ]
then
echo "k=$k is even number, to avoid palindromes we set it to $(($k-1))"
k=$(($k-1))
fi
#######################################


#######################################################################
#################### mapsembler                 #######################
#######################################################################
echo "running $PATH_TOOLS\mapsembler $starters_file $read_sets -t $t -k $k -c $c  -g $g -i $prefix_index -o $prefix_results"
$PATH_TOOLS\mapsembler $starters_file $read_sets -t $t -k $k -c $c  -g $g -i $prefix_index -o $prefix_results
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS\mapsembler $starters_file $read_sets -t $t -k $k -c $c  -g $g -i $prefix_index -o $prefix_results"
exit
fi


#######################################################################
#################### kissreads                  #######################
#######################################################################

echo $prefix_results\_k_$k\_q_25_c_$c\_t_$t.json
if [ $t -gt 2 ]
then
$PATH_TOOLS\kissreads_graph $prefix_results\_k_$k\_q_25_c_$c\_t_$t.json sample_example/reads.fa -t c -M -o $prefix_results\_k_$k\_q_25_c_$c\_t_$t\_coverages.json
res_file=$prefix_results\_k_$k\_q_25_c_$c\_t_$t\_coverages.json
if [ $? -ne 0 ]
then
echo "there was a problem with kissreads_graph"
exit
fi
fi
#/Users/grizk/gassst  -d test_k_31_c_4_coherent.fa  -i /Users/grizk/kissnp2/kissnp2_tool/snp_list_readsnp  -p 100 -w 15 -m 8 -l 0 -r 1 -o temp;



echo "mapsembler done, results are in $prefix_results\_k_$k\_q_25_c_$c\_t_$t\_coverages.json"
echo -e -n "\t ending date="
date
echo -e "\t Thanks for using mapsembler - http://http://colibread.inria.fr/mapsembler/"


