#!/bin/bash

# Path to the samtools executable.
# Set SAMTOOLS in the environment to override it without editing this file,
# e.g.  SAMTOOLS=/usr/local/bin/samtools ./this_script.sh
# otherwise the default below is used (edit it to match your installation).
# NOTE(review): no command visible in this script references SAMTOOLS --
# confirm whether it is still needed.
SAMTOOLS="${SAMTOOLS:-/Users/zach/Desktop/bioinformatics/tools/samtools}"
#

# grab the latest source from the repository
#    - clone neat-genreads on first use; abort if the clone fails, because the
#      python steps below run scripts from that directory
#    - then update the checkout with `git pull`
#
if [ ! -d "neat-genreads" ]; then
    git clone https://github.com/zstephens/neat-genreads.git || {
        echo "error: could not clone neat-genreads" >&2
        exit 1
    }
fi
# The pull runs in a subshell so this script's working directory never changes:
# a failed cd can no longer make `git pull` (and everything after it) run from
# the wrong directory. A failed pull is reported but not fatal, since the
# existing checkout can still be used.
( cd neat-genreads && git pull ) \
    || echo "warning: could not update neat-genreads; using the existing checkout" >&2

# learn mutation model from input vcf
#    inputs passed on the command line: chr1_subset.fa, mutations_tumor.vcf
#    output: myMutModel.p (passed to the tumor genReads.py run via -m)
#    Abort on failure so that a missing or stale myMutModel.p from an earlier
#    run is never picked up by the later steps.
#
python neat-genreads/utilities/genMutModel.py -r chr1_subset.fa -m mutations_tumor.vcf -o myMutModel.p --no-whitelist || {
    echo "error: genMutModel.py failed; myMutModel.p was not (re)generated" >&2
    exit 1
}

# generate separate tumor/normal datasets
#    normal coverage: 80x - contains random germline mutations
#    tumor coverage:  20x - contains the same germline variants, plus our tumor variants
#
# The tumor run takes output_normal_golden.vcf (written by the normal run) as
# its -v input, so the normal run must succeed first. Each run aborts the
# script on failure instead of letting later steps consume missing or stale
# files.
#
python neat-genreads/genReads.py -r chr1_subset.fa -R 101 --pe 300 30 -c 80 -M 0.002 -o output_normal --vcf || {
    echo "error: genReads.py failed while generating the normal dataset" >&2
    exit 1
}

python neat-genreads/genReads.py -r chr1_subset.fa -R 101 --pe 300 30 -c 20 -m myMutModel.p -o output_tumor -v output_normal_golden.vcf --vcf || {
    echo "error: genReads.py failed while generating the tumor dataset" >&2
    exit 1
}

# merge tumor/normal reads together
#    All four inputs are validated before anything is written: `cat` with a
#    missing input would still create the merged file, leaving a truncated
#    FASTQ that looks like a valid result.
for reads_file in output_normal_read1.fq output_tumor_read1.fq \
                  output_normal_read2.fq output_tumor_read2.fq; do
    if [ ! -s "$reads_file" ]; then
        echo "error: read file '$reads_file' is missing or empty; cannot merge" >&2
        exit 1
    fi
done
cat output_normal_read1.fq output_tumor_read1.fq > output_merged_read1.fq || {
    echo "error: failed to write output_merged_read1.fq" >&2
    exit 1
}
cat output_normal_read2.fq output_tumor_read2.fq > output_merged_read2.fq || {
    echo "error: failed to write output_merged_read2.fq" >&2
    exit 1
}

# you will wind up with the following files:
#
#    - output_merged_read*.fq   : read files for mixed tumor/normal data. Suitable as input to a somatic variant calling workflow
#    - output_normal_golden.vcf : germline mutations
#    - output_tumor_golden.vcf  : germline + tumor mutations
#
