changeset 0:c44c43d185ef draft default tip

NEUMA-1.2.1 Uploaded
author chawhwa
date Thu, 08 Aug 2013 00:46:13 -0400
parents
children
files NEUMA-1.2.1/MA2LVKM_single_4.pl NEUMA-1.2.1/NIR2LVKM.pl NEUMA-1.2.1/NIR2LVKM_SE.pl NEUMA-1.2.1/README NEUMA-1.2.1/auto_NEUMA_PE.pl NEUMA-1.2.1/auto_NEUMA_SE.pl NEUMA-1.2.1/bowtie2genecount.11.pl NEUMA-1.2.1/bowtieout2insertlendis.PE.pl NEUMA-1.2.1/bowtieout2mappingstat.2.pl NEUMA-1.2.1/bowtieout2mappingstat.3.pl NEUMA-1.2.1/calculate_gEUMA.2.pl NEUMA-1.2.1/calculate_gEUMA.pl NEUMA-1.2.1/calculate_iEUMA.2.pl NEUMA-1.2.1/calculate_iEUMA.pl NEUMA-1.2.1/combine_gFVKM_iFVKM_max.pl NEUMA-1.2.1/filter.best.from.bowtieout.2.pl NEUMA-1.2.1/filter.best.from.bowtieout.3.pl NEUMA-1.2.1/filter_exclusive_NRgene_from_gene_table.pl NEUMA-1.2.1/gNIR2gFVKM.pl NEUMA-1.2.1/iNIR2iFVKM.pl NEUMA-1.2.1/merge_LVKM.pl NEUMA-1.2.1/merge_LVKM_readcount.pl NEUMA-1.2.1/merge_readcount.pl NEUMA-1.2.1/normalize_gFVKM_5.pl NEUMA-1.2.1/normalize_iFVKM_6.pl
diffstat 25 files changed, 2514 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/MA2LVKM_single_4.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+
+if(@ARGV<13) { print "usage: $0 sample_name sample_MA_dir midrefix(samplename.midrefix.i.all.gMA) EUMAprefix(before .gEUMA or .iEUMA) gFVKM_out_dir LGPM_out_dir EUMA_cut mapping_stat_column mapping_stat_file presence_of_i_at_end_of_midrefix[1/0] final_adjustment_option[1:average & rescaling/2:max & no rescaling] gene2NM_file gene2symbol_file datatype(R(Refseq)_or_E(Ensembl)) scriptdir\n"; exit; }
+
+my $sample = shift @ARGV;
+my $sample_MA_dir = shift @ARGV;
+my $midrefix = shift @ARGV;
+my $EUMAprefix = shift @ARGV;
+my $FVKM_out_dir = shift @ARGV;
+my $LVKM_out_dir = shift @ARGV;
+my $EUMA_cut = shift @ARGV;
+my $mapping_stat_column = shift @ARGV;
+my $mapping_stat_file = shift @ARGV;
+my $i_after_midrefix = shift @ARGV;
+my $final_adjustment_option = shift @ARGV;
+my $gene2NM_file = shift @ARGV;
+my $gene2symbol_file = shift @ARGV;
+my $datatype = shift @ARGV;
+my $scriptdir = shift @ARGV;
+
+if(!-d $FVKM_out_dir) { `mkdir -p $FVKM_out_dir`; }
+if(!-d $LVKM_out_dir) { `mkdir -p $LVKM_out_dir`;}
+
+$perlcommand= "perl -I $scriptdir";
+
+$gMA2gFVKM_script = "$scriptdir/gMA2gFVKM_2.pl";
+$iMA2iFVKM_script = "$scriptdir/iMA2iFVKM.pl";
+$combine_gFVKM_iFVKM_script = "$scriptdir/combine_gFVKM_iFVKM_max.pl";
+$normalize_gFVKM_script = "$scriptdir/normalize_gFVKM_5.pl";
+$normalize_iFVKM_script = "$scriptdir/normalize_iFVKM_6.pl";
+$NR_filter_script = "$scriptdir/filter_exclusive_NRgene_from_gene_table.pl";
+
+
+#computing gFVKM and iFVKM, from EUMA/iEUMA and gMA/iMA files.
+if($i_after_midrefix eq '1'){ $MA_midrefix = "$midrefix.i";}
+else { $MA_midrefix = $midrefix; }
+`$perlcommand $gMA2gFVKM_script $sample_MA_dir/$sample.$MA_midrefix.all.gMA $EUMAprefix.gEUMA > $FVKM_out_dir/$sample.$midrefix.gFVKM`;
+`$perlcommand $iMA2iFVKM_script $sample_MA_dir/$sample.$MA_midrefix.all.iMA $EUMAprefix.iEUMA > $FVKM_out_dir/$sample.$midrefix.iFVKM`;
+
+#updating gFVKM & iFVKM baed on each other
+`$perlcommand $combine_gFVKM_iFVKM_script $FVKM_out_dir/$sample.$midrefix.gFVKM $FVKM_out_dir/$sample.$midrefix.iFVKM $gene2NM_file $EUMA_cut $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM`;
+
+#normalize and compute LGPM/LTPM
+`$perlcommand $normalize_gFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM`;
+`$perlcommand $normalize_iFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM`;
+
+#filter NR-only genes
+if($datatype eq 'R'){
+ `$perlcommand $NR_filter_script $gene2NM_file $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM > $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.-NR.gLVKM`;
+ `$perlcommand $NR_filter_script $gene2NM_file $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM > $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.-NR.iLVKM`;
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/NIR2LVKM.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+
+
+# This version (v.12) has an option of datatype (Refseq vs Ensembl).
+
+if(@ARGV<13) { print "usage: $0 sample_name readcount_dir midrefix(samplename.midrefix.i.all.gMA) EUMA_out_dir FVKM_out_dir LVKM_out_dir EUMA_cut mapping_stat_column mapping_stat_file presence_of_i_at_end_of_midrefix[1/0] final_adjustment_option[1:average & rescaling/2:max & no rescaling] gene2NM_file gene2symbol_file datatype(R(Refseq)_or_E(Ensembl)) scriptdir\n"; exit; }
+
+my $sample = shift @ARGV;
+my $readcount_dir = shift @ARGV;
+my $midrefix = shift @ARGV;
+my $EUMA_out_dir = shift @ARGV;
+my $FVKM_out_dir = shift @ARGV;
+my $LVKM_out_dir = shift @ARGV;
+my $EUMA_cut = shift @ARGV;
+my $mapping_stat_column = shift @ARGV;
+my $mapping_stat_file = shift @ARGV;
+my $i_after_midrefix = shift @ARGV;
+my $final_adjustment_option = shift @ARGV;
+my $gene2NM_file = shift @ARGV;
+my $gene2symbol_file = shift @ARGV;
+my $datatype = shift @ARGV;
+my $scriptdir = shift @ARGV;
+
+if(!-d $EUMA_out_dir) { `mkdir -p $EUMA_out_dir`; }
+if(!-d $FVKM_out_dir) { `mkdir -p $FVKM_out_dir`; }
+if(!-d $LVKM_out_dir) { `mkdir -p $LVKM_out_dir`;}
+
+#$perlcommand = "perl -I $scriptdir";
+$perlcommand = "perl";
+
+$gNIR2gFVKM_script = "$scriptdir/gNIR2gFVKM.pl";
+$iNIR2iFVKM_script = "$scriptdir/iNIR2iFVKM.pl";
+
+$combine_gFVKM_iFVKM_script = "$scriptdir/combine_gFVKM_iFVKM_max.pl";
+
+$normalize_gFVKM_script = "$scriptdir/normalize_gFVKM_5.pl";
+$normalize_iFVKM_script = "$scriptdir/normalize_iFVKM_6.pl";
+$NR_filter_script = "$scriptdir/filter_exclusive_NRgene_from_gene_table.pl";
+
+
+#computing gFVKM and iFVKM, from EUMA/iEUMA and gMA/iMA files.
+#`$perlcommand $gNIR2gFVKM_script $readcount_dir/$sample.$midrefix.gNIR $EUMA_out_dir/$sample.$midrefix.gEUMA > $FVKM_out_dir/$sample.$midrefix.gFVKM`;
+#`$perlcommand $iNIR2iFVKM_script $readcount_dir/$sample.$midrefix.iNIR $EUMA_out_dir/$sample.$midrefix.iEUMA > $FVKM_out_dir/$sample.$midrefix.iFVKM`;
+
+`$gNIR2gFVKM_script $readcount_dir/$sample.$midrefix.gNIR $EUMA_out_dir/$sample.$midrefix.gEUMA > $FVKM_out_dir/$sample.$midrefix.gFVKM`;
+`$iNIR2iFVKM_script $readcount_dir/$sample.$midrefix.iNIR $EUMA_out_dir/$sample.$midrefix.iEUMA > $FVKM_out_dir/$sample.$midrefix.iFVKM`;
+
+#updating gFVKM & iFVKM baed on each other
+`$perlcommand $combine_gFVKM_iFVKM_script $FVKM_out_dir/$sample.$midrefix.gFVKM $FVKM_out_dir/$sample.$midrefix.iFVKM $gene2NM_file $EUMA_cut $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM`;
+
+#normalize and compute LGPM/LTPM
+#`$perlcommand $normalize_gFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM`;
+#`$perlcommand $normalize_iFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM`;
+
+`$normalize_gFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM`;
+`$normalize_iFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM`;
+
+#filter NR-only genes
+if($datatype eq 'R'){
+ `$perlcommand $NR_filter_script $gene2NM_file $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM > $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.-NR.gLVKM`;
+ `$perlcommand $NR_filter_script $gene2NM_file $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM > $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.-NR.iLVKM`;
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/NIR2LVKM_SE.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+
+if(@ARGV<13) { print "usage: $0 sample_name readcount_dir midrefix(samplename.midrefix.i.all.gMA) EUMAprefix(before .gEUMA or .iEUMA) gFVKM_out_dir LGPM_out_dir EUMA_cut mapping_stat_column mapping_stat_file presence_of_i_at_end_of_midrefix[1/0] final_adjustment_option[1:average & rescaling/2:max & no rescaling] gene2NM_file gene2symbol_file datatype(R(Refseq)_or_E(Ensembl)) scriptdir\n"; exit; }
+
+my $sample = shift @ARGV;
+my $readcount_dir = shift @ARGV;
+my $midrefix = shift @ARGV;
+my $EUMAprefix = shift @ARGV;
+my $FVKM_out_dir = shift @ARGV;
+my $LVKM_out_dir = shift @ARGV;
+my $EUMA_cut = shift @ARGV;
+my $mapping_stat_column = shift @ARGV;
+my $mapping_stat_file = shift @ARGV;
+my $i_after_midrefix = shift @ARGV;
+my $final_adjustment_option = shift @ARGV;
+my $gene2NM_file = shift @ARGV;
+my $gene2symbol_file = shift @ARGV;
+my $datatype = shift @ARGV;
+my $scriptdir = shift @ARGV;
+
+if(!-d $FVKM_out_dir) { `mkdir -p $FVKM_out_dir`; }
+if(!-d $LVKM_out_dir) { `mkdir -p $LVKM_out_dir`;}
+
+#$perlcommand= "perl -I $scriptdir";
+$perlcommand= "perl";
+
+$gNIR2gFVKM_script = "$scriptdir/gNIR2gFVKM.pl";
+$iNIR2iFVKM_script = "$scriptdir/iNIR2iFVKM.pl";
+$combine_gFVKM_iFVKM_script = "$scriptdir/combine_gFVKM_iFVKM_max.pl";
+$normalize_gFVKM_script = "$scriptdir/normalize_gFVKM_5.pl";
+$normalize_iFVKM_script = "$scriptdir/normalize_iFVKM_6.pl";
+$NR_filter_script = "$scriptdir/filter_exclusive_NRgene_from_gene_table.pl";
+
+
+#computing gFVKM and iFVKM, from EUMA/iEUMA and gMA/iMA files.
+if($i_after_midrefix eq '1'){ $MA_midrefix = "$midrefix.i";}
+else { $MA_midrefix = $midrefix; }
+#`$perlcommand $gNIR2gFVKM_script $readcount_dir/$sample.$midrefix.gNIR $EUMAprefix.gEUMA > $FVKM_out_dir/$sample.$midrefix.gFVKM`;
+#`$perlcommand $iNIR2iFVKM_script $readcount_dir/$sample.$midrefix.iNIR $EUMAprefix.iEUMA > $FVKM_out_dir/$sample.$midrefix.iFVKM`;
+
+`$gNIR2gFVKM_script $readcount_dir/$sample.$midrefix.gNIR $EUMAprefix.gEUMA > $FVKM_out_dir/$sample.$midrefix.gFVKM`;
+`$iNIR2iFVKM_script $readcount_dir/$sample.$midrefix.iNIR $EUMAprefix.iEUMA > $FVKM_out_dir/$sample.$midrefix.iFVKM`;
+
+#updating gFVKM & iFVKM baed on each other
+`$perlcommand $combine_gFVKM_iFVKM_script $FVKM_out_dir/$sample.$midrefix.gFVKM $FVKM_out_dir/$sample.$midrefix.iFVKM $gene2NM_file $EUMA_cut $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM`;
+
+#normalize and compute LGPM/LTPM
+#`$perlcommand $normalize_gFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM`;
+#`$perlcommand $normalize_iFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM`;
+
+`$normalize_gFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.gFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM`;
+`$normalize_iFVKM_script $FVKM_out_dir/$sample.$midrefix.$EUMA_cut.final.iFVKM $mapping_stat_file $gene2symbol_file $mapping_stat_column $sample $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM`;
+
+#filter NR-only genes
+if($datatype eq 'R'){
+ `$perlcommand $NR_filter_script $gene2NM_file $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.gLVKM > $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.-NR.gLVKM`;
+ `$perlcommand $NR_filter_script $gene2NM_file $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.iLVKM > $LVKM_out_dir/$sample.$midrefix.$EUMA_cut.-NR.iLVKM`;
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/README	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,284 @@
+###########################
+# README for NEUMA v1.2.0 #
+###########################
+
+## Additional information and files can be obtained from the NEUMA website, http://neuma.kobic.re.kr.
+## Inquiries can be written to duplexa@gmail.com.
+
+# This version has removed the extra tab in the iNIR file. (version 1.2.1)
+# This version has fixed the problem of missing some reads at the 5'end in paired-end data. (Version 1.2.1)
+
+# This version has the following changes.(version 1.2.0)
+## The intermediate cmbt and MA files are not produced and mapping stat, insertlendis and read counts are directly computed from bowtie output files. This drastically improves on memory and speed and uses less hard disk space.
+## A subdirectory named 'readcount' will be generated in which gNIR, iNIR and gReadcount files will be placed.
+## A new option of generating all read counts (not only gene-wise or isoform-wise informative reads) for each gene (--gReadcount option). It is possible to get only these numbers and not compute NIR (--noNIR), EUMA, FVKM and LVKM (--noNEUMA).
+## auto_NEUMA_PE.pl can be used either for initial run to determine maximum insert length (--only_init) for after-initial run (--skip_init) ,or both (default).
+## Mismatches are allowed (--mm), but the alignments are filtered for all best-matching alignments.
+
+
+# This version can handle the newer fastq format for paired-end case. (version 1.1.5)
+# This version can handle the newer fastq format with a space in the sequencd ID. (version 1.1.4)
+# This version includes two distinct strand-sepcificity options (S for forward and R for reverse strand). (version 1.1.3)
+# This version includes a script that generates merged LVKM file and NIR files that can be used for diffNEUMA, to identify differentially expression genes/isoforms. (version 1.1.2)
+# This version runs dos2unix for gene2NM and gene2symbol files in the beginning. (version 1.1.1)
+# This version handles strand-specific data. (version 1.1.1)
+# This version allows multi-thread option for bowtie. (version 1.1.0)
+# This version uses a different way to take argument. (usage has changed). (version 1.1.0)
+# This version fixed a bug in reading mapping stat file in the Ensembl mode. (version 1.0.5)
+# This version handles Ensembl data. (version 1.0.4)
+# This version handles SOLiD colorspace data. (version 1.0.3) 
+
+
+**** Table of contents ****
+
+  1. Installation
+  2. Bowtie
+  3. Ingredients
+  4. How to run
+  5. Output files
+  6. Preliminary run
+  7. Mergin output files
+
+***************************
+
+
+
+
+
+1. Installation
+
+ No Installation is required. Simply, create a directory (eg. neumadir) and extract the .tar.gz file in the directory. Then, make all the perl scripts executable by the following command:
+
+chmod a+x *.pl
+
+
+2. Bowtie
+
+ Please download and install bowtie from the bowtie website http://bowtie-bio.sourceforge.net/index.shtml, following the developers' instruction. The reference index file must be created, using the same fasta file used for generating the gU and iU tables (gEUMA and iEUMA tables). The fasta file can be obtained from the NEUMA website.
+
+
+
+3. Ingredients
+
+ Before you run NEUMA, you need the following files.
+
+    * a fasta file containing raw sequences (single-end) 
+       or a pair of fasta-files containing raw sequences, each mate pair having the same unique ID (paired-end).
+    * bowtie reference index file
+    * gEUMA and iEUMA tables (single-end) 
+       or gU table and iU table (paired-end)
+    * gene2NM file
+    * gene2symbol file
+
+
+ Note that the gene2NM file must be matched to the initial reference fasta file that gU and iU tables (gEUMA and iEUMA tables) were created from. The gU and iU tables (gEUMA and iEUMA tables) along with gene2NM and gene2symbol files can be obtained from the NEUMA website.
+
+
+
+4. How to run
+
+Two scripts, auto_NEUMA_PE.pl (paired-end) and auto_NEUMA_SE.pl (single-end) are what you need and these scripts run the other scripts automatically.
+
+
+  ## paired-end case
+  usage: ./auto_NEUMA_PE.pl [options] -L=<read_length> -D=<maxdist> -1=<input_file1(mate1)> -2=<input_file2(mate2)> -U=<Utable_prefix(fullpath, before .gU.table or .iU.table)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.5)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>
+
+    The order of Arguments and options can be arbitrary.
+
+    ** required arguments **
+    * -L=<read_length> : read_length(eg.36) : sequenced length of a read mate (/not/ the insert length or sum of the two mate lengths) (no default : L must be specified)
+    * -D=<maxdist> : maxdist(eg.400) : maximum outer distance between mates (insert size). This must be identical to the maxdist used for generating gU and iU tables. (no default : D must be specified)
+    * -1=<input_file1(mate1)>  : fasta or fastq file of a series of read mate 1
+    * -2=<input_file2(mate2)>  : fasta or fastq file of a series of read mate 2. The ID of each mate 2 must be matched to that of mate 1 in case of fasta file.
+    * -U=<Utable_prefix(fullpath, before .gU.table or .iU.table)> : the path to the gU.table and iU.table files, except their extension. If you placed your gU.table and iU.table files in /home/paired-end/Utable/ and the file names are hg19.refMrna.L36.D250.gU.table and hg19.refMrna.L36.D250.iU.table, then the value for this argument is '/home/paired-end/Utable/hg19.refMrna.L36.D250'. The files must be matched to the read length, maxdist and reference transcriptome sequence.
+    * --g2m=<gene2NM_file> : gene2NM file, matched to the reference transcriptome model used for generating the gU and iU tables.
+    * --g2s=<gene2symbol_file> : gene2symbol file, containing at least all of the genes in the reference transcriptome sequence.
+    * -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> : directory in which bowtie executable is installed. Either full path or relative path must be used and '~' must be avoided.
+    * --bi=<bowtieindex> : the index file prefix of the reference transcriptome sequence, created by bowtie (bowtie-build). It is the same string put as the reference index argument for the bowtie program. (see bowtie manual)
+    * -o=<outputdir> : a directory that will contain all the output files. If the directory does not exist, the program will create it automatically.
+    * -s=<sample_name> : name of the sample that will be used as the prefix of all the output files.
+
+    ** options **
+    * -f=<file_type> : fasta(f)_or_fastq(q) : f if input files are in fasta format; q if in fastq format. (default : q)
+    * -c=<coding_option> : nucleotide(n)_or_colorspace(c) : n if input files are in DNA sequence (A,C,G,T); c if in colorspace (SOLiD platform). Note that if colorspace is used, the colorspace version of bowtie index file must be used. (default : n)
+    * -t=<euma_cut> : EUMAcut(eg.50) : The cut off of EUMA that determines measurable genes and transcripts. (default : 50)
+    * -d=<data_type> : Refseq data(R) or Ensemble data(E). The R option uses the 'NM' and 'NR' prefices in RefSeq data to discriminate between mRNA and ncRNA. If your reference doesn't have these prefices, use the E option. (default : R) 
+    * -p=<num_cpu> : number of cpu's to use for Bowtie. (default : 1)
+    * --str=<strand_specificity> : Strand-specific(S) vs Non-Strand-specific(N). For strand-specific data, strand-specific U tables must be used. (default : N)
+    * --mm=<number_of_mismatches_allowed> : setting number of mismatches allowed (default : 0)
+    * --gReadcount : Compute the number of all reads (not just gene-wise and isoform-wise informative reads) for each gene.
+    * --noNIR : do not compute NIR. (This option must be used together with --noNEUMA)
+    * --noNEUMA : do not compute EUMA, FVKM and LVKM (but compute NIR unless used with --noNIR).
+    * --only_init : run only the initial part of bowtie-mapping and insert-lendis calculation, preferably with a relatively large -D value (which is way over expected maximum, eg. 1000). A more detailed description can be found below in section 6.
+    * --skip_init : given the maximum insert length to be used is decided, skip the initial bowtie mapping and insert-lendis part and use the output files from the previous run. (No need to run bowtie again with the new -D value because length filtering will be done at the downstream steps.
+
+
+  ## single-end case
+  usage: ./auto_NEUMA_SE.pl [options] -L=<read_length> -i=<input_file> -U=<EUMA_prefix(fullpath, before .gEUMA or .iEUMA)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.5)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>
+
+    The order of Arguments and options can be arbitrary.
+
+    ** required arguments **
+    * -L=<read_length> : read_length(eg.36) : sequenced length of a read mate (/not/ the insert length or sum of the two mate lengths) (no default : L must be specified)
+    * -i=<input_file> : fasta file of a series of sequenced reads
+    * -U=<EUMA_prefix(fullpath, before .gEUMA or .iEUMA)> : the path to the gEUMA and tEUMA files, except their extension. If you placed your gEUMA and iEUMA files in /home/single-end/EUMA/ and the file names are hg19.refMrna.L36.single.gEUMA and hg19.refMrna.L36.single.iEUMA, then the value for this argument is '/home/single-end/EUMA/hg19.refMrna.L36.single'.
+    * --g2m=<gene2NM_file> : same as paired-end case
+    * --g2s=<gene2symbol_file> : same as paired-end case
+    * -b=<bowtiedir(eg.bin/bowtie-0.12.5)> : same as paired-end case
+    * --bi=<bowtieindex> : same as paired-end case
+    * -o=<outputdir> : same as paired-end case
+    * -s=<samplename> : same as paired-end case
+
+    ** options **
+    * -f=<file_type> : fasta(f)_or_fastq(q) : f if input files are in fasta format; q if in fastq format. (default : q)
+    * -c=<coding_option> : nucleotide(n)_or_colorspace(c) : n if input files are in DNA sequence (A,C,G,T); c if in colorspace (SOLiD platform). Note that if colorspace is used, the colorspace version of bowtie index file must be used. (default : n)
+    * -t=<euma_cut> : EUMAcut(eg.50) : The cut off of EUMA that determines measurable genes and transcripts. (default : 50)
+    * -d=<data_type> : Refseq data(R) or Ensemble data(E). The R option uses the 'NM' and 'NR' prefices in RefSeq data to discriminate between mRNA and ncRNA. If your reference doesn't have these prefices, use the E option. (default : R) 
+    * -p=<num_cpu> : number of cpu's to use for Bowtie. (default : 1)
+    * --str=<strand_specificity> : Strand-specific, forward(S), strand-specific, reverse (R), and Non-Strand-specific(N). For strand-specific data, strand-specific EUMA tables must be used. (default : N) 
+    * --mm=<number_of_mismatches_allowed> : setting number of mismatches allowed (default : 0)
+    * --gReadcount : Compute the number of all reads (not just gene-wise and isoform-wise informative reads) for each gene.
+    * --noNIR : do not compute NIR. (This option must be used together with --noNEUMA)
+    * --noNEUMA : do not compute EUMA, FVKM and LVKM (but compute NIR unless used with --noNIR).
+    
+
+Example usages are as follows:
+
+# paired-end
+neumadir/auto_NEUMA_PE.pl --only_init -f=f -L=36 -D=1000 --mm2 -1=MKN28.1.fa -2=MKN28.2.fa -U=U.tables/hg19.refMrna.L36.D250 --g2m=gene2NM.human.fastafiltered --g2s=gene2symbol.human -b=bin/bowtie-0.12.7 --bi=ebwt/hg19.RefmRNA -o=MKN28.hg19 -s=MKN28
+# This command will run initial bowtie mapping, mapping stat and insert length distribution.
+neumadir/auto_NEUMA_PE.pl --skip_init --gReadcount --noNEUMA --mm2 -f=f -L=36 -D=250 -1=MKN28.1.fa -2=MKN28.2.fa -U=U.tables/hg19.refMrna.L36.D250 --g2m=gene2NM.human.fastafiltered --g2s=gene2symbol.human -b=bin/bowtie-0.12.7 --bi=ebwt/hg19.RefmRNA -o=MKN28.hg19 -s=MKN28
+# This command will skip bowtie-mapping and insert length distribution and use the files from the previous (eg. above) run, and reports counts of all reads (gReadcount), gNIR and iNIR, and will not compute EUMA, FVKM, LVKM.
+# Only reads with insert length up to 250 will be used, although the bowtie output from the previous run contains larger insert lengths.
+
+
+# single-end
+auto_NEUMA_SE.pl --noNEUMA --mm2 -L=36 -t=30 -p=2 -i=MKN28.txt -U=hg19.refMrna.L36.single --g2m=gene2NM.human.fastafiltered --g2s=gene2symbol.human -b=../bin/bowtie-0.12.5 --bi=ebwt/hg19.RefmRNA -o=MKN28.hg19 -s=MKN28
+# This command will only do bowtie-mapping and reports mapping stat and gNIR and iNIR.
+
+
+
+5. OUTPUT Files
+
+   ## paired-end
+
+   For Refseq, the final log2(x+1) transformed values of FVKM (LVKM) can be found in  outputdir/LVKM/samplename.ebwtname.maxinsMAX_DIST.mm0.-EUMAcut.-NR.gLVKM (gene-wise) and outputdir/LVKM/samplename.ebwtname.maxinsMAX_DIST.mm0.-EUMAcut.-NR.iLVKM (isoform-wise).
+
+   Files containing '-NR' excludes genes with no mRNA (eg. rRNA, tRNA, snRNA, snoRNA and other small RNA genes). If your sample is poly-A-selected, these RNAs cannot be quantified along with mRNAs. If your sample did not use any enrichment step and mRNAs and ncRNAs are represented in their exact proportion as in the cell, you can use the LVKM files without the '-NR' tag. The ncRNAs were removed at the last step because reads mapping to both an mRNA and an ncRNA must be considered a multi-read. For Ensembl, -NR version are not provided (ncRNAs are not removed).
+
+  
+
+   The unadjusted and adjusted FVKM values (as 'FVK' and 'FVKM' for before and after sample-normalization, respectively) can be found in the LVKM files as well, along with gEUMA and iEUMA values and the number of isoforms and the number of measurable isoforms.
+
+   The mapping stat can be found in outputdir/mapping_stat.samplename.
+   
+   The NIR and/or readcount values can be found in outputdir/readcount/samplename.ebwtname.maxinsMAX_DIST.mm0.gNIR, outputdir/readcount/samplename.ebwtname.maxinsMAX_DIST.mm0.iNIR and/or outputdir/readcount/samplename.ebwtname.maxinsMAX_DIST.mm0.gReadcount
+
+   The EUMA values can be found in outputdir/EUMA/samplename.ebwtname.maxinsMAX_DIST.mm0.gEUMA and outputdir/EUMA/samplename.ebwtname.maxinsMAX_DIST.mm0.iEUMA.
+
+   The insert length distribution can be found in outputdir/insertlendis/samplename.ebwtname.maxinsMAX_DIST.mm0.i.insertlendis.
+
+   If --mm option was used, replace 'mm0' in the above file names with 'mm1' or 'mm2'.
+
+
+   ## single-end
+
+   The formats for LVKM and NIR files(gMA & iMA) are the same as in the paired-end case. The LVKM file names are outputdir/LVKM/samplename.ebwtname.maxinsMAX_DIST.mm0.single.-EUMAcut.-NR.gLVKM (gene-wise) and outputdir/LVKM/samplename.ebwtname.maxinsMAX_DIST.mm0.single.-EUMAcut.-NR.iLVKM (isoform-wise). The NIR values can be found in outputdir/MA/samplename.ebwtname.maxinsMAX_DIST.mm0.single.all.gMA (the #uniq.common column) and outputdir/MA/samplename.ebwtname.maxins400.mm0.single.all.iMA (the #uniq column).
+
+   The mapping stat can be found in outputdir/mappint_stat.samplename.single.
+
+
+
+
+6. Preliminary run
+
+   Given a new data set, users can run the first part of auto_NEUMA_PE.pl, without the U.tables, to find out the insert length distribution and mapping stats. Based on the length distribution, the user can determine a safe MAXDIST and request us to build U.tables (through the NEUMA website). After having the U.tables ready, the user can run the latter part of auto_NEUMA_PE.pl. Note that -U, --g2m and --g2s are not required for this run.
+
+   * Usages:
+
+    auto_NEUMA_PE.pl --only_init [options] -L=<read_length> -D=<maxdist> -1=<input_file1(mate1)> -2=<input_file2(mate2)> -b=<bowtie_dir(eg.bin/bowtie-0.12.5)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>
+
+    ** required arguments **
+    * -L=<read_length> : read_length(eg.36) : sequenced length of a read mate (/not/ the insert length or sum of the two mate lengths) (no default : L must be specified)
+    * -D=<maxdist> : maxdist(eg.400) : maximum outer distance between mates (insert size). This must be identical to the maxdist used for generating gU and iU tables. (no default : D must be specified)
+    * -1=<input_file1(mate1)>  : fasta or fastq file of a series of read mate 1
+    * -2=<input_file2(mate2)>  : fasta or fastq file of a series of read mate 2. The ID of each mate 2 must be matched to that of mate 1 in case of fasta file.
+    * -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> : directory in which bowtie executable is installed. Either full path or relative path must be used and '~' must be avoided.
+    * --bi=<bowtieindex> : the index file prefix of the reference transcriptome sequence, created by bowtie (bowtie-build). It is the same string put as the reference index argument for the bowtie program. (see bowtie manual)
+    * -o=<outputdir> : a directory that will contain all the output files. If the directory does not exist, the program will create it automatically.
+    * -s=<sample_name> : name of the sample that will be used as the prefix of all the output files.
+
+    ** options **
+    * -f=<file_type> : fasta(f)_or_fastq(q) : f if input files are in fasta format; q if in fastq format. (default : q)
+    * -c=<coding_option> : nucleotide(n)_or_colorspace(c) : n if input files are in DNA sequence (A,C,G,T); c if in colorspace (SOLiD platform). Note that if colorspace is used, the colorspace version of bowtie index file must be used. (default : n)
+    * -p=<num_cpu> : number of cpu's to use for Bowtie. (default : 1)
+    * --str=<strand_specificity> : Strand-specific(S) vs Non-Strand-specific(N). (default : N)
+    * --mm=<number_of_mismatches_allowed> : setting number of mismatches allowed (default : 0)
+
+   * The maxdist for this run can be set to something very large, eg. 1000. Then, for the real runs using the --skip_init option, the maxdist must be set identical to the one used to build the U.tables.
+
+   * For single-end reads, insert length distribution does not have to be pre-determined. Simply tell us the read length to request the EUMA tables for single-end data.
+  
+   ** output **
+   The quickest way to check the output insert length distribution is to look at the insertlendis directory under outputdir.
+
+
+
+
+
+7. Merging output files
+
+
+   1) merge_LVKM.pl
+
+   The script produces a text file that contains gLVKM or iLVKM values for all samples for all genes/isoforms.
+
+   usage: ./merge_LVKM.pl genewise/isoformwise[g/i] EUMAcut LVKM_out_dir NR(1/0) > output.gLVKM(iLVKM).merged
+
+   Example usage:
+   ./merge_LVKM.pl g 50 data/LVKM 0 > data/LVKM/all.gLVKM.merged
+   ./merge_LVKM.pl i 50 data/LVKM 0 > data/LVKM/all.iLVKM.merged
+
+   * genewise/isoformwise[g/i] : put g for gLVKM and i for iLVKM.
+   * EUMAcut : put the same EUMA cut off used to generate the LVKM files. This will be used to recognize the files.
+   * LVKM_out : This is usually your_basedir/LVKM, which contains all the LVKM files generated. All the LVKM files in this directory that matches the EUMAcut specified will be included in the merged table.
+   * NR(1/0) : 1 means the script must use the LVKM files generated after the noncoding RNAs starting with the NR prefix are removed. 0 is otherwise.
+
+
+   2) merge_LVKM_readcount.pl
+
+
+   This script produces a gNIR / iNIR file that contains the read counts for all samples for all genes/isoforms. The gNIR / iNIR files can be fed to diffNEUMA (http://neuma.kobic.re.kr), for identification of differentially expressed genes/isoforms. This script is identical to merge_NEUMA_readcount.pl in previous versions.
+
+   usage: ./merge_LVKM_readcount.pl genewise/isoformwise[g/i] EUMAcut LVKM_out_dir NR(1/0) > output.gLVKM(iLVKM).merged
+
+   Example usage:
+   ./merge_LVKM_readcount.pl g 10 data/LVKM 1 > data/LVKM/all.gNIR
+   ./merge_LVKM_readcount.pl i 10 data/LVKM 1 > data/LVKM/all.iNIR
+
+   * genewise/isoformwise[g/i] : put g for gLVKM and i for iLVKM.
+   * EUMAcut : put the same EUMA cut off used to generate the LVKM files. This will be used to recognize the files.
+   * LVKM_out : This is usually your_basedir/LVKM, which contains all the LVKM files generated. All the LVKM files in this directory that matches the EUMAcut specified will be included in the NIR file.
+   * NR(1/0) : 1 means the script must use the LVKM files generated after the noncoding RNAs starting with the NR prefix are removed. 0 is otherwise.
+
+
+
+   3) merge_readcount.pl
+
+
+   This script produces a gNIR.merged / iNIR.merged / gReadcount.merged file that contains the read counts for all samples. The gNIR / iNIR /gReadcount files can be fed to diffNEUMA (http://neuma.kobic.re.kr), for identification of differentially expressed genes/isoforms. The result is not filtered for EUMA cut or NR as in merge_LVKM_readcount.pl.
+
+   usage: ./merge_readcount.pl type[gNIR/iNIR/gReadcount] readcount_dir > output.gNIR(iNIR/gReadcount).merged
+
+   Example usage:
+   ./merge_readcount.pl gNIR data/readcount > data/readcount/all.gNIR.merged
+   ./merge_readcount.pl iNIR data/readcount > data/readcount/all.iNIR.merged
+
+   * type (gNIR/iNIR/gReadcount) : the type of the read counts to be merged
+   * readcount_dir : This is usually your_basedir/readcount, which contains all the readcount files generated.
+
+
+//
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/auto_NEUMA_PE.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,218 @@
+#!/usr/bin/perl
+
+if(@ARGV<8) { print "usage: $0 [options] -L=<read_length> -D=<maxdist> -1=<input_file1(mate1)> -2=<input_file2(mate2)> -U=<Utable_prefix(fullpath, before .gU.table or .iU.table)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>\n\nOrder of arguments can be interchangeable.\n\n"; exit; }
+
+
+## options ##
+($fastoption)= grep /^-f=/, @ARGV;
+if(!defined($fastoption)){$fastoption='q'; }
+else { $fastoption =~s/^-f=//; if($fastoption !~/^[fq]$/) { die "ERROR: wrong file type (-f).\n"; } }
+
+($coding_option)= grep /^-c=/, @ARGV;
+if(!defined($coding_option)){$coding_option='n'; }
+else { $coding_option =~s/^-c=//; if($coding_option !~/^[nc]$/) { die "ERROR: wrong coding option (-c).\n"; } }
+
+($datatype)= grep /^-d=/, @ARGV;
+if(!defined($datatype)){$datatype='R'; }
+else { $datatype =~s/^-d=//; if($datatype !~/^[RE]$/) { die "ERROR: wrong data type (-d).\n"; } }
+
+($parallel)= grep /^-p=/, @ARGV;
+if(!defined($parallel)){$parallel=1; }
+else { $parallel =~s/^-p=//; if($parallel !~/^[\d]+$/) { die "ERROR: wrong parallel (multi-thread) option (-p).\n"; } }
+
+($EUMAcut)= grep /^-t=/, @ARGV;
+if(!defined($EUMAcut)){$EUMAcut=50; }
+else { $EUMAcut =~s/^-t=//; if($EUMAcut !~/^[\d\.]+$/) { die "ERROR: wrong EUMAcut (-t).\n"; } }
+
+($StrandSpecificity)= grep /^--str=/, @ARGV;
+if(!defined($StrandSpecificity)){ $StrandSpecificity='N'; }
+else { $StrandSpecificity =~s/^--str=//; if($StrandSpecificity !~/^[SN]+$/) { die "ERROR: wrong strand specificity (--str).\n"; } }
+
+($skip_init)= grep /^--skip_init/, @ARGV;
+if(!defined($skip_init)){ $skip_init=0; }
+else { $skip_init=1; }
+
+($only_init)= grep /^--only_init/, @ARGV;
+if(!defined($only_init)){ $only_init=0; }
+else { $only_init=1; }
+
+if($only_init==1 && $skip_init==1) { die "ERROR: --only_init and --skip_init cannot be used simultaneously.\n"; }
+
+($noNIR)= grep /^--noNIR/, @ARGV;
+if(!defined($noNIR)){ $noNIR=0; }
+else { $noNIR=1; }
+
+($noNEUMA)= grep /^--noNEUMA/, @ARGV;
+if(!defined($noNEUMA)){ $noNEUMA=0; }
+else { $noNEUMA=1; }
+
+if($noNIR==1 && $noNEUMA==0) { die "ERROR: --noNIR must be used together with --noNEUMA.\n"; }
+
+($get_gReadcount)= grep /^--gReadcount/, @ARGV;
+if(!defined($get_gReadcount)){ $get_gReadcount=0; }
+else { $get_gReadcount=1; }
+
+($mm)= grep /^--mm=/, @ARGV;
+if(!defined($mm)){ $mm=0; }
+else { $mm =~s/^--mm=//; if($mm !~/^[\d\.]+$/) { die "ERROR: wrong number of mismatches (--mm).\n"; } }
+
+
+
+## required arguments ##
+
+($READ_LENGTH)= grep /^-L=/, @ARGV;
+if(!defined($READ_LENGTH)){ die "ERROR: READ_LENGTH must be specified (-L).\n"; }
+else { $READ_LENGTH =~s/^-L=//; if($READ_LENGTH !~/^[\d]+$/) { die "ERROR: wrong READ_LENGTH (-L).\n"; } }
+
+($MAXDIST)= grep /^-D=/, @ARGV;
+if(!defined($MAXDIST)){ die "ERROR: MAXDIST must be specified (-D).\n"; }
+else { $MAXDIST =~s/^-D=//; if($MAXDIST !~/^[\d]+$/) { die "ERROR: wrong MAXDIST (-D).\n"; } }
+
+($input_file1)= grep /^-1=/, @ARGV;
+if(!defined($input_file1)){ die "ERROR: input_file1 must be specified (-1).\n"; }
+else { $input_file1 =~s/^-1=//; if(!-e $input_file1) { die "ERROR: wrong input_file1 (-1).\n"; } }
+
+($input_file2)= grep /^-2=/, @ARGV;
+if(!defined($input_file2)){ die "ERROR: input_file2 must be specified (-2).\n"; }
+else { $input_file2 =~s/^-2=//; if(!-e $input_file2) { die "ERROR: wrong input_file2 (-2).\n"; } }
+
+($Utable_prefix)= grep /^-U=/, @ARGV;
+if($only_init==0 && !defined($Utable_prefix)){ die "ERROR: Utable_prefix must be specified (-U).\n"; }
+else { $Utable_prefix =~s/^-U=//; if(!-e $Utable_prefix.".gU.table" || !-e $Utable_prefix.".iU.table") { die "ERROR: wrong Utable_prefix (-U).\n"; } }
+
+($gene2NM_file)= grep /^--g2m=/, @ARGV;
+if($only_init==0 && !defined($gene2NM_file)){ die "ERROR: gene2NM_file must be specified (--g2m).\n"; }
+else { $gene2NM_file =~s/^--g2m=//; if(!-e $gene2NM_file) { die "ERROR: wrong gene2NM_file (--g2m).\n"; } }
+
+($gene2symbol_file)= grep /^--g2s=/, @ARGV;
+if($only_init==0 && !defined($gene2symbol_file)){ die "ERROR: gene2symbol_file must be specified (--g2s).\n"; }
+else { $gene2symbol_file =~s/^--g2s=//; if(!-e $gene2symbol_file) { die "ERROR: wrong gene2symbol_file (--g2s).\n"; } }
+
+($bowtie_dir)= grep /^-b=/, @ARGV;
+if(!defined($bowtie_dir)){ die "ERROR: bowtie_dir must be specified (-b).\n"; }
+else { $bowtie_dir =~s/^-b=//;  if(!-d $bowtie_dir || !-e "$bowtie_dir/bowtie") { die "ERROR: wrong bowtie_dir(please avoid using '~') (-b).\n"; } }
+
+($bowtieindex)= grep /^--bi=/, @ARGV;
+if(!defined($bowtieindex)){ die "ERROR: bowtieindex must be specified (--bi).\n"; }
+else { $bowtieindex =~s/^--bi=//; if(!-e "$bowtieindex.1.ebwt") { die "ERROR: wrong bowtieindex (--bi).\n"; } }
+
+($basedir)= grep /^-o=/, @ARGV;
+if(!defined($basedir)){ die "ERROR: output dir must be specified (-o).\n"; }
+else { $basedir =~s/^-o=//; if($basedir=~/~/) { die "ERROR: wrong base_dir(please avoid using '~') (-o).\n"; }  }
+
+($sample)= grep /^-s=/, @ARGV;
+if(!defined($sample)){ die "ERROR: sample name must be specified (-s).\n"; }
+else { $sample =~s/^-s=//;  }
+
+
+##############
+
+
+$gUtable_file = $Utable_prefix.".gU.table";
+$iUtable_file = $Utable_prefix.".iU.table";
+
+my $coding='';
+if($coding_option eq 'n') {$coding='';}
+elsif($coding_option eq 'c') {$coding='-C';}
+else { die "ERROR: Error: invalid coding option.\n"; }
+
+if($datatype eq 'R') { $mapping_stat_column = 4; }
+elsif($datatype eq 'E') { $mapping_stat_column = 6; }
+else { die "ERROR: Error: wrong datatype.\n"; }
+
+if($StrandSpecificity eq 'S') { $bowtie_strand_option = "--norc"; }
+else { $bowtie_strand_option = ""; }
+
+($scriptdir) = $0 =~ /(.+)\/[^\/]+$/;
+
+$bowtieoutdir = "$basedir/bowtieout";
+$readcount_dir = "$basedir/readcount";
+$lendis_dir = "$basedir/insertlendis";
+$EUMAdir = "$basedir/EUMA";
+$FVKM_dir = "$basedir/FVKM";
+$LVKM_dir = "$basedir/LVKM";
+
+$mapping_stat_file = "$basedir/mapping_stat.$sample";
+
+($midrefix) = $bowtieindex=~/([^\/]+)$/;
+
+
+if(!-d $bowtieoutdir) { `mkdir -p $bowtieoutdir`; }
+if(!-d $readcount_dir) { `mkdir -p $readcount_dir`; }
+if(!-d $lendis_dir) { `mkdir -p $lendis_dir`; }
+if(!-d $EUMAdir) { `mkdir -p $EUMAdir`; }
+if(!-d $FVKM_dir) { `mkdir -p $FVKM_dir`; }
+if(!-d $LVKM_dir) { `mkdir -p $LVKM_dir`; }
+
+
+#`dos2unix $gene2NM_file`;
+#`dos2unix $gene2symbol_file`;
+
+
+
+#$perlcommand = "perl -I $scriptdir";
+$perlcommand = "perl";
+$bestbowtieout_script = "$scriptdir/filter.best.from.bowtieout.3.pl";
+$bowtie2insertlendis_script = "$scriptdir/bowtieout2insertlendis.PE.pl";
+$bowtieout2mappingstat_script = "$scriptdir/bowtieout2mappingstat.3.pl";
+$bowtieout2readcount_script = "$scriptdir/bowtie2genecount.11.pl";
+
+$calc_gEUMA_script = "$scriptdir/calculate_gEUMA.2.pl";
+$calc_iEUMA_script = "$scriptdir/calculate_iEUMA.2.pl";
+$NIR2LVKM_script = "$scriptdir/NIR2LVKM.pl";
+
+
+system("date");
+
+
+if($skip_init==0){
+  print STDERR "Mapping reads using bowtie...\n";
+  system("$bowtie_dir/bowtie -$fastoption $coding --minins 0 --maxins $MAXDIST -v $mm -a --suppress 5,6,7 -p $parallel $bowtie_strand_option $bowtieindex -1 $input_file1 -2 $input_file2 > $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.bowtieout"); 
+
+  print STDERR "Filtering best-matching alignments...\n";
+  if($mm > 0) {
+    system("$perlcommand $bestbowtieout_script -d $datatype --rm $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.bowtieout 1");
+  }
+  else {
+    system("mv $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.bowtieout $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout");
+  }
+
+  print STDERR "Computing fragment length distribution...\n";
+  system("$perlcommand $bowtie2insertlendis_script $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout $READ_LENGTH >  $lendis_dir/$sample.$midrefix.maxins$MAXDIST.mm0.i.insertlendis");
+}
+
+if($only_init==0) {
+
+  print STDERR "Computing the mapping stat...\n";
+  system("$perlcommand $bowtieout2mappingstat_script -d $datatype -m $MAXDIST -l $READ_LENGTH $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout $sample 1 > $mapping_stat_file");
+  print STDERR "Mapping stat :\n";
+  system("cat $mapping_stat_file");
+
+  if($noNIR==0){
+    print STDERR "Computing NIRs...\n";
+    system("$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH --gNIR -f -s $sample -v $gene2NM_file $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout 1 > $readcount_dir/$sample.$midrefix.maxins$MAXDIST.mm$mm.gNIR");
+    system("$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH --iNIR -f -s $sample -v $gene2NM_file $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout 1 > $readcount_dir/$sample.$midrefix.maxins$MAXDIST.mm$mm.iNIR");
+  }  
+
+  if($get_gReadcount==1) { 
+     print STDERR "Computing total gene read counts (gReadcount)...\n";
+     print STDERR "$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH -f -s $sample -v $gene2NM_file $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout 1 > $readcount_dir/$sample.$midrefix.maxins$MAXDIST.mm$mm.gReadcount\n"; 
+     system("$perlcommand $bowtieout2readcount_script -d $datatype -m $MAXDIST -l $READ_LENGTH -f -s $sample -v $gene2NM_file $bowtieoutdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.best.bowtieout 1 > $readcount_dir/$sample.$midrefix.maxins$MAXDIST.mm$mm.gReadcount"); 
+  }
+    
+  if($noNEUMA==0){
+    print STDERR "Computing gEUMA and iEUMA...\n";
+    system("$perlcommand $calc_gEUMA_script $gUtable_file $lendis_dir/$sample.$midrefix.maxins$MAXDIST.mm0.i.insertlendis $READ_LENGTH > $EUMAdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.gEUMA");
+    system("$perlcommand $calc_iEUMA_script $iUtable_file $lendis_dir/$sample.$midrefix.maxins$MAXDIST.mm0.i.insertlendis $READ_LENGTH > $EUMAdir/$sample.$midrefix.maxins$MAXDIST.mm$mm.iEUMA");
+  
+    print STDERR "Computing FVKM and LVKM...\n";
+    system("$perlcommand $NIR2LVKM_script $sample $readcount_dir $midrefix.maxins$MAXDIST.mm$mm $EUMAdir $FVKM_dir $LVKM_dir $EUMAcut $mapping_stat_column $mapping_stat_file 1 2 $gene2NM_file $gene2symbol_file $datatype $scriptdir");
+
+  }
+}
+
+print STDERR "Done.\n";
+
+system("date");
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/auto_NEUMA_SE.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,176 @@
+#!/usr/bin/perl
+
+if(@ARGV<9) { print "usage: $0 [options] -L=<read_length> -i=<input_file> -U=<EUMA_prefix(fullpath, before .gEUMA or .iEUMA)> --g2m=<gene2NM_file> --g2s=<gene2symbol_file> -b=<bowtie_dir(eg.bin/bowtie-0.12.7)> --bi=<bowtieindex> -o=<outputdir> -s=<sample_name>\n\nOrder of arguments can be interchangeable.\n\n"; exit; }
+
+
+
+## options ##
+($fastoption)= grep /^-f=/, @ARGV;
+if(!defined($fastoption)){$fastoption='q'; }
+else { $fastoption =~s/^-f=//; if($fastoption !~/^[fq]$/) { die "wrong file type (-f).\n"; } }
+
+($coding_option)= grep /^-c=/, @ARGV;
+if(!defined($coding_option)){$coding_option='n'; }
+else { $coding_option =~s/^-c=//; if($coding_option !~/^[nc]$/) { die "wrong coding option (-c).\n"; } }
+
+($datatype)= grep /^-d=/, @ARGV;
+if(!defined($datatype)){$datatype='R'; }
+else { $datatype =~s/^-d=//; if($datatype !~/^[RE]$/) { die "wrong data type (-d).\n"; } }
+
+($parallel)= grep /^-p=/, @ARGV;
+if(!defined($parallel)){$parallel=1; }
+else { $parallel =~s/^-p=//; if($parallel !~/^[\d]+$/) { die "wrong parallel (multi-thread) option (-p).\n"; } }
+
+($EUMAcut)= grep /^-t=/, @ARGV;
+if(!defined($EUMAcut)){$EUMAcut=50; }
+else { $EUMAcut =~s/^-t=//; if($EUMAcut !~/^[\d\.]+$/) { die "wrong EUMAcut (-t).\n"; } }
+
+($StrandSpecificity)= grep /^--str=/, @ARGV;
+if(!defined($StrandSpecificity)){ $StrandSpecificity='N'; }
+else { $StrandSpecificity =~s/^--str=//; if($StrandSpecificity !~/^[SRN]+$/) { die "wrong strand specificity (--str).\n"; } }
+
+($get_gReadcount)= grep /^--gReadcount/, @ARGV;
+if(!defined($get_gReadcount)){ $get_gReadcount=0; }
+else { $get_gReadcount=1; }
+
+($mm)= grep /^--mm=/, @ARGV;
+if(!defined($mm)){ $mm=0; }
+else { $mm =~s/^--mm=//; if($mm !~/^[\d\.]+$/) { die "ERROR: wrong number of mismatches (--mm).\n"; } }
+
+($noNIR)= grep /^--noNIR/, @ARGV;
+if(!defined($noNIR)){ $noNIR=0; }
+else { $noNIR=1; }
+
+($noNEUMA)= grep /^--noNEUMA/, @ARGV;
+if(!defined($noNEUMA)){ $noNEUMA=0; }
+else { $noNEUMA=1; }
+
+if($noNIR==1 && $noNEUMA==0) { die "ERROR: --noNIR must be used together with --noNEUMA.\n"; }
+
+
+
+
+## required arguments ##
+
+($READ_LENGTH)= grep /^-L=/, @ARGV;
+if(!defined($READ_LENGTH)){ die "READ_LENGTH must be specified (-L).\n"; }
+else { $READ_LENGTH =~s/^-L=//; if($READ_LENGTH !~/^[\d]+$/) { die "wrong READ_LENGTH (-L).\n"; } }
+
+($input_fa)= grep /^-i=/, @ARGV;
+if(!defined($input_fa)){ die "input_fa must be specified (-i).\n"; }
+else { $input_fa =~s/^-i=//; if(!-e $input_fa) { die "wrong input_file (-i).\n"; } }
+
+($EUMAprefix)= grep /^-U=/, @ARGV;
+if(!defined($EUMAprefix)){ die "EUMAprefix must be specified (-U).\n"; }
+else { $EUMAprefix =~s/^-U=//; if(!-e $EUMAprefix.".gEUMA" || !-e $EUMAprefix.".iEUMA") { die "wrong EUMAprefix (-U).\n"; } }
+
+($gene2NM_file)= grep /^--g2m=/, @ARGV;
+if(!defined($gene2NM_file)){ die "gene2NM_file must be specified (--g2m).\n"; }
+else { $gene2NM_file =~s/^--g2m=//; if(!-e $gene2NM_file) { die "wrong gene2NM_file (--g2m).\n"; } }
+
+($gene2symbol_file)= grep /^--g2s=/, @ARGV;
+if(!defined($gene2symbol_file)){ die "gene2symbol_file must be specified (--g2s).\n"; }
+else { $gene2symbol_file =~s/^--g2s=//; if(!-e $gene2symbol_file) { die "wrong gene2symbol_file (--g2s).\n"; } }
+
+($bowtie_dir)= grep /^-b=/, @ARGV;
+if(!defined($bowtie_dir)){ die "bowtie_dir must be specified (-b).\n"; }
+else { $bowtie_dir =~s/^-b=//;  if(!-d $bowtie_dir || !-e "$bowtie_dir/bowtie") { die "wrong bowtie_dir(please avoid using '~') (-b).\n"; } }
+
+($bowtieindex)= grep /^--bi=/, @ARGV;
+if(!defined($bowtieindex)){ die "bowtieindex must be specified (--bi).\n"; }
+else { $bowtieindex =~s/^--bi=//; if(!-e "$bowtieindex.1.ebwt") { die "wrong bowtieindex (--bi).\n"; } }
+
+($basedir)= grep /^-o=/, @ARGV;
+if(!defined($basedir)){ die "output dir must be specified (-o).\n"; }
+else { $basedir =~s/^-o=//; if($basedir=~/~/) { die "wrong base_dir(please avoid using '~') (-o).\n"; }  }
+
+($sample)= grep /^-s=/, @ARGV;
+if(!defined($sample)){ die "sample name must be specified (-s).\n"; }
+else { $sample =~s/^-s=//;  }
+
+
+##############
+
+
+my $coding='';
+if($coding_option eq 'n') {$coding='';}
+elsif($coding_option eq 'c') {$coding='-C';}
+else { die "Error: invalid coding option.\n"; }
+
+if($datatype eq 'R') { $mapping_stat_column = 1; }
+elsif($datatype eq 'E') { $mapping_stat_column = 3; }
+else { die "Error: wrong datatype.\n"; }
+
+if($StrandSpecificity eq 'S') { $bowtie_strand_option = "--norc"; }
+elsif($StrandSpecificity eq 'R') { $bowtie_strand_option = "--nofc"; }
+else { $bowtie_strand_option = ""; }
+
+
+($scriptdir) = $0 =~ /(.+)\/[^\/]+$/;
+
+$bowtieout_dir = "$basedir/bowtieout";
+$readcount_dir = "$basedir/readcount";
+$FVKMdir = "$basedir/FVKM";
+$LVKMdir = "$basedir/LVKM";
+
+$bowtieoutfile = "$bowtieout_dir/$sample.single.mm$mm.bowtieout";
+$bestbowtieoutfile = "$bowtieout_dir/$sample.single.mm$mm.best.bowtieout";
+$mapping_stat_file = "$basedir/mapping_stat.$sample.single";
+
+`mkdir -p $bowtieout_dir`;
+`mkdir -p $readcount_dir`;
+`mkdir -p $FVKMdir`;
+`mkdir -p $LVKMdir`;
+
+`dos2unix $gene2NM_file`;
+`dos2unix $gene2symbol_file`;
+
+
+#$perlcommand = "perl -I $scriptdir";
+$perlcommand = "perl";
+$bestbowtieout_script = "$scriptdir/filter.best.from.bowtieout.3.pl";
+$bowtieout2mappingstat_script = "$scriptdir/bowtieout2mappingstat.3.pl";
+$bowtieout2readcount_script = "$scriptdir/bowtie2genecount.11.pl";
+$NIR2LVKM_script = " $scriptdir/NIR2LVKM_SE.pl";
+
+=cut
+print STDERR "Mapping reads using bowtie...\n";
+`$bowtie_dir/bowtie -$fastoption $coding -a -v $mm --suppress 5,6,7 -p $parallel -m 100 $bowtie_strand_option $bowtieindex $input_fa > $bowtieoutfile`; 
+
+
+
+print STDERR "Filtering best-matching alignments...\n";
+if($mm > 0) {
+  system("$perlcommand $bestbowtieout_script -d $datatype --rm $bowtieoutfile 0");
+}
+else {
+  system("mv $bowtieoutfile $bestbowtieoutfile");
+}
+
+print STDERR "Computing the mapping stat...\n";
+system("$perlcommand $bowtieout2mappingstat_script -d $datatype $bestbowtieoutfile $sample 0 > $mapping_stat_file");
+print STDERR "Mapping stat :\n";
+system("cat $mapping_stat_file");
+
+if($noNIR==0){
+  system("$perlcommand $bowtieout2readcount_script -d $datatype --gNIR -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.gNIR");
+  system("$perlcommand $bowtieout2readcount_script -d $datatype --iNIR -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.iNIR");
+}
+=cut
+#if($get_gReadcount==1) { 
+  system("$perlcommand $bowtieout2readcount_script -d $datatype -f -s $sample -v $gene2NM_file $bestbowtieoutfile 0 > $readcount_dir/$sample.mm$mm.gReadcount");
+#}
+
+
+=cut
+
+if($noNEUMA==0){
+  print STDERR "Computing FVKM and LVKM...\n";
+  `$perlcommand $NIR2LVKM_script $sample $readcount_dir mm$mm $EUMAprefix $FVKMdir $LVKMdir $EUMAcut $mapping_stat_column $mapping_stat_file 0 2 $gene2NM_file $gene2symbol_file $datatype $scriptdir`;
+}
+
+print STDERR "Done.\n";
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/bowtie2genecount.11.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,283 @@
+#!/usr/bin/perl
+
+# This version has an option of counting only the pairs with a single mate distance. (version 11).
+# This version has fixed the problem in filtering max_insert_length. (version 11).
+# This version does not print out the additional tab in the iNIR file. (version 11).
+# This version has data type option for parsing refseq headers. (version 10)
+# This version fixed the max_insert_lendis option name from max_insert_length. (version 10)
+# This version can take multiple bowtieout files separated by comma. (version 10)
+# This version can take option of max insert lendis (version 9)
+# This version can take sample name (column title) as option, if -f is selected. (version 8)
+# This version fixed the problem of double-counting cases that are strand-flipped. (version 7)
+# This version has an option of counting all reads, gNIR or iNIR. (version 7)
+# This version has two levels of verbosity and uses whole file name if run with a single bowtie file. (version 6)
+# This version has an option of using a single bowtie file name instead of bowtie directory. (version 6)
+# This version fixed the problem of not being able to find common suffix and assumes .bowtieout suffix for bowtieout files, to prevent reading other files in the directory. (version 5)
+# This version fixed the problem of going into infinite loop when run with one sample. (Version 3)
+# This version is verbose. (version 2)
+
+use Getopt::Long;
+my $verbose=0;
+my $LINEBIN=1000000;
+
+my $bowtie = 'd';  # default is directory.
+my $count_opt = 'gReadcount';
+my $samplename;  
+my $max_insert_length;
+my $readlength;
+my $datatype='E';
+my $uniq_d;  # reads with single mate distance only (applicable only for PE)
+&GetOptions ( 'v' => sub { $verbose=1 },
+              'vv' => sub { $verbose=2 },
+              'f' => sub { $bowtie='f'},  # read bowtie file instead of bowtiu directory
+              'gNIR' => sub { $count_opt = 'gNIR' },
+              'iNIR' => sub { $count_opt = 'iNIR' },
+              's|sample=s' => \$samplename,
+              'm|max_insert_length=i' => \$max_insert_length,
+              'l|readlength=i' => \$readlength,
+              'd|datatype=s' => \$datatype,
+              'u|uniq_d' => \$uniq_d
+);
+
+if($bowtie eq 'd' && defined($samplename)) { print STDERR "Warning:  -s|--sample option is ignored, when -f is not selected.\n"; }
+if(defined($max_insert_length) && !defined($readlength)) { die "readlength must be defined if max_insert_length is defined.\n"; }
+
+
+if(@ARGV<3) { &help;  exit; }
+
+
+my $gene2NM = shift @ARGV;
+my $bowtieoutdir = shift @ARGV;
+my $PE = shift @ARGV;
+
+if($PE==0 && defined($max_insert_length)) { print STDERR "Warning: -m|--max_insert_length option is ignored for single end data.\n"; }
+
+
+if($bowtie eq 'd'){
+  @bowtiefiles = split/\n/,`ls -1 $bowtieoutdir/*`;
+  if($verbose) { printf STDERR "Finished reading bowtieout dir $bowtieoutdir. Total %d files\n", scalar(@bowtiefiles); }
+}
+else {
+  push @bowtiefiles, (split/,/,$bowtieoutdir);
+}
+
+
+open GENE2NM, $gene2NM or die "Can't open gene2NM file $gene2NM\n";
+while(<GENE2NM>){
+  chomp;
+  my ($g,$t) = split/\t/;
+  $t2g{$t}=$g;
+  push @{$g2t{$g}}, $t;
+}
+close GENE2NM;
+if($verbose) { printf STDERR "Finished reading gene2NM file $gene2NM. Total %d transcripts and %d genes\n", scalar(keys(%t2g)), scalar(keys(%g2t)); }
+
+
+
+
+
+for my $i (0..$#bowtiefiles) {
+
+  my $bowtieout = $bowtiefiles[$i];
+  my %mapped_genes =();
+  my %mapped_tr =();
+  my %mapped_insert_lengths =();
+
+  if($verbose) { print STDERR "Processing bowtieout $bowtieout (file $i)\n"; }
+
+
+  my $prev_head=''; my $prev_tr='';
+  open $BOWTIE, $bowtieout or die "Can't open bowtiefile $bowtieout\n";
+  while(<$BOWTIE>){
+    chomp;
+    my ($head,$tr,$pos1) = (split/\t/)[0,2,3];
+    if($datatype eq 'R') { my ($tmp_tr) = (split/\|/,$tr)[3]; if(defined($tmp_tr)) {$tr = $tmp_tr; $tr=~s/\.\d+$//; }}
+
+
+    my $insert_length=0;   ## default, for SE.
+    if($PE==1){
+      chomp(my $secondline = <$BOWTIE>);
+      my ($head2,$pos2) = (split/\t/,$secondline)[0,3];
+      $head = &min($head,$head2);
+      $insert_length = $pos2 - $pos1 + $readlength;
+      if(defined $max_insert_length) { if($insert_length > $max_insert_length) { next; } }
+    }
+
+
+    if($verbose==2) { if($prev_head eq '') { print STDERR "head= $head, tr= $tr, PE = $PE\n"; }  }   # print only at first line
+
+
+    if($count_opt eq 'gNIR'){
+       if($head eq $prev_head) {
+          $mapped_tr { $tr } = 1;
+          $mapped_tr { $prev_tr } = 1;
+          $mapped_genes { $t2g{$tr} } = 1;
+          $mapped_genes { $t2g{$prev_tr} } = 1;
+       }
+       else {
+          if(scalar(keys(%mapped_genes)) == 1) { my ($gene) = keys %mapped_genes; if(scalar(keys(%mapped_tr)) == scalar(@{$g2t{$gene}})) { $count{$gene}[$i]++; }}  
+          ## count reads that are unique to the gene and common to all of its isoforms
+          %mapped_genes =();
+          %mapped_tr = ();
+          $mapped_tr { $tr } = 1;
+          $mapped_genes { $t2g{$tr} } = 1;
+       }
+
+    }
+    elsif($count_opt eq 'iNIR'){
+       if($head eq $prev_head) {
+          $mapped_tr { $tr } = 1;
+          $mapped_tr { $prev_tr } = 1;
+       }
+       else {
+          if(scalar(keys(%mapped_tr)) == 1) { my ($transcript) = keys %mapped_tr; $count{$transcript}[$i]++; }  ## count reads that are unique to the transcript
+          %mapped_tr = ();
+          $mapped_tr { $tr } = 1;
+       }
+    }
+    else {
+       if($head eq $prev_head) {
+          $mapped_genes { $t2g{$tr} } = 1;
+          $mapped_genes { $t2g{$prev_tr} } = 1;
+          $mapped_insert_lengths { "$insert_length" } =1;
+          $mapped_insert_lengths { "$prev_insert_length" } =1;
+       }
+       else {
+           if(scalar(keys(%mapped_genes)) == 1) { 
+              my ($gene) = keys %mapped_genes; 
+              if(defined($uniq_d)) { if(scalar(keys(%mapped_insert_lengths))==1) { $count{$gene}[$i]++; } }
+              else { $count{$gene}[$i]++;  }
+           }  ## count reads that are unique to the gene
+           %mapped_genes =();
+           %mapped_insert_lengths =();
+           $mapped_genes { $t2g{$tr} } = 1;
+           $mapped_insert_lengths { "$insert_length" } =1;
+       }
+    }
+
+
+    $prev_head = $head;
+    $prev_tr = $tr;
+    $prev_insert_length = $insert_length;
+
+    if($verbose==2) { if($. % $LINEBIN==0) { print STDERR ":line $.\n"; } }
+
+  }
+  close $BOWTIE;
+
+
+  # last line
+  if($count_opt eq 'gNIR'){
+    if(scalar(keys(%mapped_genes)) == 1) { my ($gene) = keys %mapped_genes; if(scalar(keys(%mapped_tr)) == scalar(@{$g2t{$gene}})) { $count{$gene}[$i]++; }}  
+    ## count reads that are unique to the gene and common to all of its isoforms
+  }
+  elsif($count_opt eq 'iNIR'){
+    if(scalar(keys(%mapped_tr)) == 1) { my ($transcript) = keys %mapped_tr; $count{$transcript}[$i]++; }  ## count reads that are unique to the transcript
+  }
+  else{
+    if(scalar(keys(%mapped_genes)) == 1) { 
+      my ($gene) = keys %mapped_genes; 
+      if(defined($uniq_d)) { if(scalar(keys(%mapped_insert_lengths))==1) { $count{$gene}[$i]++; } }
+      else { $count{$gene}[$i]++;  }
+    }  ## count reads that are unique to the gene
+  }
+
+}
+
+
+$"="\t";
+
+
+if($count_opt eq 'gNIR' || $count_opt eq 'gReadcount') { $count_target='genes'; } else { $count_target = 'transcripts'; }
+if($verbose) { printf STDERR "Finished reading bowtieout files. Total %d $count_target counted\n",scalar(keys(%count)); }
+
+
+$minlen=1000000;
+for my $file (@bowtiefiles){
+  $minlen = length($file) if length($file)<$minlen;
+}
+
+
+# find longest common suffix for bowtiefiles
+my %suffs;
+my $k=0;
+do {
+ %suffs=();
+ $k++;
+ for my $file (@bowtiefiles){
+   $suff = substr($file,length($file)-$k);
+   $suffs{$suff}=1;
+ }
+} while ( scalar(keys(%suffs))==1 &&  $k<$minlen); 
+
+my ($commonsuffix) = substr($bowtiefiles[0],length($file)-($k-1));
+if(scalar(@bowtiefiles) == 1) { $commonsuffix = ''; }
+
+# title is between "/" and common suffix.
+my @title=();
+for my $file (@bowtiefiles){
+  if($file=~ /([^\/]+)$commonsuffix$/) { push @title, $1; }
+  else { push @title, $file; }
+}
+if(defined($samplename)) { @title=(); push @title, $samplename; }
+
+
+if($verbose) { print STDERR "Printing to file...\n"; }
+
+if($count_opt eq 'gNIR' || $count_opt eq 'gReadcount'){
+  print "gene\t@title\n";
+  for my $gene (sort keys %count){
+   print "$gene";
+   for my $i (0..$#bowtiefiles){
+     if(defined $count{$gene}[$i]) { print "\t$count{$gene}[$i]"; }
+     else { print "\t0"; }
+   }
+   print "\n";
+  }
+}
+else {  ## iNIR
+  print "gene\ttranscript\t@title\n";
+  for my $tr (sort keys %count){
+   print "$t2g{$tr}\t$tr";
+   for my $i (0..$#bowtiefiles){
+     if(defined $count{$tr}[$i]) { print "\t$count{$tr}[$i]"; }
+     else { print "\t0"; }
+   }
+   print "\n";
+  }
+}
+
+if($verbose) { print STDERR "done.\n"; }
+
+
+
+## subroutines ##
+
+
+
+sub help {
+ print STDERR<<EOF
+
+usage: $0 <options> gene2NM bowtieoutdir/file PE[1/0] > output.readcount
+
+Options:
+  -v : verbose
+  -vv : more verbose
+  -f : use bowtie file name instead of bowtie directory name.
+       ex) $0 -f gene2NM bowtieoutfile PE > output.readcount
+       ex2) $0 gene2NM bowtieoutdir PE > output.readcount
+  --gNIR : count gNIR instead of gReadcount (recommended outfile suffix is .gNIR)
+  --iNIR : count iNIR instead of gReadcount (recommended outfile suffix is .iNIR)
+  -s|--sample <samplename> : sample name to be used as a column title. This options is available only when -f option is used.
+  -m|--max_insert_length <max_insert_length> : maximum insert length. If this option is used, length filtering is done. This option must accompany option -l (--readlength). This option doesn't do anything for single-end data (ignored)
+  -l|--readlength <readlength> : read length. This is required only when -m (--max_insert_length) option is used.
+  -d|--datatype <datatype> : Either 'R' (Refseq) or 'E' (Ensembl). Use R if transcript names are in Refseq format (gi|xxx|xxx|name). (default E).
+  -u|--uniq_d : filter reads with a single mate distance only. (applicable for only PE gReadcount)
+EOF
+}
+
+sub min {
+   if($_[0] lt $_[1]) { $_[0] } else { $_[1] };
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/bowtieout2insertlendis.PE.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+if(@ARGV<2) { print "usage: $0 bowtiefile readlength > insertlendis_file\n"; exit; }
+
+my $bowtiefile = shift @ARGV;
+my $readlength = shift @ARGV;
+
+
+open BOWTIE, $bowtiefile or die "Can't open bowtieout file $bowtiefile\n";
+while(<BOWTIE>){
+  chomp;
+  my ($pos1) = (split/\t/)[3];
+  chomp(my $secondline = <BOWTIE>);
+  my ($pos2) = (split/\t/,$secondline)[3];
+  my $dist = $pos2 + $readlength - $pos1;
+  $Insert_lendis[$dist]++;
+
+}
+close BOWTIE;
+
+
+for my $i (0..$#Insert_lendis){
+   print "$i\t$Insert_lendis[$i]\n" if defined($Insert_lendis[$i]);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/bowtieout2mappingstat.2.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,103 @@
+#!/usr/bin/perl
+use Getopt::Long;
+# This version includes max_insert_length filtered mapping stat, optionally. (version 2)
+
+
+my $max_insert_length;
+my $readlength;
+my $datatype = 'E';  # E or R.
+&GetOptions ( 'm|max_insert_length=i' => \$max_insert_length,
+              'l|readlength=i' => \$readlength,
+              'd|datatype=s' => \$datatype
+);
+
+if(defined($max_insert_length) && !defined($readlength)) {  die "readlength (-l|--readlength) must be defined if -m (--max_insert_length) is defined.\n"; }
+if(@ARGV<3){ &help; exit; }
+
+
+my $bowtie = shift @ARGV;
+my $sample_name = shift @ARGV;
+my $PE = shift @ARGV;
+
+my $prev_head='';
+my $NM=0; my $NR=0; my $NM_lf = 0; my $NR_lf = 0;   # lf : insert-length-filtered version
+my $countNM=0; my $countNM_lf =0;
+my $countNR=0; my $countNR_lf = 0;
+my $countAll=0; my $countAll_lf=0;
+
+
+open IN, $bowtie or die "Can't open bowtiefile $bowtie\n";
+while(<IN>){
+  chomp;
+  my ($head,$tr,$pos1)= (split/\t/)[0,2,3];
+  if($datatype eq 'R') { ($tr) = (split/\|/,$tr)[3];  $tr=~s/\.\d+$//; }
+
+  if($PE==1){
+    chomp(my $secondline = <IN>);
+    my ($head2,$pos2) = (split/\t/,$secondline)[0,3];
+    $head = &min($head,$head2);
+
+  }
+
+  if($prev_head eq $head) { 
+
+     if(defined($max_insert_length) && $pos2+$readlength-$pos1 <= $max_insert_length) { 
+           if($tr=~/^NM_/) { $NM_lf = 1; }
+           elsif($tr=~/^NR_/) { $NR_lf = 1; }
+     }
+
+     if($tr=~/^NM_/) { $NM=1; }
+     elsif($tr=~/^NR_/) { $NR=1; }
+  }
+  else {
+
+     if(defined($max_insert_length) && $pos2+$readlength-$pos1 <= $max_insert_length) {
+         if($NM_lf==1) { $countNM_lf++; $NM_lf=0; }
+         if($NR_lf==1) { $countNR_lf++; $NR_lf=0; }
+         $countAll_lf++;
+     }
+     if($NM==1) { $countNM++; $NM=0; }    ## if a single read is mapped to both NR and NM, it is counted once for each. Therefore total count may not be the same as the sum of NR and NM counts.
+     if($NR==1) { $countNR++; $NR=0; }
+     $countAll++;
+  }
+  $prev_head=$head;
+}
+# no need to do additional last line
+close IN;
+
+
+if($datatype eq 'E') { ($countNM,$countNR,$countNM_lf,$countNM_lf) = ('NA','NA','NA','NA'); }
+
+$"="\t";
+if(defined($max_insert_length)){
+  print "sample_name\tmRNA_mapped\tncRNA_mapped\ttotalRNA_mapped\tmRNA_mapped(lenfiltered)\tncRNA_mapped(lenfiltered)\ttotalRNA_mapped(lenfiltered)\n";
+  print "$sample_name\t$countNM\t$countNR\t$countAll\t$countNM_lf\t$countNR_lf\t$countAll_lf\n";
+}
+else {
+  print "sample_name\tmRNA_mapped\tncRNA_mapped\ttotalRNA_mapped\n";
+  print "$sample_name\t$countNM\t$countNR\t$countAll\n";
+}
+
+
+
+sub min {
+   if($_[0] lt $_[1]) { $_[0] } else { $_[1] };
+}
+
+
+sub help {
+  print STDERR<<EOF;
+
+usage: $0 <options> bowtieoutfile samplename PE(1/0) (> mappingstat) 
+
+Options:
+  -m|--max_insert_length <max_insert_length> : the output includes insert-length-filtered mapping stats as well. -l(--readlength) option must be accompanied. This option is valid only for paired-end data. (For single-end, it is ignored)
+  -l|--readlength <readlength> : read length. This is required only when -m(--max_insert_length) option is used.
+  -d|--datatype <datatype> : Either E (Ensembl) or R (Refseq). Use R if the data is Refseq and the transcript names are in format 'gi|xxx|xxx|name'. (Default E)
+
+EOF
+}
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/bowtieout2mappingstat.3.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+use Getopt::Long;
+# This version includes max_insert_length filtered mapping stat, optionally. (version 2)
+
+
+my $max_insert_length;
+my $readlength;
+my $datatype = 'E';  # E or R.
+&GetOptions ( 'm|max_insert_length=i' => \$max_insert_length,
+              'l|readlength=i' => \$readlength,
+              'd|datatype=s' => \$datatype
+);
+
+if(defined($max_insert_length) && !defined($readlength)) {  die "readlength (-l|--readlength) must be defined if -m (--max_insert_length) is defined.\n"; }
+if(@ARGV<3){ &help; exit; }
+
+
+my $bowtie = shift @ARGV;
+my $sample_name = shift @ARGV;
+my $PE = shift @ARGV;
+
+my $prev_head='';
+my $NM=0; my $NR=0; my $all=0; my $NM_lf = 0; my $NR_lf = 0;  my $all_lf=0; # lf : insert-length-filtered version
+my $countNM=0; my $countNM_lf =0;
+my $countNR=0; my $countNR_lf = 0;
+my $countAll=0; my $countAll_lf=0;
+
+
+open IN, $bowtie or die "Can't open bowtiefile $bowtie\n";
+while(<IN>){
+  chomp;
+  my ($head,$tr,$pos1)= (split/\t/)[0,2,3];
+  if($datatype eq 'R') { my ($tmp_tr) = (split/\|/,$tr)[3]; if(defined($tmp_tr)) { $tr = $tmp_tr;  $tr=~s/\.\d+$//; }}
+
+  if($PE==1){
+    chomp(my $secondline = <IN>);
+    my ($head2,$pos2) = (split/\t/,$secondline)[0,3];
+    $head = &min($head,$head2);
+
+  }
+
+  if($prev_head ne $head) { 
+
+     if($NM_lf==1) { $countNM_lf++; $NM_lf=0; }
+     if($NR_lf==1) { $countNR_lf++; $NR_lf=0; }
+     if($all_lf==1) { $countAll_lf++; $all_lf=0; }
+
+     if($NM==1) { $countNM++; $NM=0; }    ## if a single read is mapped to both NR and NM, it is counted once for each. Therefore total count may not be the same as the sum of NR and NM counts.
+     if($NR==1) { $countNR++; $NR=0; }
+     if($all==1) { $countAll++; $all=0; }
+  }
+  if(defined($max_insert_length) && $pos2+$readlength-$pos1 <= $max_insert_length) { 
+           if($tr=~/^NM_/) { $NM_lf = 1; }
+           elsif($tr=~/^NR_/) { $NR_lf = 1; }
+           $all_lf = 1; 
+  }
+
+  if($tr=~/^NM_/) { $NM=1; }
+  elsif($tr=~/^NR_/) { $NR=1; }
+  $all=1;
+  $prev_head=$head;
+}
+
+# last line
+if($NM_lf==1) { $countNM_lf++; $NM_lf=0; }
+if($NR_lf==1) { $countNR_lf++; $NR_lf=0; }
+if($all_lf==1) { $countAll_lf++; $all_lf=0; }
+
+if($NM==1) { $countNM++; $NM=0; }    ## if a single read is mapped to both NR and NM, it is counted once for each. Therefore total count may not be the same as the sum of NR and NM counts.
+if($NR==1) { $countNR++; $NR=0; }
+if($all==1) { $countAll++; $all=0; }
+
+
+close IN;
+
+
+if($datatype eq 'E') { ($countNM,$countNR,$countNM_lf,$countNM_lf) = ('NA','NA','NA','NA'); }
+
+$"="\t";
+if(defined($max_insert_length)){
+  print "sample_name\tmRNA_mapped\tncRNA_mapped\ttotalRNA_mapped\tmRNA_mapped(lenfiltered)\tncRNA_mapped(lenfiltered)\ttotalRNA_mapped(lenfiltered)\n";
+  print "$sample_name\t$countNM\t$countNR\t$countAll\t$countNM_lf\t$countNR_lf\t$countAll_lf\n";
+}
+else {
+  print "sample_name\tmRNA_mapped\tncRNA_mapped\ttotalRNA_mapped\n";
+  print "$sample_name\t$countNM\t$countNR\t$countAll\n";
+}
+
+
+
+sub min {
+   if($_[0] lt $_[1]) { $_[0] } else { $_[1] };
+}
+
+
+sub help {
+  print STDERR<<EOF;
+
+usage: $0 <options> bowtieoutfile samplename PE(1/0) (> mappingstat) 
+
+Options:
+  -m|--max_insert_length <max_insert_length> : the output includes insert-length-filtered mapping stats as well. -l(--readlength) option must be accompanied. This option is valid only for paired-end data. (For single-end, it is ignored)
+  -l|--readlength <readlength> : read length. This is required only when -m(--max_insert_length) option is used.
+  -d|--datatype <datatype> : Either E (Ensembl) or R (Refseq). Use R if the data is Refseq and want to have NM and NR counts. If option R is used, transcript name format is automatically detected between NM_xxx and gi|xxx|xxx|NM_xxx. (Default E)
+
+EOF
+}
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/calculate_gEUMA.2.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+# Compute gEUMA for each gene from the gUtable file and insertlendis file and print out only genes whose gEUMA is larger than 0.
+
+if(@ARGV<3) { print "usage: $0 gUtable_file insertlendis_file READLENGTH\n"; exit; }
+
+my ($gUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;
+
+
+my $totalfreq=0;
+open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
+while(<LENDIS>){
+ chomp;
+ my ($insert_size, $freq) = split/\t/;
+ $insertlendis{"$insert_size"}=$freq;
+ $totalfreq+=$freq;
+}
+close LENDIS;
+
+
+
+print "gene\tgEUMA\n";  #header
+open GUTABLE, $gUtable_file or die "Can't open gUtable file $gUtable_file\n";
+chomp($header = <GUTABLE>); 
+@distances = split/\t/,$header;
+shift @distances;   #'gene'
+while(<GUTABLE>){
+ chomp;
+ my @line = split/\t/;
+ my $gene = shift @line;
+ my $EUMA=0;
+ for my $i (0..$#distances){
+    $d=$distances[$i];
+    $pUMA = $line[$i] * ($insertlendis{$d}/$totalfreq) ;
+    $EUMA += $pUMA;
+ }
+ if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; }  # use >=0.005  instead of >0, because I'm plotting two decimals after point. (in bp)
+}
+close GUTABLE;
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/calculate_gEUMA.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+# Compute gEUMA for each gene from the gUtable file and insertlendis file and print out only genes whose gEUMA is larger than 0.
+
+if(@ARGV<3) { print "usage: $0 gUtable_file insertlendis_file READLENGTH\n"; exit; }
+
+my ($gUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;
+
+open GUTABLE, $gUtable_file or die "Can't open gUtable file $gUtable_file\n";
+chomp($header = <GUTABLE>); 
+@distances = split/\t/,$header;
+shift @distances;   #'gene'
+while(<GUTABLE>){
+ chomp;
+ my @line = split/\t/;
+ my $gene = shift @line;
+ for my $i (0..$#distances){
+   $gUtable{$gene}{"$distances[$i]"}=$line[$i];
+ }
+}
+close GUTABLE;
+
+
+my $totalfreq=0;
+open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
+while(<LENDIS>){
+ chomp;
+ my ($insert_size, $freq) = split/\t/;
+ $insertlendis{"$insert_size"}=$freq;
+ $totalfreq+=$freq;
+}
+close LENDIS;
+
+print "gene\tgEUMA\n";  #header
+for my $gene (sort keys %gUtable){
+  my $EUMA=0;
+  for my $d (keys %{$gUtable{$gene}}){
+    $pUMA = $gUtable{$gene}{$d} * ($insertlendis{$d}/$totalfreq) ;
+    $EUMA += $pUMA;
+  }
+  if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; }  # use >=0.005  instead of >0, because I'm plotting two decimals after point. (in bp)
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/calculate_iEUMA.2.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+# Compute gEUMA for each gene from the iUtable file and insertlendis file and print out only genes whose gEUMA is larger than 0.
+
+if(@ARGV<3) { print "usage: $0 iUtable_file insertlendis_file READLENGTH\n"; exit; }
+
+my ($iUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;
+
+
+my $totalfreq=0;
+open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
+while(<LENDIS>){
+ chomp;
+ my ($insert_size, $freq) = split/\t/;
+ $insertlendis{"$insert_size"}=$freq;
+ $totalfreq+=$freq;
+}
+close LENDIS;
+
+
+
+print "gene\tgEUMA\n";  #header
+open IUTABLE, $iUtable_file or die "Can't open iUtable file $iUtable_file\n";
+chomp($header = <IUTABLE>); 
+@distances = split/\t/,$header;
+shift @distances;   #'gene'
+shift @distances;   #'transcript
+while(<IUTABLE>){
+ chomp;
+ my @line = split/\t/;
+ my $gid = shift @line;
+ my $transcript = shift @line;
+ my $gene = "$gid\t$transcript";
+
+ my $EUMA=0;
+ for my $i (0..$#distances){
+    $d=$distances[$i];
+    $pUMA = $line[$i] * ($insertlendis{$d}/$totalfreq) ;
+    $EUMA += $pUMA;
+ }
+ if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; }  # use >=0.005  instead of >0, because I'm plotting two decimals after point. (in bp)
+}
+close IUTABLE;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/calculate_iEUMA.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,44 @@
+#!/usr/bin/perl
+# Compute iEUMA for each isoform from the iUtable file and insertlendis file and print out only isoforms whose iEUMA is larger than 0.
+
+if(@ARGV<2) { print "usage: $0 iUtable_file insertlendis_file READLENGTH\n"; exit; }
+
+my ($iUtable_file,$insertlendis_file,$READLENGTH) = @ARGV;
+
+open IUTABLE, $iUtable_file or die "Can't open iUtable file $iUtable_file\n";
+chomp($header = <IUTABLE>); 
+@distances = split/\t/,$header;
+shift @distances;   #'gene'
+while(<IUTABLE>){
+ chomp;
+ my @line = split/\t/;
+ my $gid = shift @line;
+ my $transcript = shift @line;
+ my $gene = "$gid\t$transcript";
+ for my $i (0..$#distances){
+   $iUtable{$gene}{"$distances[$i]"}=$line[$i];
+ }
+}
+close IUTABLE;
+
+
+my $totalfreq=0;
+open LENDIS, $insertlendis_file or die "Can't open insertlendis file $insertlendis_file\n";
+while(<LENDIS>){
+ chomp;
+ my ($insert_size, $freq) = split/\t/;
+ $insertlendis{$insert_size}=$freq;
+ $totalfreq+=$freq;
+}
+close LENDIS;
+
+print "gene\ttranscript\tiEUMA\n";  #header
+for my $gene (sort keys %iUtable){
+  my $EUMA=0;
+  for my $d (keys %{$iUtable{$gene}}){
+    $pUMA = $iUtable{$gene}{$d} * ($insertlendis{$d}/$totalfreq) ;
+    $EUMA += $pUMA;
+  }
+  if($EUMA>=0.005) { printf "$gene\t%.2f\n",$EUMA; }  # use >=0.005  instead of >0, because I'm plotting two decimals after point. (in bp)
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/combine_gFVKM_iFVKM_max.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,233 @@
+#!/usr/bin/perl
+
+#This script was adopted and modified from ``scripts/combine_gFVKM_iFVKM_5.pl`.
+
+
+if(@ARGV<4) { print "usage: $0 gFVKM_file iFVKM_file gene2NM_file EUMA_CUT(eg.50, in bp) output.final.gFVKM output.final.iFVKM\n"; exit; }
+my ($gFVKM_file, $iFVKM_file, $gene2NM_file, $CUT, $final_gFVKM_file, $final_iFVKM_file ) = @ARGV;
+
+
+my %gFVKM=();
+my %gEUMA=();
+my %allgenes=();
+&read_gFVKM($gFVKM_file,\%gFVKM,\%gEUMA,\%allgenes);
+
+
+my %iFVKM=();
+my %iEUMA=();
+my %Alltranscirpts=();
+&read_iFVKM($iFVKM_file,\%iFVKM,\%iEUMA,\%Allisoforms);
+
+
+my %gene2NM=();
+&read_gene2NM($gene2NM_file,\%gene2NM);
+
+
+my %derived_gFVKM = ();
+&compute_derived_gFVKM(\%derived_gFVKM,\%iFVKM,\%allgenes);
+
+my %final_gFVKM=();
+&compute_final_gFVKM(\%allgenes,\%gFVKM,\%derived_gFVKM,\%final_gFVKM);
+
+my %final_iFVKM=();
+my %iFVKM_factor=();
+&compute_final_iFVKM(\%final_iFVKM,\%iFVKM,\%final_gFVKM,\%iFVKM_factor,\%Allisoforms)
+
+&generate_final_gFVKM_file($final_gFVKM_file,\%allgenes,\%gFVKM,\%derived_gFVKM,\%gEUMA,\%iFVKM,\%gene2NM,\%final_gFVKM);
+&generate_final_iFVKM_file($final_iFVKM_file,\%iFVKM,\%final_iFVKM,\%iFVKM_factor,\%iEUMA,\%gene2NM,\%Allisoforms);
+
+
+
+#################
+## subroutines ##
+#################
+
+
+sub read_gFVKM {
+ my $gFVKM_file = shift @_;
+ my $pGFVKM = shift @_;
+ my $pgEUMA = shift @_;
+ my $pAllgenes = shift @_;
+
+ open IN, $gFVKM_file or die "Can't open gFVKM file $gFVKM_file\n";
+ while(<IN>){
+  chomp;
+  my ($gene,$gFVKM_0,$gEUMA_0) = (split/\t/)[0,1,3];
+  next if $gEUMA_0 < $CUT;
+  $pGFVKM->{$gene} = $gFVKM_0;
+  $pgEUMA->{$gene} = $gEUMA_0;
+  $pAllgenes->{$gene}=1;
+ }
+ close IN;
+}
+
+
+sub read_iFVKM {
+ my $iFVKM_file = shift @_;
+ my $pIFVKM = shift @_;
+ my $piEUMA = shift @_;
+ my $pAllisoforms = shift @_;
+
+ open IN, $iFVKM_file or die "Can't open iFVKM file $iFVKM_file\n";
+ while(<IN>){
+  chomp;
+  my ($gid,$isoform,$iFVKM_0,$iEUMA_0) = (split/\t/)[0,1,2,4];
+  next if $iEUMA_0 < $CUT;
+  $pIFVKM->{$gid}{$isoform} = $iFVKM_0;
+  $piEUMA->{$gid}{$isoform} = $iEUMA_0;
+  $pAllisoforms->{$gid}{$isoform} = 1;
+ }
+ close IN;
+}
+
+
+sub read_gene2NM {
+ my $gene2NM_file = shift @_;
+ my $pGene2NM = shift @_;
+
+ open IN, $gene2NM_file or die "Can't open gene2NM file $gene2NM_file\n";
+ while(<IN>){
+  chomp;
+  my ($gid,$isoform) = split/\t/;
+  $pGene2NM->{$gid}{$isoform}=1;
+ }
+ close IN;
+}
+
+
+
+sub compute_derived_gFVKM {
+ my $pDerived_gFVKM = shift @_;
+ my $pIFVKM = shift @_;
+ my $pAllgenes = shift @_;
+
+ #compute derived gFVKM
+ for my $gene (keys %$pIFVKM){
+   my $sumiFVKM = &sum(values(%{$pIFVKM->{$gene}}));
+   $pDerived_gFVKM->{$gene} = $sumiFVKM;
+   $pAllgenes->{$gene}=1;
+ }
+}
+
+
+sub compute_final_iFVKM {
+ my $pFinal_iFVKM = shift @_;
+ my $pIFVKM = shift @_;
+ my $pFinalGFVKM = shift @_;
+ my $pIFVKM_factor = shift @_;
+ my $pAllisoforms = shift @_;
+
+ #rescale iFVKM based on final gFVKM
+ for my $gene (keys %$pIFVKM){
+  my $sumiFVKM = &sum(values %{$pIFVKM->{$gene}});
+  if($sumiFVKM == 0) { $pIFVKM_factor->{$gene} = "inf"; }
+  else { $pIFVKM_factor->{$gene} = $pFinalGFVKM->{$gene} / $sumiFVKM; } 
+  for my $isoform (keys %{$pIFVKM->{$gene}}) {
+     $pFinal_iFVKM->{$gene} = $pIFVKM->{$gene};
+     $pAllisoforms->{$gene}{$isoform} =1;
+  }
+ }
+
+}
+
+
+# using max of gFVKM and sumiFVKM, instead of average.
+sub compute_final_gFVKM {
+ my $pAllgenes = shift @_;
+ my $pGFVKM = shift @_;
+ my $pDerived_gFVKM = shift @_;
+ my $pFinal_gFVKM = shift @_;
+
+ for my $gene (sort keys %$pAllgenes){
+  my $orig_gFVKM = (exists $pGFVKM->{$gene})?$pGFVKM->{$gene}:'NA';
+  my $derived_gFVKM_val = (exists $pDerived_gFVKM->{$gene})?$pDerived_gFVKM->{$gene}:'NA';
+  my $final_gFVKM = 'NA';
+  if($orig_gFVKM ne 'NA' && $derived_gFVKM_val ne 'NA'){
+     #$final_gFVKM = ($orig_gFVKM+$derived_gFVKM_val)/2;   #average
+     $final_gFVKM = ($orig_gFVKM>=$derived_gFVKM_val?$orig_gFVKM:$derived_gFVKM_val);   #maximum
+  }
+  elsif($orig_gFVKM eq 'NA') { $final_gFVKM = $derived_gFVKM_val; }
+  elsif($derived_gFVKM_val eq 'NA') { $final_gFVKM = $orig_gFVKM; }
+
+  $pFinal_gFVKM->{$gene} = $final_gFVKM;
+ }
+}
+
+
+
+
+sub generate_final_gFVKM_file {
+ my $final_gFVKM_file = shift @_;
+ my $pAllgenes = shift @_;
+ my $pGFVKM = shift @_;
+ my $pDerived_gFVKM = shift @_;
+ my $pgEUMA = shift @_;
+ my $pIFVKM = shift @_;
+ my $pGene2NM = shift @_;
+ my $pFinal_gFVKM = shift @_;
+
+ open FG_FVKM, ">$final_gFVKM_file" or die "Can't write to final gFVKM file $final_gFVKM_file\n";
+ print FG_FVKM "gene\tfinal.gFVK\torig.gFVK\tderived_gFVK\tgEUMA\t#isoforms\t#measured_isoforms\n";
+ for my $gene (sort keys %$pAllgenes){
+
+  my $orig_gFVKM = (exists $pGFVKM->{$gene})?$pGFVKM->{$gene}:'NA';
+  my $derived_gFVKM = (exists $pDerived_gFVKM->{$gene})?$pDerived_gFVKM->{$gene}:'NA';
+  my $final_gFVKM = (exists $pFinal_gFVKM->{$gene})?$pFinal_gFVKM->{$gene}:'NA';
+
+  my $gEUMA='';
+  if(exists $pgEUMA->{$gene}){ $gEUMA = $pgEUMA->{$gene}; }
+  else { $gEUMA = 'below_cut'; }
+
+  my $nTr = scalar keys %{$pGene2NM->{$gene}};
+  my $nTr_measured = scalar keys %{$pIFVKM->{$gene}};
+
+  print FG_FVKM "$gene\t$final_gFVKM\t$orig_gFVKM\t$derived_gFVKM\t$gEUMA\t$nTr\t$nTr_measured\n";
+ }
+ close FG_FVKM;
+}
+
+
+
+sub generate_final_iFVKM_file {
+ my $final_iFVKM_file = shift @_;
+ my $pIFVKM = shift @_;
+ my $pFinal_iFVKM = shift @_;
+ my $pIFVKM_factor = shift @_;
+ my $piEUMA = shift @_;
+ my $pGene2NM = shift @_;
+ my $pAllisoforms = shift @_;
+
+ #print scalar keys %$pDerived_iFVKM;  ##DEBUGGING
+
+ open FI_FVKM, ">$final_iFVKM_file" or die "Can't write to final iFVKM file $final_iFVKM_file\n";
+ print FI_FVKM "gene\tisoform\tfinal.iFVK\torig.iFVK\tiFVKM.factor\tiEUMA\t#isoforms\t#measured_isoforms\n";
+ for my $gene (sort keys %$pAllisoforms){
+
+  my $nTr = scalar keys %{$pGene2NM->{$gene}};
+  my $nTr_measured = scalar keys %{$pIFVKM->{$gene}};
+
+  my $iFVKM_factor = (exists $pIFVKM_factor->{$gene})?$pIFVKM_factor->{$gene}:'NA';
+
+  for my $isoform (sort keys %{$pAllisoforms->{$gene}}){
+
+   my $iEUMA='';
+   if(exists $piEUMA->{$gene}{$isoform}) { $iEUMA = $piEUMA->{$gene}{$isoform}; }
+   else { $iEUMA = 'below_cut'; }
+ 
+   my $final_iFVKM = (exists $pFinal_iFVKM->{$gene} && exists ${$pFinal_iFVKM->{$gene}}{$isoform})?$pFinal_iFVKM->{$gene}{$isoform}:$pIFVKM->{$gene}{$isoform};
+   print FI_FVKM "$gene\t$isoform\t$final_iFVKM\t$pIFVKM->{$gene}{$isoform}\t$iFVKM_factor\t$iEUMA\t$nTr\t$nTr_measured\n";
+  }
+ }
+ close FI_FVKM;
+}
+
+sub sum {
+ my $s=0;
+ while(defined(my $x=shift @_)){
+  $s+=$x;
+ }
+ return $s;
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/filter.best.from.bowtieout.2.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,146 @@
+#!/usr/bin/perl
+use Getopt::Long;
+
+# This version fixed the problem of not being able to merge when the order of the two mates were flipped. (version 2)
+
+
+my $delete_original;
+my $newbowtiefile;  ## output bowtiefile
+my $mmcol=4;
+&GetOptions ( 'd|delete_original' => \$delete_original,
+              'o=s' => \$newbowtiefile,
+              'mmcol=i' => \$mmcol   ## column for mismatch info (0-based)
+);
+
+if(@ARGV<2) { &help; exit; }
+
+my $bowtiefile = shift @ARGV;
+my $PE = shift @ARGV;  # 1 for paired-end, 0 for single-end
+
+if(!defined($newbowtiefile)){
+  ($newbowtiefile) = $bowtiefile=~/(.+)\.bowtieout/;
+  $newbowtiefile .= ".best.bowtieout";
+}
+
+
+
+
+my $prev_head1=''; my $prev_head2='';
+my $totmm=100000;  # just some huge number
+my @mapping=();
+open $IN, $bowtiefile or die "Can't open bowtiefile $bowtiefile\n";
+open $OUT, ">$newbowtiefile" or die "Can't write to output bowtiefile $newbowtiefile\n";
+while(<$IN>){
+  chomp;
+  my ($head1,$strand1,$tr,$pos1,$mmstr1) = (split/\t/)[0,1,2,3,$mmcol];
+  my $mm1 = &parse_mmstr($mmstr1);
+  if($PE==1) { 
+    chomp(my $secondline = <$IN>);
+    ($head2,$strand2,$pos2,$mmstr2) = (split/\t/,$secondline)[0,1,3,$mmcol];
+    my $mm2 = &parse_mmstr($mmstr2);
+  }
+  
+  if($PE==1){
+    if($prev_head1 eq $head1 || $prev_head2 eq $head1) {
+      if($totmm>$mm1+$mm2) { @mapping=();  push @mapping,[$tr,$strand1,$pos1,$mmstr1,$strand2,$pos2,$mmstr2];  $totmm=$mm1+$mm2; }
+      elsif($totma==$mm1+$mm2) { push @mapping,[$tr,$strand1,$pos1,$mmstr1,$strand2,$pos2,$mmstr2]; }
+    }
+    else {
+      &print_mapping_PE(\@mapping,$OUT, $prev_head1,$prev_head2);
+      @mapping=();
+      push @mapping,[$tr,$strand1,$pos1,$mmstr1,$strand2,$pos2,$mmstr2];
+      $totmm=$mm1+$mm2;
+    }
+  }
+  else {
+    if($prev_head1 eq $head1) {
+      if($totmm>$mm1) { @mapping=();  push @mapping,[$tr,$strand1,$pos1,$mmstr1];  $totmm=$mm1; }
+      elsif($totmm==$mm1) { push @mapping,[$tr,$strand1,$pos1,$mmstr1]; }
+    }
+    else {
+      &print_mapping_SE(\@mapping,$OUT, $prev_head1);
+      @mapping=();
+      push @mapping,[$tr,$strand1,$pos1,$mmstr1];
+      $totmm=$mm1;
+    }
+  }
+
+  $prev_head1= $head1;
+  if($PE==1) { $prev_head2 = $head2; }
+}
+
+
+## last line
+if($PE==1){
+  &print_mapping_PE(\@mapping,$OUT, $prev_head1, $prev_head2);
+}
+else {
+  &print_mapping_SE(\@mapping,$OUT, $prev_head1);
+}
+
+close $OUT;
+close $IN;
+
+
+
+## delete original bowtieout file
+if(defined $delete_original) { `rm -f $bowtiefile`; }
+
+
+
+
+
+## subroutines @@
+
+
+sub parse_mmstr {
+  if(!defined $_[0]){ return 0; }
+  else {
+    return(scalar(split/,/,$_[0]));
+  }
+}
+
+
+sub print_mapping_PE {
+  my $pMapping = shift @_;
+  my $OUT = shift @_;
+  my $prev_head1 = shift @_;
+  my $prev_head2 = shift @_;
+
+  if($prev_head1 ne ''){
+    for my $i (0..$#$pMapping){
+      my ($tr,$strand1,$pos1,$mm1,$strand2,$pos2,$mm2) = @{$pMapping->[$i]};
+      print $OUT "$prev_head1\t$strand1\t$tr\t$pos1\t$mm1\n";
+      print $OUT "$prev_head2\t$strand2\t$tr\t$pos2\t$mm2\n";
+    }
+  }
+}
+
+
+
+sub print_mapping_SE {
+  my $pMapping = shift @_;
+  my $OUT = shift @_;
+  my $prev_head1 = shift @_;
+
+  if($prev_head1 ne ''){
+    for my $i (0..$#$pMapping){
+      my ($tr,$strand1,$pos1,$mm1) = @{$pMapping->[$i]};
+      print $OUT "$prev_head1\t$strand1\t$tr\t$pos1\t$mm1\n";
+    }
+  }
+}
+
+
+
+sub help {
+  print STDERR<<EOF
+
+usage: $0 <options> bowtieout_file PE(1/0)
+
+Options
+   -d|--delete_original  : delete the original bowtieout file
+   -o <outfilename> : output bowtieout file name (default : originalfiletitle.best.bowtieout )
+
+EOF
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/filter.best.from.bowtieout.3.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,153 @@
+#!/usr/bin/perl
+use Getopt::Long;
+
+# This version has an option of filtering only exclusively-CDS-mapped reads (useful for ribosome profiling analysis). The option name for delete_original is not cahnged. (version 3)
+# This version fixed the problem of not being able to merge when the order of the two mates were flipped. (version 2)
+
+
+my $delete_original;
+my $newbowtiefile;  ## output bowtiefile
+my $mmcol=4;
+my $datatype='E';
+&GetOptions ( 'rm|delete_original' => \$delete_original,
+              'o=s' => \$newbowtiefile,
+              'mmcol=i' => \$mmcol,   ## column for mismatch info (0-based)
+              'd|datatype=s' => \$datatype  # refseq or ensembl ('refseq' if names must be parsed)
+);
+
+if(@ARGV<2) { &help; exit; }
+
+my $bowtiefile = shift @ARGV;
+my $PE = shift @ARGV;  # 1 for paired-end, 0 for single-end
+
+if($datatype !~/^[RE]$/ ) { die "ERROR: -d (--datatype) must be specified to be either R (Refseq) or E (Ensembl) (default)\n"; }
+
+if(!defined($newbowtiefile)){
+  ($newbowtiefile) = $bowtiefile=~/(.+)\.bowtieout/;
+  $newbowtiefile .= ".best.bowtieout";
+}
+
+
+
+my $prev_head1=''; my $prev_head2='';
+my $totmm=100000;  # just some huge number
+my @mapping=();
+open $IN, $bowtiefile or die "Can't open bowtiefile $bowtiefile\n";
+open $OUT, ">$newbowtiefile" or die "Can't write to output bowtiefile $newbowtiefile\n";
+while(<$IN>){
+  chomp;
+  my ($head1,$strand1,$tr,$pos1,$mmstr1) = (split/\t/)[0,1,2,3,$mmcol];
+  my $mm1 = &parse_mmstr($mmstr1);
+  if($PE==1) { 
+    chomp(my $secondline = <$IN>);
+    ($head2,$strand2,$pos2,$mmstr2) = (split/\t/,$secondline)[0,1,3,$mmcol];
+    my $mm2 = &parse_mmstr($mmstr2);
+  }
+  
+  if($PE==1){
+    if($prev_head1 eq $head1 || $prev_head2 eq $head1) {
+      if($totmm>$mm1+$mm2) { @mapping=();  push @mapping,[$tr,$strand1,$pos1,$mmstr1,$strand2,$pos2,$mmstr2];  $totmm=$mm1+$mm2; }
+      elsif($totma==$mm1+$mm2) { push @mapping,[$tr,$strand1,$pos1,$mmstr1,$strand2,$pos2,$mmstr2]; }
+    }
+    else {
+      &print_mapping_PE(\@mapping,$OUT, $prev_head1,$prev_head2);
+      @mapping=();
+      push @mapping,[$tr,$strand1,$pos1,$mmstr1,$strand2,$pos2,$mmstr2];
+      $totmm=$mm1+$mm2;
+    }
+  }
+  else {
+    if($prev_head1 eq $head1) {
+      if($totmm>$mm1) { @mapping=();  push @mapping,[$tr,$strand1,$pos1,$mmstr1];  $totmm=$mm1; }
+      elsif($totmm==$mm1) { push @mapping,[$tr,$strand1,$pos1,$mmstr1]; }
+    }
+    else {
+      &print_mapping_SE(\@mapping,$OUT, $prev_head1);
+      @mapping=();
+      push @mapping,[$tr,$strand1,$pos1,$mmstr1];
+      $totmm=$mm1;
+    }
+  }
+
+  $prev_head1= $head1;
+  if($PE==1) { $prev_head2 = $head2; }
+}
+
+
+## last line
+if($PE==1){
+  &print_mapping_PE(\@mapping,$OUT, $prev_head1, $prev_head2);
+}
+else {
+  &print_mapping_SE(\@mapping,$OUT, $prev_head1);
+}
+
+close $OUT;
+close $IN;
+
+
+
+## delete original bowtieout file
+if(defined $delete_original) { `rm -f $bowtiefile`; }
+
+
+
+
+
+## subroutines @@
+
+
+sub parse_mmstr {
+  if(!defined $_[0]){ return 0; }
+  else {
+    return(scalar(split/,/,$_[0]));
+  }
+}
+
+
+sub print_mapping_PE {
+  my $pMapping = shift @_;
+  my $OUT = shift @_;
+  my $prev_head1 = shift @_;
+  my $prev_head2 = shift @_;
+
+  if($prev_head1 ne ''){
+    for my $i (0..$#$pMapping){
+      my ($tr,$strand1,$pos1,$mm1,$strand2,$pos2,$mm2) = @{$pMapping->[$i]};
+      print $OUT "$prev_head1\t$strand1\t$tr\t$pos1\t$mm1\n";
+      print $OUT "$prev_head2\t$strand2\t$tr\t$pos2\t$mm2\n";
+    }
+  }
+}
+
+
+
+sub print_mapping_SE {
+  my $pMapping = shift @_;
+  my $OUT = shift @_;
+  my $prev_head1 = shift @_;
+
+  if($prev_head1 ne ''){
+    for my $i (0..$#$pMapping){
+      my ($tr,$strand1,$pos1,$mm1) = @{$pMapping->[$i]};
+      print $OUT "$prev_head1\t$strand1\t$tr\t$pos1\t$mm1\n";
+    }
+  }
+}
+
+
+
+sub help {
+  print STDERR<<EOF
+
+usage: $0 <options> bowtieout_file PE(1/0)
+
+Options
+   --rm|--delete_original  : delete the original bowtieout file
+   -o <outfilename> : output bowtieout file name (default : originalfiletitle.best.bowtieout / originalfiletitle.best.cdsonly.bowtieout (if --cdsonly is used) )
+   --mmcol : the column number (0-based) for mismatch information. (default : 4)
+   -d | datatype <datatype> : Either 'R' (Refseq) or 'E' (Ensembl). Use R if the transcript names are in Refseq format (gi|xxx|xxx|name). This option affects only when --cdsonly is used. (default : E)
+EOF
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/filter_exclusive_NRgene_from_gene_table.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+
+
+if(@ARGV<2) { print "usage: $0 gene2NM_file table_file > output_file\n"; exit; }
+
+my $gene2NM_file = shift @ARGV;
+my $table_file = shift @ARGV;
+
+my %gene2NM=();
+&filtered_gene2NM($gene2NM_file,\%gene2NM);
+&read_and_print_table($table_file,\%gene2NM);
+
+
+
+
+
+
+sub read_and_print_table {
+  my $table_file = shift @_;
+  my $pGene2NM = shift @_;
+  open TABLE, $table_file or die "Can't open gene table file $table_file\n";
+  chomp(my $header=<TABLE>); print "$header\n";
+  while(<TABLE>){
+   chomp;
+   my @line = split/\t/;
+   my $gid = shift @line;
+   if(exists $pGene2NM->{$gid}) { print "$_\n"; }
+  }
+  close TABLE;
+}
+
+
+
+
+sub filtered_gene2NM{
+  my $gene2NM_file = shift @_;
+  my $pGene2NM = shift @_;
+
+  my %gene2NM_total = ();
+  my %gene2NR = ();
+  open GENE2NM, $gene2NM_file or die "Can't open gene2NM_file $gene2NM_file\n";
+  while(<GENE2NM>){
+   chomp;
+   my ($gid,$rid) = split/\t/;
+   $gene2NM_total{$gid}++;
+   if($rid=~/^NR/) { $gene2NR{$gid}++; }
+  }
+  close GENE2NM;
+  
+  for my $gid (keys %gene2NM_total){
+   if(!exists $gene2NR{$gid} || $gene2NM_total{$gid} > $gene2NR{$gid}){
+     $pGene2NM->{$gid}=1;
+   }
+  }
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/gNIR2gFVKM.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+
+if(@ARGV<2) { print "usage: $0 gNIR_file EUMA_file > output.gFVKM\n"; exit; }
+my ($gNIR_file,$EUMA_file) = @ARGV;
+
+open IN, $gNIR_file or die "Can't open gNIR_file $gNIR_file\n";
+<IN>; # skip header
+while(<IN>){
+  chomp;
+  split/\t/;
+  $RNAseq{$_[0]}=$_[1];
+}
+close IN;
+
+open IN, $EUMA_file or die "Can't open EUMA file $EUMA_file\n";
+while(<IN>){
+  chomp;
+  split/\t/;
+  $EUMA{$_[0]}=$_[1];
+}
+close IN;
+
+
+print "gene\tgFVK\tread.count\tgEUMA(bp)\n";
+for my $gene (sort keys %EUMA){
+ next if $EUMA{$gene} ==0;
+ if(!exists $RNAseq{$gene}){ $RNAseq{$gene} = 0; }
+ $gFVKM = $RNAseq{$gene} / $EUMA{$gene} * 1000;
+ print "$gene\t$gFVKM\t$RNAseq{$gene}\t$EUMA{$gene}\n";
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/iNIR2iFVKM.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,35 @@
+#!/usr/bin/perl
+
+if(@ARGV<2) { print "usage: $0 iNIR_file iEUMA_file > output.iFVKM\n"; exit; }
+my ($iNIR_file,$iEUMA_file) = @ARGV;
+
+open IN, $iNIR_file or die "Can't open iNIR_file $sample_iNIR_file\n";
+<IN>;  # skip header
+while(<IN>){
+  chomp;
+  split/\t/;
+  $gene = $_[0]."\t".$_[1];
+  $RNAseq{$gene}=$_[2];
+}
+close IN;
+
+open IN, $iEUMA_file or die "Can't open iEUMA file $iEUMA_file\n";
+while(<IN>){
+  chomp;
+  split/\t/;
+  $gene = $_[0]."\t".$_[1];
+  $iEUMA{$gene}=$_[2];
+}
+close IN;
+
+
+print "gene\tisoform\tiFVK\tread.count\tiEUMA(bp)\n";
+for my $gene (sort keys %iEUMA){
+ next if $iEUMA{$gene} ==0;
+ if(!exists $RNAseq{$gene}){ $RNAseq{$gene} = 0; }
+ my $iFVKM = $RNAseq{$gene} / $iEUMA{$gene} * 1000;
+ print "$gene\t$iFVKM\t$RNAseq{$gene}\t$iEUMA{$gene}\n";
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/merge_LVKM.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,72 @@
+#!/usr/bin/perl
+
+if(@ARGV<4) { print "usage: $0 genewise/isoformwise[g/i] EUMAcut LVKM_out_dir NR(1/0) > output.gLVKM(iLVKM).merged\n"; exit; }
+my $option = shift @ARGV;  #either g or i
+my $EUMAcut = shift @ARGV;
+my $LVKM_dir = shift @ARGV;
+my $NR_option = shift @ARGV;
+
+my $suffix = $option."LVKM";
+
+my @sample_list = split/\n/,`ls -1 $LVKM_dir`;
+for my $i (0..$#sample_list){
+ if($NR_option eq '1'){
+   if($sample_list[$i] =~ /\.\d+.-NR.[gi]LVKM$/) { $samples{$`}=1; }
+ }
+ else {
+   if($sample_list[$i] =~ /\.\d+.[gi]LVKM$/) { $samples{$`}=1; }
+ }
+}
+
+@sample_list = keys %samples;
+
+
+for my $sample (@sample_list){
+  next if($sample eq '');
+  my $file;
+  if($NR_option eq '1'){
+    $file =  "$LVKM_dir/$sample.$EUMAcut.-NR.$suffix";
+  }
+  else {
+    $file =  "$LVKM_dir/$sample.$EUMAcut.$suffix";
+  }
+
+  print STDERR "$file\n";  #DEBUGGING
+  open IN,$file or die "Can't open LVKM file $file\n";
+  <IN>;
+  while(<IN>){
+    chomp;
+
+    my $gene;
+    if($option eq 'g'){
+      my ($gid,$symbol,$expr) = (split/\t/)[0,1,2];
+      $gene = "$gid\t$symbol";
+      $EXPR{$gene}{$sample} = $expr;
+    }
+    elsif($option eq 'i'){
+      my ($gid,$symbol,$NM,$expr) = (split/\t/)[0,1,2,3];
+      $gene = "$gid\t$symbol\t$NM";
+      $EXPR{$gene}{$sample} = $expr;
+    }
+  }
+  close IN;
+}
+
+
+
+$"="\t";
+if($option eq 'g') { print "gene.id\tgene.symbol\t@sample_list\n"; }
+elsif($option eq 'i') { print "gene.id\tgene.symbol\tisoform\t@sample_list\n"; }
+for my $gene (keys %EXPR){
+ print "$gene";
+ for my $sample (@sample_list){
+   if(exists ${$EXPR{$gene}}{$sample}){
+     print "\t$EXPR{$gene}{$sample}";
+   }
+   else { print "\tNA"; }
+ }
+ print "\n";
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/merge_LVKM_readcount.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,82 @@
+#!/usr/bin/perl
+
+if(@ARGV<4) { print "usage: $0 genewise/isoformwise[g/i] EUMAcut LVKM_out_dir NR(1/0) > output.gNIR(iNIR)\n"; exit; }
+my $option = shift @ARGV;  #either g or i
+my $EUMAcut = shift @ARGV;
+my $LVKM_dir = shift @ARGV;
+my $NR_option = shift @ARGV;
+
+my $suffix = $option."LVKM";
+
+my @sample_list = split/\n/,`ls -1 $LVKM_dir`;
+for my $i (0..$#sample_list){
+ if($NR_option eq '1'){
+   if($sample_list[$i] =~ /\.\d+.-NR.[gi]LVKM$/) { $samples{$`}=1; }
+ }
+ else {
+   if($sample_list[$i] =~ /\.\d+.[gi]LVKM$/) { $samples{$`}=1; }
+ }
+}
+
+@sample_list = keys %samples;
+
+
+for my $sample (@sample_list){
+  next if($sample eq '');
+  my $file;
+  if($NR_option eq '1'){
+    $file =  "$LVKM_dir/$sample.$EUMAcut.-NR.$suffix";
+  }
+  else {
+    $file =  "$LVKM_dir/$sample.$EUMAcut.$suffix";
+  }
+
+  print STDERR "$file\n";  #DEBUGGING
+  open IN,$file or die "Can't open LVKM file $file\n";
+  <IN>;
+  while(<IN>){
+    chomp;
+
+    my $gene;
+    if($option eq 'g'){
+      my ($gid,$symbol,$origGFVK,$gEUMA) = (split/\t/)[0,1,5,7];
+      $gene = "$gid\t$symbol";
+      $temp = $origGFVK * $gEUMA;
+      if($temp == 0){
+        $EXPR{$gene}{$sample} = 0;
+      }else{
+        $EXPR{$gene}{$sample} = int(($temp + 0.5) /1000);
+      }
+    }
+    elsif($option eq 'i'){
+      my ($gid,$symbol,$NM,$finalIFVK,$iEUMA) = (split/\t/)[0,1,2,5,8];
+      $gene = "$gid\t$symbol\t$NM";
+      $temp = $finalIFVK * $iEUMA;
+      if($temp == 0){
+        $EXPR{$gene}{$sample} = 0;
+      }else{
+        $EXPR{$gene}{$sample} = int(($temp + 0.5) /1000);
+      }
+    }
+  }
+  close IN;
+}
+
+
+
+$"="\t";
+if($option eq 'g') { print "gene.id\tgene.symbol\t@sample_list\n"; }
+elsif($option eq 'i') { print "gene.id\tgene.symbol\tisoform\t@sample_list\n"; }
+for my $gene (keys %EXPR){
+ print "$gene";
+ for my $sample (@sample_list){
+   if(exists ${$EXPR{$gene}}{$sample}){
+     print "\t$EXPR{$gene}{$sample}";
+   }
+   else { print "\tNA"; }
+ }
+ print "\n";
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/merge_readcount.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+
+if(@ARGV<2) { print "usage: $0 type[gNIR/iNIR/gReadcount] readcount_dir > output.gNIR(iNIR/gReadcount).merged\n"; exit; }
+my $suffix = shift @ARGV;  #either g or i
+my $readcount_dir = shift @ARGV;
+
+my @sample_list = split/\n/,`ls -1 $readcount_dir`;
+for my $i (0..$#sample_list){
+ if($sample_list[$i] =~ /\.$suffix$/) { $samples{$`}=1; }
+}
+
+@sample_list = keys %samples;
+
+
+for my $sample (@sample_list){
+  next if($sample eq '');
+  my $file =  "$readcount_dir/$sample.$suffix";
+
+  print STDERR "$file\n";  #DEBUGGING
+  open IN,$file or die "Can't open $suffix file $file\n";
+  <IN>;
+  while(<IN>){
+    chomp;
+
+    my $gene;
+    if($suffix eq 'gNIR' || $suffix eq 'gReadcount'){
+      my ($gid,$expr) = (split/\t/)[0,1];
+      $gene = "$gid";
+      $EXPR{$gene}{$sample} = $expr;
+    }
+    elsif($suffix eq 'iNIR'){
+      my ($gid,$NM,$expr) = (split/\t/)[0,1,2];
+      #print STDERR "$gid\t$NM\t$expr\n";
+      $gene = "$gid\t$NM";
+      $EXPR{$gene}{$sample} = $expr;
+    }
+  }
+  close IN;
+}
+
+
+
+$"="\t";
+if($suffix eq 'gNIR' || $suffix eq 'gReadcount') { print "gene.id\t@sample_list\n"; }
+elsif($suffix eq 'iNIR') { print "gene.id\tisoform\t@sample_list\n"; }
+for my $gene (keys %EXPR){
+ print "$gene";
+ for my $sample (@sample_list){
+   if(exists ${$EXPR{$gene}}{$sample}){
+     print "\t$EXPR{$gene}{$sample}";
+   }
+   else { print "\t0"; }
+ }
+ print "\n";
+}
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/normalize_gFVKM_5.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/perl
+
+if(@ARGV<6) { print "usage: $0 final.gFVKM_file mapping_stat_file gene2symbol_file factor_col sample_name gLVKM_file\n"; exit; }
+
+my ($gFVKM_file,$mapping_stat_file,$gene2symbol_file,$factor_col,$sample_name,$gLVKM_file) = @ARGV;
+
+open STAT, $mapping_stat_file or die "Can't open mapping stat file $mapping_stat_file\n";
+<STAT>; ##discard header
+while(<STAT>){
+  chomp;
+  next if !/\S/;
+  my ($sample,$factor) = (split/\t/)[0,$factor_col];
+  $norm_factor1{$sample} = $factor/1000000;
+  if($factor==0) { die "Error: Total reads is zero. Please check datatype option. Use option 'E' if your reference doesn't contain 'NM' prefix for mRNA.\n"; }
+}
+close STAT;
+
+
+open GENE2SYM, $gene2symbol_file or die "can't open gene2symbol file $gene2symbol_file\n";
+while(<GENE2SYM>){
+ chomp;
+ split/\t/;
+ $gene2symbol{$_[0]}=$_[1];
+}
+close GENE2SYM;
+
+
+$"="\t";
+open OUT1, ">$gLVKM_file" or die "Can't write to norm1outfile $gLVKM_file\n";
+print OUT1 "gene.id\tgene.symbol\tgLVKM\tgFVKM\tfinal.gFVK\torig.gFVK\tderived.gFVK\tgEUMA\t#isoforms\t#measured_isoforms\n";
+
+open GFVKM, $gFVKM_file or die "Can't open gFVKM file $gFVKM_file\n";
+<GFVKM>;  #discard header
+while(<GFVKM>){
+  chomp;
+  my @line = split/\t/;
+  my $gene_id = shift @line;
+  my $gene_symbol = $gene2symbol{$gene_id};
+  $norm1=$line[0]/$norm_factor1{$sample_name};
+  $norm2=log($norm1+1)/log(2);
+  printf OUT1 "$gene_id\t$gene_symbol\t%.3f\t%.3f\t@line\n",$norm2,$norm1,;
+}
+close GFVKM;
+
+
+close OUT1;
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NEUMA-1.2.1/normalize_iFVKM_6.pl	Thu Aug 08 00:46:13 2013 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/perl
+
+if(@ARGV<6) { print "usage: $0 final.iFVKM_file mapping_stat_file gene2symbol_file factor_col sample_name iLVKM_file\n"; exit; }
+my ($iFVKM_file,$mapping_stat_file,$gene2symbol_file,$factor_col,$sample_name,$iLVKM_file) = @ARGV;
+
+open STAT, $mapping_stat_file or die "Can't open mapping stat file $mapping_stat_file\n";
+<STAT>; #discard header
+while(<STAT>){
+  chomp;
+  next if !/\S/;
+  my ($sample,$factor) = (split/\t/)[0,$factor_col];
+  $norm_factor1{$sample} = $factor/1000000;
+  if($factor==0) { die "Error: Total reads is zero. Please check datatype option. Use option 'E' if your reference doesn't contain 'NM' prefix for mRNA.\n"; }
+
+}
+close STAT;
+
+open GENE2SYM, $gene2symbol_file or die "can't open gene2symbol file $gene2symbol_file\n";
+while(<GENE2SYM>){
+ chomp;
+ split/\t/;
+ $gene2symbol{$_[0]}=$_[1];
+}
+close GENE2SYM;
+
+
+$"="\t";
+open OUT1, ">$iLVKM_file" or die "Can't write to norm1outfile $iLVKM_file\n";
+print OUT1 "gene.id\tgene.symbol\tisoform.id\tiLVKM\tiFVKM\tfinal.iFVK\torig.iFVK\tiFVKM.factor\tiEUMA\t#isoforms\t#measured_isoforms\n";
+
+open IFVKM, $iFVKM_file or die "Can't open iFVKM file $iFVKM_file\n";
+<IFVKM>;  #discard header
+while(<IFVKM>){
+  chomp;
+  my @line = split/\t/;
+  my $gene_id = shift @line;
+  my $isoform_id = shift @line;
+  my $gene_symbol = $gene2symbol{$gene_id};
+  $norm1=$line[0]/$norm_factor1{$sample_name};
+  $norm2=log($norm1+1)/log(2);
+  printf OUT1 "$gene_id\t$gene_symbol\t$isoform_id\t%.3f\t%.3f\t@line\n",$norm2,$norm1,;
+}
+close IFVKM;
+
+
+close OUT1;
+
+
+