# HG changeset patch
# User chrisd
# Date 1456115515 18000
# Node ID f95150c37d38a3ac1168356c02e45144eacbadab
planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
diff -r 000000000000 -r f95150c37d38 README
diff -r 000000000000 -r f95150c37d38 csa
Binary file csa has changed
diff -r 000000000000 -r f95150c37d38 gene_fraction.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction.xml Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,64 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Alignments.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Alignments.cpp Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,35 @@
+#include "Alignments.h"
+#include "int_util.h"
+
+#include
+
+#include
+#include
+
+Alignments::Alignments(std::string alignment) : _alignment(alignment) {
+ fill_alignment_fields(alignment);
+}
+
+void Alignments::fill_alignment_fields(const std::string &alignment) {
+ std::istringstream ss(alignment);
+ ss >> field.QNAME >> field.FLAG >> field.RNAME >> field.POS >>
+ field.MAPQ >> field.CIGAR >> field.RNEXT >> field.PNEXT >>
+ field.TLEN >> field.SEQ >> field.QUAL;
+}
+
+std::vector> Alignments::cigar() {
+ return get_cigar_operations(field.CIGAR);
+}
+
+std::vector> Alignments::get_cigar_operations(const std::string &cigar) {
+ std::vector> p;
+ int count;
+ char operation;
+
+ std::istringstream ss(cigar);
+ while(ss >> count >> operation) {
+ p.push_back(std::make_pair(count, operation));
+ }
+
+ return p;
+}
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Alignments.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Alignments.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,68 @@
+#ifndef ALIGNMENTS_H
+#define ALIGNMENTS_H
+
+#include
+#include
+
+/**
+ * Stores information about an alignment
+ */
+struct alignment_fields {
+ std::string QNAME;
+ int FLAG;
+ std::string RNAME;
+ int POS;
+ int MAPQ;
+ std::string CIGAR;
+ std::string RNEXT;
+ int PNEXT;
+ int TLEN;
+ std::string SEQ;
+ std::string QUAL;
+};
+
+/**
+ * Class for dealing with alignments
+ */
+class Alignments {
+public:
+ /**
+ * Ctor that initializes alignment
+ */
+ Alignments(std::string alignment);
+
+ /**
+ * Stores information about each of the eleven
+ * required alignment fields
+ */
+ void fill_alignment_fields(const std::string &alignment);
+
+ std::vector> cigar();
+
+ inline std::string alignment() const { return _alignment; };
+
+ inline std::string qname() const { return field.QNAME; };
+ inline std::string rname() const { return field.RNAME; };
+ inline std::string cigar() const { return field.CIGAR; };
+ inline std::string rnext() const { return field.RNEXT; };
+ inline std::string seq() const { return field.SEQ; };
+ inline std::string qual() const { return field.QUAL; };
+
+ inline int flag() const { return field.FLAG; };
+ inline int pos() const { return field.POS; };
+ inline int mapq() const { return field.MAPQ; };
+ inline int pnext() const { return field.PNEXT; };
+ inline int tlen() const { return field.TLEN; };
+
+private:
+ /**
+ * Returns a pair of cigar operations as (occurrence, operation)
+ * Ex: 10M5I -> (10, M), (5, I)
+ */
+ std::vector> get_cigar_operations(const std::string &cigar);
+
+ std::string _alignment;
+ alignment_fields field;
+};
+
+#endif /* ALIGNMENTS_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Alignments.o
Binary file gene_fraction/src/Alignments.o has changed
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Fasta.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Fasta.cpp Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,39 @@
+#include "Fasta.h"
+#include "args.h"
+
+#include
+#include
+#include
+#include
+
+Fasta::Fasta(std::string amr_fp) : _amr_fp(amr_fp) {}
+
+void Fasta::read_fasta(const std::string &amr_fp) {
+ std::ifstream in(amr_fp.c_str());
+ if(!in) {
+ usage();
+ exit(EXIT_FAILURE);
+ }
+
+ std::string gene_id, gene, line;
+ while(std::getline(in, line)) {
+ std::size_t gene_idx = line.find(" ");
+
+ if(gene_idx != std::string::npos)
+ gene_id = line.substr(1, gene_idx-1);
+ else
+ gene_id = line.substr(1, line.length());
+
+ std::getline(in, gene);
+ records.push_back(FastaRecord(gene_id, gene));
+ }
+ in.close();
+
+ FastaRecord::sort_by_gene_id(records);
+}
+
+
+
+
+
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Fasta.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Fasta.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,28 @@
+#ifndef FASTA_H
+#define FASTA_H
+
+#include
+#include
+#include "FastaRecord.h"
+
+/**
+ * Class for dealing with fasta files
+ */
+class Fasta {
+public:
+ /**
+ * Constructor that sets amr file path
+ */
+ Fasta(std::string amr_fp);
+
+ /**
+ * Reads fasta file from file path
+ */
+ void read_fasta(const std::string &amr_fp);
+
+ std::vector records;
+private:
+ std::string _amr_fp;
+};
+
+#endif /* FASTA_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Fasta.o
Binary file gene_fraction/src/Fasta.o has changed
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/FastaRecord.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/FastaRecord.cpp Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,62 @@
+#include "FastaRecord.h"
+
+#include
+
+FastaRecord::FastaRecord(std::string gene_id, std::string gene) :
+ _gene_id(gene_id), _gene(gene), _base_hits(_gene.length(), 0),
+ _gene_hits(0) {}
+
+std::string FastaRecord::gene_id() const { return _gene_id; }
+
+std::string FastaRecord::gene() const { return _gene; }
+
+void FastaRecord::update_gene_hits() {
+ _gene_hits++;
+}
+
+int FastaRecord::gene_hits() const {
+ return _gene_hits;
+}
+
+int FastaRecord::get_base_hits() const {
+ return static_cast(count(_base_hits.begin(), _base_hits.end(), 1));
+}
+
+int FastaRecord::find_gene(const std::vector &records,
+ const std::string &gene_id, std::string seq) {
+ int gene_index;
+
+ std::vector::const_iterator low;
+ // binary search for fasta record index
+ low = std::lower_bound(records.begin(), records.end(), FastaRecord(gene_id, seq),
+ [](const FastaRecord &a, const FastaRecord &b)
+ { return a.gene_id() < b.gene_id(); });
+ gene_index = (low - records.begin());
+
+ return gene_index;
+}
+
+void FastaRecord::sort_by_gene_id(std::vector &records) {
+ // sort records by gene id
+ sort(records.begin(), records.end(), [](const FastaRecord &a, const FastaRecord &b) { return a.gene_id() < b.gene_id(); });
+}
+
+void FastaRecord::reset_base_hits(std::vector &records) {
+ for_each(records.begin(), records.end(), [](FastaRecord &a) { std::fill(a.base_hits().begin(), a.base_hits().end(), 0); });
+}
+
+void FastaRecord::reset_gene_hits(std::vector &records) {
+ for_each(records.begin(), records.end(), [](FastaRecord &a) { a._gene_hits = 0; });
+}
+
+std::vector &FastaRecord::base_hits() {
+ return _base_hits;
+}
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/FastaRecord.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/FastaRecord.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,86 @@
+#ifndef FASTA_RECORD_H
+#define FASTA_RECORD_H
+
+#include
+#include
+
+/**
+ * Class for dealing with fasta records
+ */
+class FastaRecord {
+public:
+ /**
+ * Ctor that initializes gene id and gene
+ */
+ FastaRecord(std::string gene_id, std::string gene);
+
+ /**
+ * Returns a string gene id
+ */
+ std::string gene_id() const;
+
+ /**
+ * Returns the gene associated with gene id
+ */
+ std::string gene() const;
+
+ /**
+ * Returns the total base hits for a gene
+ */
+ int get_base_hits() const;
+
+ /**
+ * Returns the amount of genes that were hit
+ * during the gene fraction calculation
+ */
+ int gene_hits() const;
+
+ /**
+ *
+ */
+ void update_base_hits(const int &index);
+
+ /**
+ *
+ */
+ void update_gene_hits();
+
+ /**
+ * Searches for a fasta record corresponding
+ * to gene id
+ */
+ static int find_gene(const std::vector &records,
+ const std::string &gene_id,
+ std::string seq = "");
+
+ /**
+ * Sorts fasta records by gene id
+ */
+ static void sort_by_gene_id(std::vector &records);
+
+ /**
+ * Resets base hits vector to 0's.
+ * This occurs after each sample is processed
+ */
+ static void reset_base_hits(std::vector &records);
+
+ /**
+ * Resets gene hits primitive to 0.
+ * This happens after each sample is processed
+ */
+ static void reset_gene_hits(std::vector &records);
+
+ std::vector &base_hits();
+
+ std::string _gene_id;
+ std::string _gene;
+ std::vector _base_hits;
+
+private:
+ int _gene_hits;
+};
+
+
+
+
+#endif /* FASTA_RECORD_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/FastaRecord.o
Binary file gene_fraction/src/FastaRecord.o has changed
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Makefile Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,14 @@
+output: main.o Sam.o Alignments.o Fasta.o FastaRecord.o
+ g++ -std=c++11 main.o Sam.o Alignments.o Fasta.o FastaRecord.o -o csa
+main.o: main.cpp
+ g++ -c -std=c++11 main.cpp
+Sam.o: Sam.cpp
+ g++ -c -std=c++11 Sam.cpp
+Alignments.o: Alignments.cpp
+ g++ -c -std=c++11 Alignments.cpp
+Fasta.o: Fasta.cpp
+ g++ -c -std=c++11 Fasta.cpp
+FastaRecord.o: FastaRecord.cpp
+ g++ -c -std=c++11 FastaRecord.cpp
+clean:
+ rm *.o csa
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Sam.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Sam.cpp Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,41 @@
+#include "Sam.h"
+#include "args.h"
+#include "dir_util.h"
+#include "alignment_util.h"
+
+#include
+#include
+
+Sam::Sam(std::string sam_fp) : _sam_fp(sam_fp) {}
+
+void Sam::read_sam(cmd_args args) {
+ if(args.bam_stream) read_from_stdin();
+ else read_from_file(args.sam_fp);
+}
+
+void Sam::read_from_stdin() {
+ std::string line;
+ while(std::getline(std::cin, line)) {
+ if(line[0] == '@') continue;
+ alignment.push_back(line);
+ }
+}
+
+void Sam::read_from_file(const std::string &sam_fp) {
+ std::ifstream in(sam_fp.c_str());
+ if(!in) {
+ usage();
+ exit(EXIT_FAILURE);
+ }
+
+ std::string line;
+ while(getline(in, line)) {
+ if(line[0] == '@') continue;
+ if(is_good_alignment(line))
+ alignment.push_back(line);
+ }
+
+ in.close();
+}
+
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Sam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/Sam.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,48 @@
+#ifndef SAM_H
+#define SAM_H
+
+#include
+#include
+
+#include "args.h"
+#include "Alignments.h"
+
+/**
+ * Class for dealing with sam files
+ */
+
+class Sam {
+public:
+ /**
+ * Ctor initializes sam file path
+ */
+ Sam(std::string sam_fp);
+ void read_sam(cmd_args args);
+
+ /**
+ * Reads sam file from stdin
+ */
+ void read_from_stdin();
+
+ /**
+ * Reads sam file from directory or file path
+ */
+ void read_from_file(const std::string &sam_fp);
+
+ /**
+ *
+ */
+ void read_from_dir(const std::string &sam_dir_fp);
+
+ std::vector alignment;
+
+private:
+ std::string _sam_fp;
+};
+
+
+
+
+
+#endif /* SAM_H */
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/Sam.o
Binary file gene_fraction/src/Sam.o has changed
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/SamRatio.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/SamRatio.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,248 @@
+#ifndef SAM_RATIO_H
+#define SAM_RATIO_H
+
+#include
+#include
+#include
+
+#include "FastaRecord.h"
+#include "Alignments.h"
+#include "args.h"
+
+typedef std::vector> cigar_str;
+
+/**
+ *
+ */
+struct header {
+ std::string level = "Level";
+ std::string iteration = "Iteration";
+ std::string gene_id = "Gene id";
+ std::string gene_fraction = "Gene Fraction";
+ std::string hits = "Hits";
+};
+
+/**
+ * Reports the total number of bases that were touched for each
+ * gene. This is largely calculated using the positional and seq
+ * information found in fields four and ten of each alignment
+ */
+void analyze_coverage(FastaRecord &record, Alignments &alignment) {
+ record.update_gene_hits();
+
+ cigar_str cigar = alignment.cigar();
+
+ int len;
+ char op;
+
+ int occurrence;
+ int pos_in_gene = alignment.pos();
+
+ int start, stop;
+ int base_hits = record._base_hits.size(); // func this
+ int read_length = alignment.seq().length(); //func this
+
+ if(pos_in_gene == 1) {
+ occurrence = 0;
+ for(int i = 0; i < cigar.size(); i++) {
+ len = cigar[i].first;
+ op = cigar[i].second;
+
+ switch(op) {
+ case 'M':
+ occurrence += len;
+ break;
+ case 'I':
+ occurrence += len;
+ break;
+ default:
+ break;
+ }
+ }
+
+ start = read_length - occurrence;
+ stop = start + read_length;
+
+ for(int i = start; i < base_hits; i++) {
+ if(i == stop) break;
+ record._base_hits[i] = 1;
+ }
+ }
+ else {
+ start = pos_in_gene - 1;
+ stop = start + read_length;
+
+ for(int i = start; i < base_hits; i++) {
+ if(i == stop) break;
+ record._base_hits[i] = 1;
+ }
+ }
+}
+
+/**
+ * Returns gene fraction of fasta record
+ * Returns -1 if gene fraction is not greater than threshold
+ */
+double coverage(const FastaRecord &record, const int &threshold) {
+ double gene_coverage;
+
+ int base_hits, gene_size;
+
+ base_hits = record.get_base_hits();
+ gene_size = record.gene().length();
+
+ gene_coverage = (static_cast(base_hits)/static_cast(gene_size))*100;
+
+ if(gene_coverage > threshold)
+ return gene_coverage;
+ return -1;
+}
+
+/**
+ * Writes header to output file when
+ * reading from stdin
+ */
+void bam_stream_header() {
+ header h;
+ char sep = ',';
+
+ std::cout << h.level << sep << h.iteration << sep
+ << h.gene_id << sep << h.gene_fraction << sep
+ << h.hits << '\n';
+}
+
+/**
+ * Writes header to output file when
+ * reading from sam file
+ */
+void file_header(const std::string &out_fp, const std::string &sam_fp) {
+ header h;
+ std::ofstream out(out_fp.c_str(), std::ofstream::app );
+ char sep = ',';
+
+ //out << "@" << sam_fp << '\n';
+ out << h.level << sep << h.iteration << sep
+ << h.gene_id << sep << h.gene_fraction << sep
+ << h.hits << '\n';
+ out.close();
+}
+
+/**
+ *
+ */
+void create_header(cmd_args &args) {
+ if(args.bam_stream) {
+ bam_stream_header();
+ }
+ else {
+ file_header(args.out_fp, args.sam_fp);
+ }
+}
+
+/**
+ * Writes results to output file when reading from
+ * stdin
+ */
+void bam_stream_results(std::vector &records,
+ const int &level, const int &iteration,
+ cmd_args &args) {
+
+ double gene_fraction;
+ int hits_seen;
+ std::string id;
+ char sep = ',';
+
+ for(auto &rec : records) {
+ gene_fraction = coverage(rec, args.threshold);
+ hits_seen = rec.gene_hits();
+ id = rec.gene_id();
+
+ if(gene_fraction > 0) {
+ std::cout << level << sep << iteration << sep
+ << id << sep << gene_fraction << sep
+ << hits_seen << '\n';
+ }
+ }
+}
+
+/**
+ * Write results when reading sam file from
+ * path
+ */
+void file_results(std::vector &records,
+ const int level, const int &iteration,
+ cmd_args &args) {
+
+ std::ofstream out(args.out_fp.c_str(), std::ofstream::app);
+
+ double gene_fraction;
+ int hits_seen;
+ std::string id;
+ char sep = ',';
+
+ for(auto &rec : records) {
+ gene_fraction = coverage(rec, args.threshold);
+ hits_seen = rec.gene_hits();
+ id = rec.gene_id();
+
+ if(gene_fraction > 0) {
+ out << level << sep << iteration << sep
+ << id << sep << gene_fraction << sep
+ << hits_seen << '\n';
+ }
+ }
+ out.close();
+}
+
+/**
+ *
+ */
+void report_results(std::vector &records,
+ const int level, const int &iteration,
+ cmd_args &args) {
+
+ if(args.bam_stream) {
+ bam_stream_results(records,level,iteration,args);
+ }
+ else {
+ file_results(records,level,iteration,args);
+ }
+}
+
+/**
+ * Generates a sequence of samples from sam file specified
+ * by the starting level, max level, and skip pattern
+ */
+void generate_samples(std::vector &records,
+ std::vector &alignments,
+ cmd_args &args) {
+
+ int read_count = alignments.size();
+ int sample_size;
+
+ srand(unsigned(time(0)));
+
+ std::vector sequence(read_count);
+ iota(sequence.begin(), sequence.end(), 0);
+
+ create_header(args);
+
+ for(int level = args.min; level <= args.max; level += args.skip) {
+ for(int sample = 0; sample < args.s_per_lev; sample++) {
+ random_shuffle(sequence.begin(), sequence.end(), randomize);
+ sample_size = round(((static_cast(level)/100)*read_count));
+ std::vector chosen(sequence.begin(), sequence.begin()+sample_size);
+
+ for(int a_idx = 0; a_idx < chosen.size(); a_idx++) {
+ std::string rname = alignments[chosen[a_idx]].rname();
+ int gene_idx = FastaRecord::find_gene(records, rname);
+ analyze_coverage(records[gene_idx], alignments[chosen[a_idx]]);
+ }
+ report_results(records,level,sample+1,args);
+ FastaRecord::reset_base_hits(records);
+ FastaRecord::reset_gene_hits(records);
+ }
+ }
+}
+
+#endif /* SAM_RATIO_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/alignment_util.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/alignment_util.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,96 @@
+#ifndef ALIGNMENT_UTIL_H
+#define ALIGNMENT_UTIL_H
+
+#include
+#include
+
+#include
+
+#include "int_util.h"
+
+// macro to check if read mapped
+#define READ_UNMAPPED 4
+
+/**
+ * Splits alignment into separate parts
+ */
+std::vector split_alignment(std::string &alignment) {
+ std::vector parts;
+
+ boost::trim_if(alignment, boost::is_any_of("\t "));
+ // split on tab delimeter
+ boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on);
+
+ return parts;
+}
+
+/**
+ * Validates bit flag
+ */
+bool is_good_flag(const int &bit_flag) {
+ if( (bit_flag & READ_UNMAPPED) > 0) return false;
+ return true;
+}
+
+/**
+ * Validates rname
+ */
+bool is_good_rname(const std::string &rname) {
+ return rname.compare("*") != 0;
+}
+
+/**
+ * Validates pos
+ */
+bool is_good_pos(const int &pos) {
+ return pos > 0;
+}
+
+/**
+ * Validates cigar
+ */
+bool is_good_cigar(const std::string &cigar) {
+ return cigar.compare("*") != 0;
+}
+
+/**
+ * Validates seq
+ */
+bool is_good_seq(const std::string &seq) {
+ return seq.compare("*") != 0;
+}
+
+/**
+ * Validates alignment fields
+ */
+bool fields_are_good(std::vector &parts) {
+ int bit_flag = s_to_i(parts[1]);
+ int pos = s_to_i(parts[3]);
+
+ std::string rname = parts[2];
+ std::string cigar = parts[5];
+ std::string seq = parts[9];
+
+ if(!(is_good_flag(bit_flag))) return false;
+ if(!(is_good_pos(pos))) return false;
+ if(!(is_good_rname(rname))) return false;
+ if(!(is_good_cigar(cigar))) return false;
+ if(!(is_good_seq(seq))) return false;
+
+ return true;
+}
+
+/**
+ * Stores alignments that pass validity checks
+ */
+bool is_good_alignment(std::string &alignment) {
+ std::vector alignment_parts;
+
+ alignment_parts = split_alignment(alignment);
+
+ if(!(fields_are_good(alignment_parts)))
+ return false;
+ return true;
+}
+
+#endif /* ALIGNMENT_UTIL_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/args.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/args.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,90 @@
+#ifndef ARGS_H
+#define ARGS_H
+
+#include "int_util.h"
+
+#include
+#include
+#include
+
+static void usage() {
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Program: Coverage Sampler \n");
+ fprintf(stderr, "Contact: Chris Dean sam_dir_fp;
+ std::string out_fp;
+
+ int threshold;
+ int min;
+ int max;
+ int skip;
+ int s_per_lev;
+
+ bool sam_dir = false; /* This will be set to true when parsing a
+ directory of sam files. */
+ bool bam_stream = false; /* This will be set to true when executing
+ samtools view -h example.bam | csa > output
+ from the command line. */
+};
+
+/**
+ * Returns a struct of command line arguments.
+ */
+static inline cmd_args
+parse_command_line(int argc, char *argv[]) {
+ std::vector args(argv, argv + argc);
+
+ cmd_args arg;
+
+ for(int i = 1; i < argc; i++) {
+ if(args[i].compare("-amr_fp") == 0)
+ arg.amr_fp = args[++i];
+ else if(args[i].compare("-sam_fp") == 0)
+ arg.sam_fp = args[++i];
+ else if(args[i].compare("-out_fp") == 0)
+ arg.out_fp = args[++i];
+ else if(args[i].compare("-t") == 0)
+ arg.threshold = s_to_i(args[++i]);
+ else if(args[i].compare("-min") == 0)
+ arg.min = s_to_i(args[++i]);
+ else if(args[i].compare("-max") == 0)
+ arg.max = s_to_i(args[++i]);
+ else if(args[i].compare("-skip") == 0)
+ arg.skip = s_to_i(args[++i]);
+ else if(args[i].compare("-samples") == 0)
+ arg.s_per_lev = s_to_i(args[++i]);
+ else if(args[i].compare("-d") == 0)
+ arg.sam_dir = true;
+ else if(args[i].compare("-bam") == 0)
+ arg.bam_stream = true;
+ else {
+ usage();
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ return arg;
+}
+
+#endif /* ARGS_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/dir_util.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/dir_util.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,43 @@
+#ifndef DIR_UTIL_H
+#define DIR_UTIL_H
+
+#include
+#include
+#include
+#include
+
+/*
+ * Parses a directory of sam files
+ */
+static inline std::list
+parse_sam_dir(const std::string &directory) {
+ DIR *dir;
+ struct dirent *ent;
+ std::string dir_path = directory;
+
+ dir = opendir(dir_path.c_str());
+ // is dir open/valid?
+ if(dir == NULL) {
+ std::cout << "Not a valid directory" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+
+ std::list sam_files;
+ std::string fn;
+ std::string ext;
+ std::string file_type = ".sam";
+
+ // get all files with a .sam file extension
+ while((ent = readdir(dir)) != NULL) {
+ fn = dir_path + std::string(ent->d_name);
+ ext = fn.substr(fn.length()-4, fn.length());
+ if(ext.compare(file_type) == 0) {
+ sam_files.push_back(fn);
+ }
+ }
+ closedir(dir);
+
+ return sam_files;
+}
+
+#endif /* DIR_UTIL_H */
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/gene_fraction.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/gene_fraction.xml Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,57 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/int_util.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/int_util.h Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,29 @@
+#ifndef INT_UTIL_H
+#define INT_UTIL_H
+
+#include
+#include
+
+/**
+ * Given a string, return its integer.
+ */
+static inline int
+s_to_i(const std::string &s) {
+ std::istringstream ss(s);
+ int i;
+ ss >> i;
+ return i;
+}
+
+/**
+ * Given an integer, return a random number
+ * between 0 and i.
+ */
+static inline int
+randomize(const int &i) {
+ return rand() % i;
+}
+
+#endif /*INT_UTIL_H */
+
+
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/main.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/main.cpp Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+
+#include "int_util.h"
+#include "dir_util.h"
+#include "args.h"
+#include "Fasta.h"
+#include "Sam.h"
+#include "SamRatio.h"
+
+using namespace std;
+
+int main(int argc, char *argv[]) {
+ cmd_args args;
+ args = parse_command_line(argc, argv);
+
+ Fasta f(args.amr_fp);
+ f.read_fasta(args.amr_fp);
+
+ if(args.sam_dir) {
+ list sam_files = parse_sam_dir(args.sam_fp);
+ for(auto &fn : sam_files) {
+ args.sam_fp = fn;
+ Sam s(args.sam_fp);
+ s.read_sam(args);
+ generate_samples(f.records, s.alignment, args);
+ }
+ }
+ else {
+ Sam s(args.sam_fp);
+ s.read_sam(args);
+ generate_samples(f.records, s.alignment, args);
+ }
+
+ return 0;
+}
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/main.o
Binary file gene_fraction/src/main.o has changed
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/ref.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/ref.fa Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,2 @@
+>seq1
+AAAAAAAAAA
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/res
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/res Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,3 @@
+@test.sam
+Level,Iteration,Gene id,Gene Fraction,Hits
+100,1,seq1,50,1
diff -r 000000000000 -r f95150c37d38 gene_fraction/src/test.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_fraction/src/test.sam Sun Feb 21 23:31:55 2016 -0500
@@ -0,0 +1,1 @@
+QNAME 0 seq1 3 0 5M 0 0 0 AAAAA ;;;;;