Mercurial > repos > iuc > deg_annotate
diff deg_annotate.py @ 1:e98d4ab5b5bc draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deg_annotate commit 44d3dae188cabf4a64dee7c1ebe41c855d95d1b0
author | iuc |
---|---|
date | Wed, 23 Jan 2019 07:47:22 -0500 |
parents | b42373cddb77 |
children |
line wrap: on
line diff
--- a/deg_annotate.py Fri Nov 23 01:59:47 2018 -0500
+++ b/deg_annotate.py Wed Jan 23 07:47:22 2019 -0500
@@ -64,7 +64,7 @@
             pass
     bed_entries = []
-    # create BED lines only for deseq output
+    # create BED lines only for dexeq output
     if input_type == "dexseq":
         for txid in exon_pos.keys():
             starts = sorted(exon_pos[txid])
@@ -87,8 +87,8 @@
     parser.add_argument('-in', '--input', required=True,
                         help='DESeq2/DEXSeq output. It is allowed to have extra information, '
                              'but make sure that the original output columns are not altered')
-    parser.add_argument('-m', '--mode', required=True, choices=["deseq2", "dexseq"], default='deseq2',
-                        help='Input file type')
+    parser.add_argument('-m', '--mode', required=True, choices=["degseq", "dexseq"],
+                        default='degseq', help='Input file type')
     parser.add_argument('-g', '--gff', required=True, help='The same annotation GFF/GTF file used for couting')
     parser.add_argument('-t', '--type', default='exon', required=False,
                         help='feature type (3rd column in GFF file) to be used (default: exon)')
@@ -146,19 +146,10 @@
     with open(args.input) as fh_input, open(args.output, 'w') as fh_output:
         for line in fh_input:
             annot = []
-            # Append the extra information from GFF to DESeq2 output
-            if args.mode == "deseq2":
-                geneid = line.split('\t')[0]
-                annot = [str(annotation[geneid]['chr']),
-                         str(annotation[geneid]['start']),
-                         str(annotation[geneid]['end']),
-                         str(annotation[geneid]['strand'])]
-                for a in attr:
-                    annot.append(annotation[geneid][a])
             # DEXSeq exonic bins might originate from aggrigating multiple genes. They are are separated by '+'
             # Append the attributes from the GFF but keep the order of the aggregated genes and use '+'
             # Aappend the transcript id and exon number from the annotation that correspond to the DEXseq counting bins
-            elif args.mode == "dexseq":
+            if args.mode == "dexseq":
                 geneids = line.split('\t')[1].split('+')
                 for a in attr:
                     tmp = []
@@ -171,6 +162,15 @@
                     annot.append(','.join(sorted(set(d_binexon[binid]))))
                 except KeyError:
                     annot.append('NA')
+            # Append the extra information from GFF to DESeq2/edgeR/limma output
+            else:
+                geneid = line.split('\t')[0]
+                annot = [str(annotation[geneid]['chr']),
+                         str(annotation[geneid]['start']),
+                         str(annotation[geneid]['end']),
+                         str(annotation[geneid]['strand'])]
+                for a in attr:
+                    annot.append(annotation[geneid][a])
             fh_output.write(line.rstrip('\n') + '\t' + '\t'.join(annot) + '\n')