Mercurial > repos > jjohnson > contig_annotation_tool
changeset 2:9a01840eac52 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 6b978e61a7f66160f2577d907f52dd641f103986-dirty
author | jjohnson |
---|---|
date | Mon, 25 Nov 2019 15:09:24 -0500 |
parents | 86cd2e70b0dc |
children | 18ece3d5bcde |
files | cat_bins.xml macros.xml tabpad.py |
diffstat | 3 files changed, 31 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/cat_bins.xml Sun Nov 24 22:35:05 2019 -0500 +++ b/cat_bins.xml Mon Nov 25 15:09:24 2019 -0500 @@ -24,6 +24,10 @@ @USE_INTERMEDIATES@ @CUSTOM_SETTINGS@ && @TXT2TSV@ *.ORF2LCA.txt *.bin2classification.txt + #if len($mags) > 1: + #set pat = '$' + '{i/concatenated./}' + && (for i in *.concatenated.*; do ln -s "\$i" "${pat}"; done) + #end if @ADD_NAMES@ @SUMMARISE@ ]]></command>
--- a/macros.xml Sun Nov 24 22:35:05 2019 -0500 +++ b/macros.xml Mon Nov 25 15:09:24 2019 -0500 @@ -120,12 +120,12 @@ && CAT add_names $names.only_official $names.exclude_scores @CAT_TAXONOMY@ #if $bcat == 'CAT' - -i cat_output.contigs2classification.tsv + -i cat_output.contig2classification.tsv #else -i cat_output.bin2classification.tsv #end if -o classification_names.txt - && @TXT2TSV@ -i classification_names -o $classification_names + && @TXT2TSV@ -i classification_names.txt -o $classification_names #end if #if $names.add_names in ['orf2lca','both']: && CAT add_names $names.only_official $names.exclude_scores @@ -146,11 +146,11 @@ #if $names.add_names in ['classification','both'] and $names.only_official: #set $summary_input = $classification_names #else - #set $summary_input = classification_offical_names + #set $summary_input = 'classification_offical_names' && CAT add_names --only_official @CAT_TAXONOMY@ #if $bcat == 'CAT' - -i cat_output.contigs2classification.tsv + -i cat_output.contig2classification.tsv #else -i cat_output.bin2classification.tsv #end if @@ -168,7 +168,7 @@ <xml name="select_outputs"> <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs"> - <option value="log" selected="true">CAT.log</option> + <option value="log" selected="true">log</option> <option value="predicted_proteins_faa" selected="true">predicted_proteins.faa</option> <option value="predicted_proteins_gff">predicted_proteins.gff</option> <option value="alignment_diamond">alignment.diamond</option> @@ -179,6 +179,8 @@ <xml name="select_cat_outputs"> <param name="bcat" type="hidden" value="CAT"/> <param name="seqtype" type="hidden" value="contig"/> + <param name="sum_titles" type="hidden" value="contigs,number of ORFs,number of positions"/> + <param name="bin_col" type="hidden" value=""/> <expand macro="select_outputs"> <option value="contig2classification" selected="true">contig2classification.txt</option> </expand> @@ -186,6 +188,8 @@ <xml name="select_bat_outputs"> <param name="bcat" type="hidden" value="BAT"/> <param name="seqtype" type="hidden" value="bin"/> + <param name="sum_titles" type="hidden" value="bins"/> + <param name="bin_col" type="hidden" value="bin,"/> <expand macro="select_outputs"> <option value="bin2classification" selected="true">bin2classification.txt</option> </expand> @@ -215,7 +219,7 @@ <filter>'orf2lca' in select_outputs</filter> <actions> <action name="comment_lines" type="metadata" default="1" /> - <action name="column_names" type="metadata" default="ORF,lineage,bit-score" /> + <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score" /> </actions> </data> <data name="contig2classification" format="tabular" label="${bcat}.contig2classification.txt" from_work_dir="cat_output.contig2classification.tsv"> @@ -233,14 +237,14 @@ </actions> </data> <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt"> - <filter>'orf2lca' in names.add_names</filter> + <filter>names['add_names'] in ['both','orf2lca']</filter> <actions> <action name="comment_lines" type="metadata" default="1" /> - <action name="column_names" type="metadata" default="ORF,lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" /> + <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" /> </actions> </data> <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt"> - <filter>'classification' in names.add_names</filter> + <filter>names['add_names'] in ['both','classification']</filter> <actions> <action name="comment_lines" type="metadata" default="1" /> <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" /> @@ -249,8 +253,8 @@ <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt"> <filter>'classification' in summarise</filter> <actions> - <action name="comment_lines" type="metadata" default="1" /> - <action name="column_names" type="metadata" default="rank,clade,number of contigs,number of ORFs,number of positions" /> + <action name="comment_lines" type="metadata" default="4" /> + <action name="column_names" type="metadata" default="rank,clade,number of ${sum_titles}" /> </actions> </data> </xml>
--- a/tabpad.py Sun Nov 24 22:35:05 2019 -0500 +++ b/tabpad.py Mon Nov 25 15:09:24 2019 -0500 @@ -7,13 +7,24 @@ def padfile(infile, outfile, fieldcnt=None): with open(infile, 'r') as fh: out = open(outfile, 'w') + commentlines = [] tabs = '\t' * fieldcnt if fieldcnt is not None else None - for i, txtline in enumerate(fh): + def pad_line(txtline, tabs=None): line = txtline.rstrip('\r\n') fields = line.split('\t') if not tabs: tabs = '\t' * len(fields) out.write('%s%s\n' % (line, tabs[len(fields):])) + for i, txtline in enumerate(fh): + if txtline.lstrip().startswith('#'): + commentlines.append(txtline) + else: + if commentlines: + for i in range(len(commentlines)-1): + out.write(commentlines[i]) + pad_line(commentlines[-1], tabs=tabs) + commentlines = [] + pad_line(txtline, tabs=tabs) out.close()