Repository 'extract_min_max_lines'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/extract_min_max_lines

Changeset 0:90fc00b34716 (2016-04-15)
Commit message:
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/extract_min_max_lines commit 718c006213709b631862b8d6d655fafc92e79ef7-dirty
added:
extract_min_max_lines.py
extract_min_max_lines.xml
test-data/input_file.tabular
test-data/output_test_col_3_max.tabular
test-data/output_test_col_3_min.tabular
test-data/output_test_col_4_max.tabular
test-data/output_test_col_4_min.tabular
b
diff -r 000000000000 -r 90fc00b34716 extract_min_max_lines.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_min_max_lines.py Fri Apr 15 07:59:28 2016 -0400
[
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import argparse
+import re
+import time
+
+def extract_lines(input_content, column_id, extraction_type, extraction_nb):
+    """Return the ``extraction_nb`` rows with the most extreme column values.
+
+    Parameters:
+        input_content: iterable of tab-separated text lines (each may or may
+            not end with a newline).
+        column_id: 0-based index of the column holding the numeric value.
+        extraction_type: the builtin ``min`` or ``max``; it decides which of
+            two values ranks first.
+        extraction_nb: maximum number of rows to keep.
+
+    Returns:
+        A list of rows (each row is the list of its tab-separated fields),
+        ordered from the most extreme value to the least extreme one. When
+        two rows hold the same value, the one read last is placed first.
+        The list is empty when ``extraction_nb`` is zero or negative.
+
+    Raises:
+        ValueError: if the selected field cannot be converted to ``float``.
+        IndexError: if a non-blank line has no field at ``column_id``.
+    """
+    conserved_lines = []
+    if extraction_nb <= 0:
+        return conserved_lines
+
+    # Parsed values, kept parallel to conserved_lines so that each kept row
+    # is converted to float only once.
+    conserved_values = []
+    for line in input_content:
+        # Only drop the line terminator: slicing off the last character
+        # would eat real data when the final line has no trailing newline.
+        stripped_line = line.rstrip('\n')
+        if not stripped_line.strip():
+            # Blank lines carry no value to rank.
+            continue
+        split_line = stripped_line.split('\t')
+        value = float(split_line[column_id])
+
+        # Walk back from the least extreme kept row to find where the new
+        # row belongs. The list is kept ordered from the very first row on,
+        # which is what makes stopping at the first "loss" correct.
+        position = len(conserved_values)
+        while (position > 0 and
+               extraction_type(value, conserved_values[position - 1]) == value):
+            position -= 1
+
+        if position < extraction_nb:
+            conserved_lines.insert(position, split_line)
+            conserved_values.insert(position, value)
+            # Drop whatever overflowed past the requested number of rows.
+            del conserved_lines[extraction_nb:]
+            del conserved_values[extraction_nb:]
+    return conserved_lines
+
+def extract_min_max_lines(args):
+    """Extract the rows with minimal or maximal values and write them out.
+
+    ``args`` must provide the attributes ``input_file`` and ``output_file``
+    (paths), ``column_id`` (1-based column number, converted to a 0-based
+    index here), ``extraction_type`` (``'min'`` or ``'max'``) and
+    ``extraction_nb`` (number of rows to keep).
+
+    Raises:
+        ValueError: if ``args.extraction_type`` is neither ``'min'`` nor
+            ``'max'``.
+    """
+    selectors = {'min': min, 'max': max}
+    if args.extraction_type not in selectors:
+        # Fail with a clear message instead of an unbound local variable
+        # when this function is called without going through argparse.
+        raise ValueError(
+            "extraction_type must be 'min' or 'max', got %r"
+            % (args.extraction_type,))
+    extraction_type = selectors[args.extraction_type]
+
+    with open(args.input_file, 'r') as input_file:
+        # The file object is iterated line by line, so the whole input is
+        # never held in memory at once.
+        conserved_lines = extract_lines(input_file, args.column_id - 1,
+            extraction_type, args.extraction_nb)
+
+    with open(args.output_file, 'w') as output_file:
+        for line in conserved_lines:
+            output_file.write('\t'.join(line) + "\n")
+
+if __name__ == '__main__':
+    # Command-line entry point: every option is mandatory; the parsed
+    # namespace is handed straight to extract_min_max_lines.
+    cli = argparse.ArgumentParser()
+    option_specs = (
+        ('--input_file', {}),
+        ('--output_file', {}),
+        ('--column_id', {'type': int}),
+        ('--extraction_type', {'choices': ['min', 'max']}),
+        ('--extraction_nb', {'type': int}),
+    )
+    for flag, extra in option_specs:
+        cli.add_argument(flag, required=True, **extra)
+
+    extract_min_max_lines(cli.parse_args())
\ No newline at end of file
b
diff -r 000000000000 -r 90fc00b34716 extract_min_max_lines.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_min_max_lines.xml Fri Apr 15 07:59:28 2016 -0400
[
@@ -0,0 +1,80 @@
+<tool id="extract_min_max_lines" name="Extract lines corresponding" version="0.1.0">
+    <description>to minimum and maximum values of a column</description>
+
+    <requirements>
+    </requirements>
+
+    <stdio>
+    </stdio>
+
+    <version_command></version_command>
+
+    <!-- Every substituted value is single-quoted so that paths containing
+         spaces or shell metacharacters reach the script as one argument. -->
+    <command><![CDATA[
+        python '$__tool_directory__/extract_min_max_lines.py'
+            --input_file '$input_file'
+            --output_file '$output_file'
+            --column_id '$column_id'
+            --extraction_type '$extraction_type'
+            --extraction_nb '$extraction_nb'
+    ]]></command>
+
+    <inputs>
+        <!-- Dataset to scan; forwarded to the script as input_file. -->
+        <param name="input_file" type="data" format="tabular,tsv,csv" label="Input file" help="File in tabular format with tab-separated columns (--input_file)"/>
+
+        <!-- Single numerical column of input_file whose values are ranked. -->
+        <param name="column_id" type="data_column" data_ref="input_file" label="Column containing data to extract minimum or maximum values" multiple="false" numerical="true" help="(--column_id)"/>
+
+        <!-- Which end of the value range to keep; "min" is preselected. -->
+        <param name="extraction_type" label="Type of values to extract lines" type="select" help="(--extraction_type)">
+            <option value="min" selected="True">Minimal values</option>
+            <option value="max">Maximal values</option>
+        </param>
+
+        <!-- How many lines to keep; defaults to 10. -->
+        <param name="extraction_nb" type="integer" value="10" label="Number of lines to extract" help="(--extraction_nb)"/>
+    </inputs>
+
+    <outputs>
+        <!-- Single tabular dataset; its path is passed to the script through
+             the output_file option of the command. -->
+        <data name="output_file" format="tabular"
+            label="${tool.name} on ${on_string}: Extracted lines" />
+    </outputs>
+
+    <!-- Four cases on the same input file: columns 3 and 4, each with "min"
+         and "max", always extracting 10 lines and comparing the result to
+         the matching expected file. -->
+    <tests>
+        <!-- Column 3, minimal values. -->
+        <test>
+            <param name="input_file" value="input_file.tabular"/>
+            <param name="column_id" value="3"/>
+            <param name="extraction_type" value="min"/>
+            <param name="extraction_nb" value="10"/>
+            <output name="output_file" file="output_test_col_3_min.tabular"/>
+        </test>
+        <!-- Column 3, maximal values. -->
+        <test>
+            <param name="input_file" value="input_file.tabular"/>
+            <param name="column_id" value="3"/>
+            <param name="extraction_type" value="max"/>
+            <param name="extraction_nb" value="10"/>
+            <output name="output_file" file="output_test_col_3_max.tabular"/>
+        </test>
+        <!-- Column 4, minimal values. -->
+        <test>
+            <param name="input_file" value="input_file.tabular"/>
+            <param name="column_id" value="4"/>
+            <param name="extraction_type" value="min"/>
+            <param name="extraction_nb" value="10"/>
+            <output name="output_file" file="output_test_col_4_min.tabular"/>
+        </test>
+        <!-- Column 4, maximal values. -->
+        <test>
+            <param name="input_file" value="input_file.tabular"/>
+            <param name="column_id" value="4"/>
+            <param name="extraction_type" value="max"/>
+            <param name="extraction_nb" value="10"/>
+            <output name="output_file" file="output_test_col_4_max.tabular"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+This tool extracts a variable number of lines corresponding to the minimum or maximum values of a chosen column.
+
+The file must be in tabular format with tab-separated columns. The column chosen for extracting minimum or maximum values must be a numerical data column.
+    ]]></help>
+
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 90fc00b34716 test-data/input_file.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_file.tabular Fri Apr 15 07:59:28 2016 -0400
b
b'@@ -0,0 +1,44933 @@\n+UniRef50_F0RML5\tPeptidoglycan glycosyltransferase\t0.000181399063257\t0.023462188863\t0.0232807897997\n+UniRef50_D9SMK5\t4Fe 4S ferredoxin iron sulfur binding domain containing protein\t0.000974298729943\t0.00104942971233\t7.5130982387e-05\n+UniRef50_UPI000289C850\tglutamine synthetase\t3.02071076987e-06\t1.13251396637e-05\t8.30442889383e-06\n+UniRef50_UPI0003344A46\tPREDICTED\t2.92448096954e-06\t1.23166316876e-05\t9.39215071806e-06\n+UniRef50_A5MZ45\tCheA2\t0.000168607563575\t0.00156010359401\t0.00139149603044\n+UniRef50_Q2LVL0\tLipid A export ATP binding permease protein MsbA\t1.24797999013e-05\t2.11666856208e-05\t8.6868857195e-06\n+UniRef50_P31134\tPutrescine transport ATP binding protein PotG\t0.00276274252245\t0.0013215449275\t-0.00144119759495\n+UniRef50_T1T0E7\t\t0.000113062785186\t0.00014719447673\t3.4131691544e-05\n+UniRef50_B1ES30\t\t4.65083689682e-05\t7.40274272935e-05\t2.75190583253e-05\n+UniRef50_U5FV07\t\t6.97112841074e-06\t1.69093447272e-05\t9.93821631646e-06\n+UniRef50_UPI00037AE754\thypothetical protein\t4.96975012537e-05\t7.77591897983e-05\t2.80616885446e-05\n+UniRef50_E3EZF3\t\t3.7834529758e-05\t5.98806720443e-05\t2.20461422863e-05\n+UniRef50_UPI00040F4463\tMULTISPECIES\t0.000347916254551\t0.000538916925611\t0.00019100067106\n+UniRef50_A6TTD6\tXanthine phosphoribosyltransferase\t3.29952916783e-05\t8.400468316e-05\t5.10093914817e-05\n+UniRef50_UPI00047775E1\tosmotically inducible protein C\t3.20342739866e-05\t0.000143696613898\t0.000111662339911\n+UniRef50_UPI00035F4F82\thypothetical protein\t1.38179068894e-05\t2.13437062856e-05\t7.5257993962e-06\n+UniRef50_V6QDM9\t\t0.00879068711831\t0.000649365504839\t-0.00814132161347\n+UniRef50_UPI000366B462\thypothetical protein\t8.96520934309e-06\t5.13157979404e-06\t-3.83362954905e-06\n+UniRef50_UPI0003A50DF1\thypothetical protein\t1.78941032845e-05\t2.0766777608e-05\t2.8726743235e-06\n+UniRef50_A7X4V0\tATP synthase subunit 
c\t0.00200926083898\t0.00378523004621\t0.00177596920723\n+UniRef50_A6M004\tSignal transduction histidine kinase, nitrogen specific, NtrB\t0.00056455066055\t0.00112908749909\t0.00056453683854\n+UniRef50_Q6G4Z3\tRibosomal RNA large subunit methyltransferase E\t9.91937485213e-05\t4.15778712915e-05\t-5.76158772298e-05\n+UniRef50_UPI00031D9BC6\thypothetical protein\t1.21726134302e-05\t5.23901784827e-06\t-6.93359558193e-06\n+UniRef50_K6GJU3\tPyridoxal phosphate dependent protein\t5.22036525674e-06\t7.45619049448e-06\t2.23582523774e-06\n+UniRef50_UPI0004781473\tppGpp synthetase\t7.9055593981e-06\t6.36800227798e-06\t-1.53755712012e-06\n+UniRef50_A7MGX5\ttRNA  methyltransferase TrmJ\t0.00212961214357\t0.000294172368815\t-0.00183543977476\n+UniRef50_Q4JXU5\tAminomethyltransferase\t2.61473105294e-05\t2.98878317567e-05\t3.7405212273e-06\n+UniRef50_G3A3C4\t\t2.43165574186e-05\t5.52071853911e-05\t3.08906279725e-05\n+UniRef50_L7WPW4\tCation efflux family protein\t0.0220787994494\t0.00729898175619\t-0.0147798176932\n+UniRef50_Q4JXJ2\tCysteine  tRNA ligase\t1.46406253943e-05\t1.21697869621e-05\t-2.4708384322e-06\n+UniRef50_C1KWN7\tGTPase Der\t0.0221045853772\t0.0112808239108\t-0.0108237614664\n+UniRef50_UPI0002003BFB\ttranscriptional regulator, partial\t7.87344330917e-06\t1.02617563671e-05\t2.38831305793e-06\n+UniRef50_A6M2Q1\tM18 family aminopeptidase\t0.000278330413639\t0.00197325854824\t0.0016949281346\n+UniRef50_UPI00037CE245\thypothetical protein\t1.19456994074e-05\t2.06099674676e-05\t8.6642680602e-06\n+UniRef50_Q8CTD2\t\t0.00400791961979\t0.000845403015075\t-0.00316251660472\n+UniRef50_N6UZ26\t\t0.000184609644252\t0.000174056997165\t-1.0552647087e-05\n+UniRef50_Q8CTD8\t\t0.00467178630856\t0.00176140084145\t-0.00291038546711\n+UniRef50_Q213B4\tAspartate racemase\t0.00420809284222\t0.00254295130469\t-0.00166514153753\n+UniRef50_S9QQA7\tpH adaptation potassium efflux system a\t8.62911388131e-06\t8.32021938689e-06\t-3.0889449442e-07\n+UniRef50_L8PMS9\tPutative secreted 
protein\t6.8034549289e-07\t9.4093242251e-06\t8.72897873221e-06\n+UniRef50_P0AFQ9\t\t0.00385881354037\t0.000873804337357\t-0.00298500920301\n+UniRef50_UPI000366BC2B\tMULTISPECIES\t1.19391317174e-05\t1.78601929922e-05\t5.9210612748e-06\n+UniRef50_Q49VL4\t\t0.00958062041709\t0.006875478517'..b'PI0003808AE5\thypothetical protein\t8.08324761341e-06\t2.29618747517e-05\t1.48786271383e-05\n+UniRef50_UPI0003B5E6AE\tpeptidase M24\t3.89856407794e-06\t1.50807185631e-05\t1.11821544852e-05\n+UniRef50_UPI0003B421A5\theme ABC transporter ATP binding protein\t5.81287411282e-05\t5.79419619714e-05\t-1.867791568e-07\n+UniRef50_UPI0003753359\thypothetical protein\t1.61072015283e-05\t3.42710038439e-06\t-1.26801011439e-05\n+UniRef50_P0AB04\t\t0.00194950393666\t0.0013505102435\t-0.00059899369316\n+UniRef50_UPI0002A4C202\t\t5.55525589308e-05\t2.72340887632e-05\t-2.83184701676e-05\n+UniRef50_UPI00036E9616\talpha L fucosidase\t1.54419626849e-05\t4.72784427132e-05\t3.18364800283e-05\n+UniRef50_A0A017Y7E3\t\t2.9767910727e-05\t4.46189247071e-05\t1.48510139801e-05\n+UniRef50_Q97FA7\t\t0.000297920703939\t0.00212965617169\t0.00183173546775\n+UniRef50_A6LTV0\tFibronectin, type III domain protein\t0.000232107329756\t0.000968568529317\t0.000736461199561\n+UniRef50_UPI000463D2BB\toxidoreductase\t5.61898751659e-06\t1.72615614073e-05\t1.16425738907e-05\n+UniRef50_UPI000219711C\tbranched chain amino acid transporter II carrier protein\t1.58941721423e-05\t1.26151252564e-05\t-3.2790468859e-06\n+UniRef50_K0S2S0\t\t2.86793669097e-05\t0.000499802341001\t0.000471122974091\n+UniRef50_Q4L709\tPorphobilinogen deaminase\t0.00787394947315\t0.00123977484846\t-0.00663417462469\n+UniRef50_P45174\tSodium proline symporter\t0.00172509854048\t0.0132769626526\t0.0115518641121\n+UniRef50_UPI0004294A5C\thypothetical protein\t2.3854643408e-05\t0.000161567506456\t0.000137712863048\n+UniRef50_UPI0004730D89\thypothetical 
protein\t5.97964782623e-05\t5.05455149912e-06\t-5.47419267632e-05\n+UniRef50_K8Z2D7\t\t1.3812899602e-05\t2.46549338392e-05\t1.08420342372e-05\n+UniRef50_P67446\tXanthine permease XanQ\t0.00379919491288\t0.00221854695027\t-0.00158064796261\n+UniRef50_F3L0R4\tUPF0246 protein YaaA \t1.34689232179e-05\t1.58400623849e-05\t2.371139167e-06\n+UniRef50_Q8XKT3\tDegV domain containing protein CPE1310\t0.000454116200659\t0.000890132602642\t0.000436016401983\n+UniRef50_UPI0003B581A8\ttaurine  pyruvate aminotransferase\t4.74853506028e-05\t1.65980319741e-05\t-3.08873186287e-05\n+UniRef50_UPI0004766D5E\tmalyl CoA thiolesterase\t0.00010122795894\t4.47789761493e-05\t-5.64489827907e-05\n+UniRef50_E3D1E7\t\t0.000285654843523\t0.00126361117974\t0.000977956336217\n+UniRef50_UPI0004692352\tpeptide ABC transporter permease\t5.40927499806e-06\t2.43665928334e-05\t1.89573178353e-05\n+UniRef50_W3PE26\tDnaK domain protein \t9.47651841946e-05\t6.37123709624e-05\t-3.10528132322e-05\n+UniRef50_UPI000371AEC6\thypothetical protein, partial\t3.70251797835e-05\t2.14913216992e-05\t-1.55338580843e-05\n+UniRef50_UPI000441DE45\tPREDICTED\t1.2652266455e-05\t1.60395922604e-06\t-1.1048307229e-05\n+UniRef50_M1MSX4\tPhage infection protein Pip\t0.000482315278466\t0.00128096311338\t0.000798647834914\n+UniRef50_G7L1M9\t50S ribosomal protein L22\t8.40541386484e-06\t3.14240493409e-05\t2.30186354761e-05\n+UniRef50_A8G2K7\t\t0.000566697337317\t0.00294743438332\t0.002380737046\n+UniRef50_V9U122\t\t0.00037836467757\t0.000260790930809\t-0.000117573746761\n+UniRef50_Q81WF1\tCarbamoyl phosphate synthase small chain\t0.000336544305143\t0.00244579174399\t0.00210924743885\n+UniRef50_Q64QH6\t\t1.77190352826e-06\t3.10012895916e-06\t1.3282254309e-06\n+UniRef50_UPI0003722F98\thypothetical protein\t3.11076713415e-06\t5.97056521558e-06\t2.85979808143e-06\n+UniRef50_V5VGJ6\tMembrane fusion protein\t8.51472155251e-05\t0.00611497234421\t0.00602982512868\n+UniRef50_R4REL3\tPeptidase family M48 
family\t0.000608546229884\t0.000412069821193\t-0.000196476408691\n+UniRef50_UPI0003B6D53E\tMarR family transcriptional regulator, partial\t1.19401063788e-05\t4.26634331797e-05\t3.07233268009e-05\n+UniRef50_UPI000371A422\thypothetical protein\t0.000149729190961\t3.76661214999e-05\t-0.000112063069461\n+UniRef50_A6LRF3\tTranscriptional regulator, RpiR family\t0.000316507820176\t0.0015608070061\t0.00124429918592\n+UniRef50_A1TJ24\tPotassium transporting ATPase A chain\t0.00814353413742\t0.0122663514283\t0.00412281729088\n+UniRef50_B9DNM2\tPutative pyruvate, phosphate dikinase regulatory protein\t0.0224430959464\t0.0040564575784\t-0.018386638368\n'
b
diff -r 000000000000 -r 90fc00b34716 test-data/output_test_col_3_max.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test_col_3_max.tabular Fri Apr 15 07:59:28 2016 -0400
b
@@ -0,0 +1,10 @@
+UniRef50_P19529 Replication initiation protein 0.961640316151 0.326879196825 -0.634761119326
+UniRef50_Q5HJZ6 Plasmid recombination enzyme type 3 0.740800531361 0.244512495215 -0.496288036146
+UniRef50_P02983 Tetracycline resistance protein 0.573449640169 0.199788693154 -0.373660947015
+UniRef50_Q93GF3 Rep 0.410004761703 0.131513750747 -0.278491010956
+UniRef50_V6QG63 Integrase 0.340495265028 0.114492453346 -0.226002811682
+UniRef50_W1W6K4 0.278232683798 0.0628932216423 -0.215339462156
+UniRef50_D4FM51 Plasmid recombination enzyme 0.240102320308 0.0660335100375 -0.17406881027
+UniRef50_Z6ILY0 0.226654902022 0.0411737778722 -0.18548112415
+UniRef50_F0P516 Replication initiation protein, truncated 0.219429720121 0.0852047566562 -0.134224963465
+UniRef50_Q8CU99 0.194123181874 0.0585038462355 -0.135619335638
b
diff -r 000000000000 -r 90fc00b34716 test-data/output_test_col_3_min.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test_col_3_min.tabular Fri Apr 15 07:59:28 2016 -0400
b
@@ -0,0 +1,10 @@
+UniRef50_D3E2A1 Adhesin like protein 9.04497122279e-08 2.28645810921e-07 1.38196098693e-07
+UniRef50_A0A011N6I9 1.51918001834e-07 4.01161738419e-07 2.49243736585e-07
+UniRef50_UPI0004446091 PREDICTED 1.55078193737e-07 2.42316800842e-07 8.7238607105e-08
+UniRef50_W7A2A5 1.56660464008e-07 3.52580982773e-07 1.95920518765e-07
+UniRef50_UPI000349BE1A hypothetical protein 1.75130162644e-07 1.32946712678e-06 1.15433696414e-06
+UniRef50_UPI0001BF6B99 90S preribosome component RRP12 1.84742307319e-07 4.34052266133e-06 4.15578035401e-06
+UniRef50_UPI000443E2D6 PREDICTED 1.90475579618e-07 3.42990413301e-07 1.52514833683e-07
+UniRef50_R4LEH4 Yd repeat containing protein 1.97864528112e-07 3.11608997065e-07 1.13744468953e-07
+UniRef50_UPI000378A614 hypothetical protein 2.00347906511e-07 1.1435229597e-06 9.43175053189e-07
+UniRef50_X2D8N8 CCR4 NOT transcription complex subunit 1 like protein 2.073948527e-07 1.64110422442e-05 1.62036473915e-05
b
diff -r 000000000000 -r 90fc00b34716 test-data/output_test_col_4_max.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test_col_4_max.tabular Fri Apr 15 07:59:28 2016 -0400
b
@@ -0,0 +1,10 @@
+UniRef50_Q9R3J0 Transposase, putative 0.00225014042765 1.10930408506 1.10705394463
+UniRef50_F0RKJ6 Transposase IS4 family protein 0.00427372358825 0.871496431206 0.867222707618
+UniRef50_UPI0000164CF6 hypothetical protein DR_1761 0.000366361200249 0.367022323052 0.366655961852
+UniRef50_Q9RYQ5 Resolvase, putative 0.000674082122953 0.361753596131 0.361079514008
+UniRef50_P19529 Replication initiation protein 0.961640316151 0.326879196825 -0.634761119326
+UniRef50_F0RR64 0.000846315859607 0.301362181458 0.300515865598
+UniRef50_Q9RV33 0.000368878816464 0.284296397807 0.283927518991
+UniRef50_Q5HJZ6 Plasmid recombination enzyme type 3 0.740800531361 0.244512495215 -0.496288036146
+UniRef50_UPI0000164CD9 putative transposase 0.00104009031306 0.224490351769 0.223450261456
+UniRef50_Q9RYR2 Extracellular solute binding protein, family 5 0.00106094764695 0.224438113631 0.223377165984
b
diff -r 000000000000 -r 90fc00b34716 test-data/output_test_col_4_min.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test_col_4_min.tabular Fri Apr 15 07:59:28 2016 -0400
b
@@ -0,0 +1,10 @@
+UniRef50_F0YEL0 9.51325576245e-06 1.51416281297e-07 -9.36183948115e-06
+UniRef50_A0A058ZAA4 6.36876267111e-06 1.95527719833e-07 -6.17323495128e-06
+UniRef50_W4XFJ9 6.76030182537e-07 2.06555424425e-07 -4.69474758112e-07
+UniRef50_D3E2A1 Adhesin like protein 9.04497122279e-08 2.28645810921e-07 1.38196098693e-07
+UniRef50_UPI0004446091 PREDICTED 1.55078193737e-07 2.42316800842e-07 8.7238607105e-08
+UniRef50_UPI00036AFA97 hypothetical protein 1.94826634309e-05 2.65435474198e-07 -1.92172279567e-05
+UniRef50_UPI0003C3A0E4 PREDICTED 1.06134224488e-06 2.68380106598e-07 -7.92962138282e-07
+UniRef50_A0A024JLS1 Similar to Saccharomyces cerevisiae YLR106C MDN1 Huge dynein related AAA type ATPase  1.04926977865e-06 2.78766826282e-07 -7.70502952368e-07
+UniRef50_UPI00031ABCD6 hypothetical protein 2.0828158654e-06 2.92321189728e-07 -1.79049467567e-06
+UniRef50_F0VBH0 1.74170711526e-05 3.07747069636e-07 -1.7109324083e-05