annotate FCStxtMergeDownsample.py @ 1:3c0e4179be7a draft default tip

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
author azomics
date Mon, 22 Jun 2020 17:42:26 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
1 #!/usr/bin/env python
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
2
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
3 ######################################################################
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
4 # Copyright (c) 2016 Northrop Grumman.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
5 # All rights reserved.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
6 ######################################################################
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
7
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
8 from __future__ import print_function
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
9 from __future__ import division
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
10 import sys
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
11 import os
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
12 import pandas as pd
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
13 from argparse import ArgumentParser
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
14
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
15
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
def is_number(s):
    """Tell whether ``s`` can be converted to a float.

    Returns True when ``float(s)`` succeeds and False when the value cannot
    be parsed as a number. Values of an unsupported type (for instance
    ``None`` or a list) are reported as False instead of letting the
    ``TypeError`` raised by ``float()`` escape from a simple predicate.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: not a string or a number.
        return False
    return True
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
22
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
23
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
def is_integer(s):
    """Tell whether ``s`` can be converted to an int.

    Returns True when ``int(s)`` succeeds, False otherwise. Note that
    anything ``int()`` accepts counts, so a float such as ``3.7`` yields
    True (it truncates) while the string ``"3.7"`` yields False.

    Values of an unsupported type (``None``, lists, ...) and non-finite
    floats are reported as False rather than raising ``TypeError`` or
    ``OverflowError``.
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        # ValueError: unparsable text or NaN; TypeError: unsupported type;
        # OverflowError: infinite float.
        return False
    return True
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
30
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
31
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
def compare_headers(files):
    """Return the column headings shared by every file in ``files``.

    The first line of each file is read, stripped of surrounding
    whitespace, lower-cased and split on tabs. Headings are returned in the
    order they appear in the first file, and only when they are also
    present in the header of every other file.

    Exits the program with status 9 when no heading is common to all files.
    """
    header_rows = {}
    for path in files:
        with open(path, "r") as handle:
            first_line = handle.readline()
        header_rows[path] = first_line.strip().lower().split("\t")

    # The first file is the reference; the rest are only checked for
    # membership.
    reference = header_rows[files[0]]
    others = [header_rows[path] for path in files[1:]]
    shared = [
        heading for heading in reference
        if all(heading in row for row in others)
    ]

    if len(shared) == 0:
        sys.exit(9)
    return shared
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
53
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
54
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
def get_nb_lines(files):
    """Return the total number of events (data rows) across ``files``.

    Each file is parsed with ``pandas.read_table``, which by default uses
    the first line as the column header. The resulting frame therefore
    already holds data rows only, and its length is the event count for
    that file. No further subtraction is applied: removing one more row per
    file would undercount and yield -1 for a header-only file.
    """
    tot_event = 0
    for f in files:
        df = pd.read_table(f)
        # The header line is not part of df.index, so len() is the row count.
        tot_event += len(df.index)
    return tot_event
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
61
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
62
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
def get_headers_index(list_headings, headings):
    """Return the position in ``headings`` of each name in ``list_headings``.

    ``headings`` is lower-cased before the lookup, whereas the names in
    ``list_headings`` are matched as given. The result has one index per
    requested name, in the same order; when a name occurs several times in
    ``headings`` the first occurrence is used.

    Raises ValueError (from ``list.index``) if a requested name is absent.
    """
    lowered = [name.lower() for name in headings]
    return [lowered.index(wanted) for wanted in list_headings]
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
69
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
70
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
71 def merge_and_DS_txt(in_files, out_file, col_names, factor_ds):
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
72 """Concatenates together tab-separated files.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
73 The output will have only the columns in common to all the files provided
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
74 as input, as determined by the headers.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
75 All lines after the header line must contain only numbers.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
76 Potential errors are logged to stderr. If the number of errors reaches 10,
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
77 the program stops.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
78 If a downsampling factor is given, returns the indicated fraction of
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
79 random lines.
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
80 """
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
81
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
82 nb_errors = 0
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
83 max_error = 10
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
84
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
85 # get list of headers in common to all files
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
86 list_hdgs = compare_headers(in_files)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
87 total_events = get_nb_lines(in_files)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
88 total_final = total_events * ds_factor
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
89 nb_per_file = int(total_final / len(in_files))
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
90
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
91 with open(out_file, "w") as outf:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
92 ff_order = []
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
93 # HEADERS:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
94 with open(in_files[0], "r") as first_file:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
95 headings_ff = first_file.readline().strip()
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
96 headings = headings_ff.split("\t")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
97 # Get index of headers in common:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
98 hdrs_idx = get_headers_index(list_hdgs, headings)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
99
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
100 # If column to merge on were provided:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
101 if col_names:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
102 for ix in col_names:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
103 if ix not in hdrs_idx:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
104 nb_errors += 1
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
105 sys.stderr.write(" ".join(["WARNING: column", str(ix), "in", in_files[0],
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
106 "does not exist in all files or has a different header.\n"]))
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
107 if nb_errors == max_error:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
108 exit_code = 4
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
109 sys.stderr.write("Run aborted - too many errors.")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
110 os.remove(out_file)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
111 hdrs_idx = col_names
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
112
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
113 # Print out to output file:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
114 headings_to_write = []
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
115 for cti in range(0, len(headings)):
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
116 if cti in hdrs_idx:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
117 headings_to_write.append(headings[cti])
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
118 ff_order.append(headings[cti])
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
119 outf.write("\t".join(headings_to_write) + "\n")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
120
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
121 # DATA
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
122 for infile in in_files:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
123 with open(infile, "r") as inf:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
124 headings_inf = inf.readline().strip()
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
125 hdgs = headings_inf.split("\t")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
126 # Get the index of columns to keep:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
127 hdgs_idx = []
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
128 for ctc in ff_order:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
129 hdgs_idx.append(int(hdgs.index(ctc)))
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
130 if col_names:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
131 for iy in col_names:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
132 if iy not in hdgs_idx:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
133 nb_errors += 1
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
134 sys.stderr.write(" ".join(["WARNING: column", str(iy), "in", infile,
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
135 "does not exist in all files or has a different header.\n"]))
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
136 if nb_errors == max_error:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
137 exit_code = 4
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
138 sys.stderr.write("Run aborted - too many errors.")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
139 os.remove(out_file)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
140 hdgs_idx = col_names
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
141
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
142 df = pd.read_table(infile, usecols=hdrs_idx)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
143 df_ds = df.sample(nb_per_file, replace=False)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
144
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
145 for cols in df_ds.columns.values:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
146 if df_ds[cols].count() != len(df_ds[cols]):
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
147 sys.stderr.write(infile + "contains non-numeric data\n")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
148
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
149 with open(infile, "r") as checkfile:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
150 fl = checkfile.readline()
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
151 count_lines = 1
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
152 for checklines in checkfile:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
153 to_check = checklines.strip().split("\t")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
154 count_lines += 1
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
155 for item in to_check:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
156 if not is_number(item):
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
157 sys.stderr.write(" ".join(["WARNING: line", str(count_lines),
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
158 "in", infile, "contains non-numeric results\n"]))
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
159 sys.exit(2)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
160
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
161 df_ds = df_ds.ix[:, ff_order]
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
162 df_ds.to_csv(outf, sep="\t", header=False, index=False)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
163
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
164 if nb_errors > 0:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
165 exit_code = 3
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
166 if nb_errors == max_error:
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
167 exit_code = 4
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
168 sys.stderr.write("Run aborted - too many errors.")
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
169 os.remove(out_file)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
170 sys.exit(exit_code)
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
171 return
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
172
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
173
3c0e4179be7a "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/merge_ds_flowtext commit 7858e5b085fc3c60c88fe87b2f343969d50d9b1e"
azomics
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the options, normalise the optional
    # column list and down-sampling factor, then run the merge.
    parser = ArgumentParser(
        prog="FCStxtmerge",
        description="Merge based on headers text-converted FCS files "
                    "into one text file.")
    parser.add_argument("-i", dest="input_files", required=True,
                        action="append",
                        help="File location for the text files.")
    parser.add_argument("-o", dest="output_file", required=True,
                        help="Name of the output file.")
    parser.add_argument("-c", dest="columns",
                        help="Specify which column to keep in output file")
    parser.add_argument("-d", dest="downsampling_factor",
                        help="How much of each file to keep")
    args = parser.parse_args()

    # Columns to keep, if any. The option is a comma-separated list of
    # 1-based positions; they are stored 0-based. The values below are
    # treated as "nothing selected" (presumably placeholder text sent by
    # the calling form -- confirm against the tool wrapper).
    col_placeholders = ("i.e.:1,2,5", "default", "Default")
    columns = []
    if args.columns and args.columns not in col_placeholders:
        col_tokens = [token.strip() for token in args.columns.split(",")]
        if len(col_tokens) == 1:
            single = col_tokens[0]
            # A lone blank value means "no selection"; a lone non-integer
            # value aborts with exit status 7.
            if single:
                if not is_integer(single):
                    sys.exit(7)
                columns.append(int(single) - 1)
        else:
            # In a multi-value list any non-integer entry aborts with
            # exit status 6.
            for token in col_tokens:
                if not is_integer(token):
                    sys.exit(6)
                columns.append(int(token) - 1)

    # Down-sampling factor, if any (falls back to 0.1 when not supplied).
    # Note: a trailing 'X' stands in for '%' because that seems to be what
    # Galaxy passes.
    ds_placeholders = ("i.e.:0.1 or 10X", "default", "Default")
    ds_factor = 0.1
    if (args.downsampling_factor
            and args.downsampling_factor not in ds_placeholders):
        ds_text = args.downsampling_factor.strip().rstrip("X")
        if not is_number(ds_text):
            sys.exit(8)
        ds_factor = float(ds_text)
        # Values above 100 or not strictly positive are rejected with exit
        # status 8 after echoing the offending value on stderr.
        if ds_factor > 100 or ds_factor <= 0:
            sys.stderr.write(str(ds_factor))
            sys.exit(8)
        # Values in (1, 100] are read as percentages and scaled down to a
        # fraction; values up to 1 are used unchanged.
        if ds_factor > 1:
            ds_factor = ds_factor / 100

    input_files = list(args.input_files)
    merge_and_DS_txt(input_files, args.output_file, columns, ds_factor)