Mercurial > repos > iuc > calculate_contrast_threshold
changeset 0:7371bb087d86 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/calculate_contrast_threshold commit 6ba8e678f8cedabaf9b4759cddb81b8b3cd9ec31"
author | iuc |
---|---|
date | Wed, 11 Sep 2019 09:28:55 -0400 |
parents | |
children | |
files | calculate_contrast_threshold.py calculate_contrast_threshold.xml test-data/calcThreshold_b.txt test-data/calcThreshold_t.txt test-data/sample.tabular |
diffstat | 5 files changed, 340 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calculate_contrast_threshold.py Wed Sep 11 09:28:55 2019 -0400 @@ -0,0 +1,186 @@ +#!/usr/bin/python + +import getopt +import math +import sys + +import numpy as np + +""" +Program to calculate the contrast thresholds for heatmap from tagPileUp CDT +""" + + +def rebin(a, new_shape): + M, N = a.shape + m, n = new_shape + if m >= M: + # repeat rows in data matrix + a = np.repeat(a, math.ceil(float(m) / M), axis=0) + + M, N = a.shape + m, n = new_shape + + row_delete_num = M % m + col_delete_num = N % n + + np.random.seed(seed=0) + + if row_delete_num > 0: + # select deleted rows with equal intervals + row_delete = np.linspace(0, M - 1, num=row_delete_num, dtype=int) + # sort the random selected deleted row ids + row_delete = np.sort(row_delete) + row_delete_plus1 = row_delete[1:-1] + \ + 1 # get deleted rows plus position + # get deleted rows plus position (top +1; end -1) + row_delete_plus1 = np.append( + np.append(row_delete[0] + 1, row_delete_plus1), row_delete[-1] - 1) + # put the info of deleted rows into the next rows by mean + a[row_delete_plus1, :] = ( + a[row_delete, :] + a[row_delete_plus1, :]) / 2 + a = np.delete(a, row_delete, axis=0) # random remove rows + + if col_delete_num > 0: + # select deleted cols with equal intervals + col_delete = np.linspace(0, N - 1, num=col_delete_num, dtype=int) + # sort the random selected deleted col ids + col_delete = np.sort(col_delete) + col_delete_plus1 = col_delete[1:-1] + \ + 1 # get deleted cols plus position + # get deleted cols plus position (top +1; end -1) + col_delete_plus1 = np.append( + np.append(col_delete[0] + 1, col_delete_plus1), col_delete[-1] - 1) + # put the info of deleted cols into the next cols by mean + a[:, col_delete_plus1] = ( + a[:, col_delete] + a[:, col_delete_plus1]) / 2 + a = np.delete(a, col_delete, axis=1) # random remove columns + + M, N = a.shape + + # compare the heatmap matrix + a_compress = a.reshape((m, int(M / m), n, int(N / n))).mean(3).mean(1) + return np.array(a_compress) + + +def load_Data(input_file, quantile, absolute, header, start_col, row_num, col_num, min_upper_lim): + data0 = [] + with open(input_file, 'r') as data: + if header == 'T': + data.readline() + + for rec in data: + tmp = [(x.strip()) for x in rec.split('\t')] + data0.append(tmp[start_col:]) + data0 = np.array(data0, dtype=float) + + if row_num == -999: + row_num = data0.shape[0] + if col_num == -999: + col_num = data0.shape[1] + + # rebin data0 + if row_num < data0.shape[0] and col_num < data0.shape[1]: + data0 = rebin(data0, (row_num, col_num)) + elif row_num < data0.shape[0]: + data0 = rebin(data0, (row_num, data0.shape[1])) + elif col_num < data0.shape[1]: + data0 = rebin(data0, (data0.shape[0], col_num)) + + # Calculate contrast limits here + rows, cols = np.nonzero(data0) + upper_lim = np.percentile(data0[rows, cols], quantile) + lower_lim = 0 + if absolute != -999: + upper_lim = absolute + + # Setting an absolute threshold to a minimum, + # in cases the 95th percentile contrast is <= user defined min_upper_lim + if quantile > 0.0: + print("\nQUANTILE: {}".format(quantile)) + print("Quantile calculated UPPER LIM: {}".format(upper_lim)) + print("LOWER LIM: {}".format(lower_lim)) + if upper_lim <= min_upper_lim: + print("setting heatmap upper_threshold to min_upper_lim\n") + upper_lim = min_upper_lim + + outfile = open('calcThreshold.txt', 'w') + outfile.write("upper_threshold:{}\nlower_threshold:{}\nrow_num:{}\ncol_num:{}\nheader:{}\nstart_col:{}".format( + upper_lim, lower_lim, row_num, col_num, header, start_col)) + print('heatmap_upper_threshold:' + str(upper_lim)) + print('heatmap_lower_threshold:' + str(lower_lim)) + outfile.flush() + outfile.close() + + +############################################################################ +# python cdt_to_heatmap.py -i test.tabular.split_line -o test.tabular.split_line.png -q 0.9 -c black -d T -s 2 -r 500 -l 300 -b test.colorsplit +############################################################################ +usage = """ +Usage: +This script calculates the contrast thresholds from Tag pile up heatmap data. Outputs a text file that contains the parameters for the heatmap script. + +python calculateThreshold.py -i <input file> -q <quantile> -m <min upper thresold after quantile calculation> -t <absolute tag threshold> -d <header T/F> -s <start column> -r <row num after compress> -l <col num after compress>' + +Example: +python calculateThreshold.py -i test.tabular.split_line -q 90 -m 5 -d T -s 2 -r 600 -l 300 +""" + +if __name__ == '__main__': + + # check for command line arguments + if len(sys.argv) < 2 or not sys.argv[1].startswith("-"): + sys.exit(usage) + # get arguments + try: + optlist, alist = getopt.getopt(sys.argv[1:], 'hi:o:q:t:c:d:s:r:l:m:') + except getopt.GetoptError: + sys.exit(usage) + + # default quantile contrast saturation = 0.9 + quantile = 90.0 + min_upper_lim = 5.0 + # absolute contrast saturation overrides quantile + absolute = -999 + + # default figure width/height is defined by matrix size + # if user-defined size is smaller than matrix, activate rebin function + row_num = -999 + col_num = -999 + + for opt in optlist: + if opt[0] == "-h": + sys.exit(usage) + elif opt[0] == "-i": + input_file = opt[1] + elif opt[0] == "-q": + quantile = float(opt[1]) + elif opt[0] == '-t': + absolute = float(opt[1]) + elif opt[0] == "-d": + header = opt[1] + elif opt[0] == "-s": + start_col = int(opt[1]) + elif opt[0] == "-r": + row_num = int(opt[1]) + elif opt[0] == "-l": + col_num = int(opt[1]) + elif opt[0] == "-m": + min_upper_lim = float(opt[1]) + + print("Header present:", header) + print("Start column:", start_col) + print("Row number (pixels):", row_num) + print("Col number (pixels):", col_num) + print("Min Upper Limit while using Quantile:", min_upper_lim) + if absolute != -999: + print("Absolute tag contrast threshold:", absolute) + else: + print("Percentile tag contrast threshold:", quantile) + + if absolute == -999 and quantile <= 0: + print("\nInvalid threshold!!!") + sys.exit(usage) + + load_Data(input_file, quantile, absolute, + header, start_col, row_num, col_num, min_upper_lim)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/calculate_contrast_threshold.xml Wed Sep 11 09:28:55 2019 -0400 @@ -0,0 +1,122 @@ +<tool id="calculate_contrast_threshold" name="Calculate Contrast threshold" version="1.0.0"> + <description>from tag pileup CDT + </description> + <requirements> + <requirement type="package" version="1.15.4">numpy</requirement> + <requirement type="package" version="3.7.4">python</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/calculate_contrast_threshold.py' -i '$input_file' + + ## Setting the quantile values properly. + #if str($quantile_type.quantile_type_selector) == "b_option": + -q '${quantile_type.quantile}' + -m '${quantile_type.min_contrast}' + #else if str($quantile_type.quantile_type_selector) == "t_option": + -t '${quantile_type.quantile2}' + #end if + + -d '$header' -s '$start_col' -r '$row_num' -l '$col_num' + ]]> + </command> + + <inputs> + <param name="input_file" argument="-i" type="data" format="txt" label="Pileup Data Matrix "/> + <param name="header" argument="-d" type="boolean" truevalue="T" falsevalue="F" checked="true" label="Does the input file have a Header ?" help="Standard CDT file have headers."/> + <param name="start_col" argument="-s" type="integer" value="2" label="Valid Data Start Column" help="Valid data values start from this column"/> + <param name="col_num" argument="-l" type="integer" value="300" label="Plot Width in pixels" help="Equal to the heatmap width you plan to create"/> + <param name="row_num" argument="-r" type="integer" value="600" label="Plot Height in pixels" help="Equal to the heatmap height you plan to create"/> + + + <conditional name="quantile_type"> + <param name="quantile_type_selector" type="select" display="radio" label="Choose the Contrast paramter"> + <option value="b_option" selected="true">Calculate thresholds from data (-b)</option> + <option value="t_option">Enforce absolute thresholds (-t) + </option> + </param> + + <when value="b_option"> + <param name="quantile" argument="-b" type="float" min="0" max="100" value="95" label="Quantile" help="Enter your quantile value above."/> + <param name="min_contrast" type="float" min="0" value="0" label="Minimum upper limit after Quantile calculation" help="This value will be used as the upper limit if the calculated quantile is below this value" argument="-m"/> + </when> + + <when value="t_option"> + <param name="quantile2" argument="-t" type="float" min="0" value="0.0" label="Absolute tag threshold" help="Enter your custom tag threshold value above. takes real values (>= 0)"/> + </when> + </conditional> + + </inputs> + + <outputs> + <data name="threshold_output" format="txt" from_work_dir="calcThreshold.txt"/> + </outputs> + + <tests> + <test> + <param name="input_file" value="sample.tabular"/> + <param name="header" value="T"/> + <param name="start_col" value="2"/> + <param name="col_num" value="300"/> + <param name="row_num" value="600"/> + <conditional name="quantile_type"> + <param name="quantile_type_selector" value="b_option"/> + <param name="quantile" value="95"/> + <param name="min_contrast" value="5"/> + </conditional> + <output name="threshold_output" file="calcThreshold_b.txt" /> + </test> + <test> + <param name="input_file" value="sample.tabular"/> + <param name="header" value="T"/> + <param name="start_col" value="2"/> + <param name="col_num" value="300"/> + <param name="row_num" value="600"/> + <conditional name="quantile_type"> + <param name="quantile_type_selector" value="t_option"/> + <param name="quantile2" value="10.0"/> + </conditional> + <output name="threshold_output" file="calcThreshold_t.txt" /> + </test> + </tests> + + <help> + <![CDATA[ + +**What it does** + +---- + +Calculates a contrast threshold from the CDT file generated by ``tag_pileup_frequency``. The calculated values are then used to set a uniform contrast for all the heatmaps generated downstream. + +**Choosing Plot Width & Height** + +If your trying to create heatmaps with dimensions (Width x Height)px = (300 x 600)px. Use Plot width = 300, height = 600. This not only helps in generating unbiased heatmaps but also helps in reusing the calculated contrasts for heatmaps of same dimensions. + +**Understanding contrast parameters** + +`-b` + + Calculates a percentile value (for example 95th percentile) from the input CDT data matrix to report upper-limit and lower-limit for setting heatmap contrasts. + + Also can set a minimum upper-limit to fall-back, incase the calculated percentile is <= specified minimum. + +`-t` + + Takes the absolute value entered and reports an upper_limit and lower_limit. + + ]]> + </help> + + <citations> + <citation type="bibtex"> + @unpublished{None, + author = {Kuntala, Prashant Kumar and Lai, William KM }, + title = {None}, + year = {None}, + eprint = {None}, + url = {http://www.pughlab.psu.edu/} + }</citation> + </citations> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/calcThreshold_b.txt Wed Sep 11 09:28:55 2019 -0400 @@ -0,0 +1,6 @@ +upper_threshold:5.0 +lower_threshold:0 +row_num:600 +col_num:300 +header:T +start_col:2 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/calcThreshold_t.txt Wed Sep 11 09:28:55 2019 -0400 @@ -0,0 +1,6 @@ +upper_threshold:10.0 +lower_threshold:0 +row_num:600 +col_num:300 +header:T +start_col:2 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample.tabular Wed Sep 11 09:28:55 2019 -0400 @@ -0,0 +1,20 @@ +YORF NAME 0 1 2 3 4 5 6 7 +10001 10001 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 +10002 10002 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +10003 10003 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 +10004 10004 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 +10005 10005 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 +10006 10006 1.0 2.0 2.0 0.0 1.0 0.0 0.0 0.0 +10007 10007 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +10008 10008 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 +10009 10009 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +10011 10011 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 +10012 10012 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +10013 10013 0.0 0.0 1.0 1.0 0.0 1.0 0.0 0.0 +10014 10014 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 +10015 10015 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 +10016 10016 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 +10017 10017 0.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 +10018 10018 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 +10019 10019 0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 +10020 10020 0.0 2.0 1.0 0.0 1.0 0.0 0.0 0.0