Previous changeset 2:8bf642eee8eb (2016-08-11) Next changeset 4:7d2f2d1a23ee (2016-08-11) |
Commit message:
Deleted selected files |
removed:
consol_fit.py consol_fit.xml |
b |
diff -r 8bf642eee8eb -r 98ec522f4e95 consol_fit.py --- a/consol_fit.py Thu Aug 11 18:07:53 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,300 +0,0 @@\n-# Consol_fit! It\'s a script & it\'ll consolidate your fitness values if you got them from a looping trimming pipeline instead of the standard split-by-transposon pipeline. That\'s all.\n-\n-import math\n-import csv\n-\n-\n-\n-\n-\n-\n-\n-\n-\n-\n-##### ARGUMENTS #####\n-\n-def print_usage():\n-\tprint "\\n" + "You are missing one or more required flags. A complete list of flags accepted by calc_fitness is as follows:" + "\\n\\n"\n-\tprint "\\033[1m" + "Required" + "\\033[0m" + "\\n"\n-\tprint "-i" + "\\t\\t" + "The calc_fit file to be consolidated" + "\\n"\n-\tprint "-out" + "\\t\\t" + "Name of a file to enter the .csv output." + "\\n"\n-\tprint "-out2" + "\\t\\t" + "Name of a file to put the percent blank score in (used in aggregate)." + "\\n"\n-\tprint "-calctxt" + "\\t\\t" + "The txt file output from calc_fit" + "\\n"\n-\tprint "-normalize" + "\\t" + "A file that contains a list of genes that should have a fitness of 1" + "\\n"\n-\tprint "\\n"\n-\tprint "\\033[1m" + "Optional" + "\\033[0m" + "\\n"\n-\tprint "-cutoff" + "\\t\\t" + "Discard any positions where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\\n"\n-\tprint "-cutoff2" + "\\t\\t" + "Discard any positions within the normalization genes where the average of counted transcripts at time 0 and time 1 is below this number (default 0)" + "\\n"\n-\tprint "-wig" + "\\t\\t" + "Create a wiggle file for viewing in a genome browser. Provide a filename." + "\\n"\n-\tprint "-maxweight" + "\\t" + "The maximum weight a transposon gene can have in normalization calculations" + "\\n"\n-\tprint "-multiply" + "\\t" + "Multiply all fitness scores by a certain value (e.g., the fitness of a knockout). You should normalize the data." + "\\n"\n-\tprint "\\n"\n-\n-import argparse \n-parser = argparse.ArgumentParser()\n-parser.add_argument("-calctxt", action="store", dest="calctxt")\n-parser.add_argument("-normalize", action="store", dest="normalize")\n-parser.add_argument("-i", action="store", dest="input")\n-parser.add_argument("-out", action="store", dest="outfile")\n-parser.add_argument("-out2", action="store", dest="outfile2")\n-parser.add_argument("-cutoff", action="store", dest="cutoff")\n-parser.add_argument("-cutoff2", action="store", dest="cutoff2")\n-parser.add_argument("-wig", action="store", dest="wig")\n-parser.add_argument("-maxweight", action="store", dest="max_weight")\n-parser.add_argument("-multiply", action="store", dest="multiply")\n-arguments = parser.parse_args()\n-\n-if (not arguments.input or not arguments.outfile or not arguments.calctxt):\n-\tprint_usage() \n-\tquit()\n-\n-if (not arguments.max_weight):\n-\targuments.max_weight = 75\n-\n-if (not arguments.cutoff):\n-\targuments.cutoff = 0\n-\t\n-# Cutoff2 only has an effect if it\'s larger than cutoff, since the normalization step references a list of insertions already affected by cutoff.\n-\t\n-if (not arguments.cutoff2):\n-\targuments.cutoff2 = 10\n-\n-#Gets total & refname from calc_fit outfile2\n-\n-with open(arguments.calctxt) as file:\n-\tcalctxt = file.readlines()\n-total = float(calctxt[1].split()[1])\n-refname = calctxt[2].split()[1]\n-\n-\n-\n-\n-\n-\n-\n-\n-\n-\t\n-##### CONSOLIDATING THE CALC_FIT FILE #####\n-\n-with open(arguments.input) as file:\n-\tinput = file.readlines()\n-results = [["position", "strand", "count_1", "count_2", "ratio", "mt_freq_t1", "mt_freq_t2", "pop_freq_t1", "pop_freq_t2", "gene", "D", "W", "nW"]]\n-i = 1\n-d = float(input[i].split(",")[10])\n-while i < len(input):\n-\tposition = float(input[i].split(",")[0])\n-\tstrands = input[i].split(",")[1]\n-\tc1 = float(input[i].split(",")[2])\n-\tc2 = float(input[i].split(",")[3])\n-\tgene = input[i].split(",")[9]\n-\twhile i + 1 < len(input) and float(input[i+1].split(",")[0]) - position <= 4:\n-\t\tif i + 1 < len(input):\n-\t\t\ti += 1\n-\t\t\tc1 += float(input[i].split(",")[2])\n-\t\t\tc2 += float(input[i].split(",")[3])\n-\t\t\tstrands = input[i].split(",")[1]\n-\t\t\tif strands[0] == \'b\':\n-\t\t\t\tnew_strands = \'b/\'\n-\t\t\telif strands[0] == \'+\':\n-\t\t\t\tif input[i].split(",")[1][0] == \'b\':\n-\t\t\t\t\tnew_strands = \'b/\'\n-\t\t\t\telif input[i].split(",")[1][0] == \'+\''..b'th in vivo experiments. \n-# For example, when studying a nasal infection in a mouse model, what bacteria "sticks" and is able to survive and what bacteria is swallowed and killed or otherwise flushed out tends to be a matter of chance not fitness; all mutants with an insertion in a specific transposon gene could be flushed out by chance!\n-\n-\t\t\t\tif score == 0:\n-\t\t\t\t\tblank_ws += 1\t\n-\t\t\t\tsum += score\n-\t\t\t\tcount += 1\n-\t\t\t\tweights.append(avg)\n-\t\t\t\tscores.append(score)\n-\t\t\t\t\n-\t\t\t\tprint str(list[9]) + " " + str(score) + " " + str(c1)\n-\n-# Counts and removes all "blank" fitness values of normalization genes - those that = 0 - because they most likely don\'t really have a fitness value of 0, and you just happened to not get any reads from that location at t2. \n- \n-\tblank_count = 0\n-\toriginal_count = len(scores)\n-\ti = 0\n-\twhile i < original_count:\n-\t\tw_value = scores[i]\n-\t\tif w_value == 0:\n-\t\t\tblank_count += 1\n-\t\t\tweights.pop[i]\n-\t\t\tscores.pop[i]\n-\t\t\ti-=1\n-\t\ti += 1\n-\n-# If no normalization genes can pass the cutoff, normalization cannot occur, so this ends the script advises the user to try again and lower cutoff and/or cutoff2.\n-\t\n-\tif len(scores) == 0:\n-\t\tprint \'ERROR: The normalization genes do not have enough reads to pass cutoff and/or cutoff2; please lower one or both of those arguments.\' + "\\n"\n-\t\tquit()\n-\t\n-\tpc_blank_normals = float(blank_count) / float(original_count)\n-\tprint "# blank out of " + str(original_count) + ": " + str(pc_blank_normals) + "\\n"\n-\twith open(arguments.outfile2, "w") as f:\n-\t\tf.write("blanks: " + str(pc_blank_normals) + "\\n" + "total: " + str(total) + "\\n" + "refname: " + refname)\n- \n-\taverage = sum / count\n-\ti = 0\n-\tweighted_sum = 0\n-\tweight_sum = 0\n-\twhile i < len(weights):\n-\t\tweighted_sum += weights[i]*scores[i]\n-\t\tweight_sum += weights[i]\n-\t\ti += 1\n-\tweighted_average = weighted_sum/weight_sum\n- \n-\tprint "Normalization step:" + "\\n"\n-\tprint "Regular average: " + str(average) + "\\n"\n-\tprint "Weighted Average: " + str(weighted_average) + "\\n"\n-\tprint "Total Insertions: " + str(count) + "\\n"\n- \n-\told_ws = 0\n-\tnew_ws = 0\n-\twcount = 0\n-\tfor list in results:\n-\t\tif list[11] == \'W\':\n-\t\t\tcontinue\n-\t\tnew_w = float(list[11])/weighted_average\n-\t\t\n-# Sometimes you want to multiply all the fitness values by a constant; this does that.\n-# For example you might multiply all the values by a constant for a genetic interaction screen - where Tn-Seq is performed as usual except there\'s one background knockout all the mutants share.\n-\t\t\n-\t\tif arguments.multiply:\n-\t\t\tnew_w *= float(arguments.multiply)\n-\t\t\n-\t\tif float(list[11]) > 0:\n-\t\t\told_ws += float(list[11])\n-\t\t\tnew_ws += new_w\n-\t\t\twcount += 1\n-\n-\t\tlist[12] = new_w\n-\t\t\n-\t\tif (arguments.wig):\n-\t\t\twigstring += str(list[0]) + " " + str(new_w) + "\\n"\n-\t\t\t\n-\told_w_mean = old_ws / wcount\n-\tnew_w_mean = new_ws / wcount\n-\tprint "Old W Average: " + str(old_w_mean) + "\\n"\n-\tprint "New W Average: " + str(new_w_mean) + "\\n"\n-\n-with open(arguments.outfile, "wb") as csvfile:\n- writer = csv.writer(csvfile)\n- writer.writerows(results)\n- \t\n-if (arguments.wig):\n-\tif (arguments.normalize):\n-\t\twith open(arguments.wig, "wb") as wigfile:\n-\t\t\twigfile.write(wigstring)\n-\telse:\n-\t\tfor list in results:\n-\t\t\twigstring += str(list[0]) + " " + str(list[11]) + "\\n"\n-\t\twith open(arguments.wig, "wb") as wigfile:\n-\t\t\t\twigfile.write(wigstring)\n-\t\t\t\t\n-\t\t\t\t\n-# ___ ___ ___ ___ ___ ___ ___ ___ \n-# /\\__\\ /\\ \\ /\\__\\ /\\__\\ /\\ \\ /\\ \\ /\\ \\ /\\__\\ \n-# /:/ _/_ /::\\ \\ |::L__L /::L_L_ /::\\ \\ /::\\ \\ /::\\ \\ |::L__L \n-# /::-"\\__\\ /::\\:\\__\\ |:::\\__\\ /:/L:\\__\\ /:/\\:\\__\\ /:/\\:\\__\\ /:/\\:\\__\\ |:::\\__\\\n-# \\;:;-",-" \\/\\::/ / /:;;/__/ \\/_/:/ / \\:\\ \\/__/ \\:\\ \\/__/ \\:\\/:/ / /:;;/__/\n-# |:| | /:/ / \\/__/ /:/ / \\:\\__\\ \\:\\__\\ \\::/ / \\/__/ \n-# \\|__| \\/__/ \\/__/ \\/__/ \\/__/ \\/__/ \n\\ No newline at end of file\n' |
b |
diff -r 8bf642eee8eb -r 98ec522f4e95 consol_fit.xml --- a/consol_fit.xml Thu Aug 11 18:07:53 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,81 +0,0 @@ -<tool id="consolidate_fitnesses" name="Consolidate Fitnesses"> - <description>of transposon insertion locations</description> - <requirements> - <requirement type="package" version="1.64">biopython</requirement> - </requirements> - <command interpreter="python"> - consol_fit.py - -i $input - -calctxt $calctxt - -out $output - -out2 $output2 - -maxweight $maxweight - -cutoff $cutoff - -cutoff2 $cutoff2 - -wig $output3 - #if $normalization.calculations == "yes": - -normalize $normalization.genes - #end if - #if $multiply.choice == "yes": - -multiply $multiply.factor - #end if - #if $reads.downstream == "yes": - -d 1 - #end if - </command> - <inputs> - <param name="input" type="data" label="Fitness file"/> - <param name="calctxt" type="data" label="the txt file output from calc_fitness"/> - <conditional name="normalization"> - <param name="calculations" type="select" label="Normalize fitness calculations?"> - <option value="no">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"> - <!-- do nothing --> - </when> - <when value="yes"> - <param name="genes" type="data" label="Genes to normalize by" /> - </when> - </conditional> - <param name="cutoff" type="float" value="0.0" label="Cutoff"/> - <param name="cutoff2" type="float" value="0.0" label="Cutoff2"/> - <param name="maxweight" type="float" value="75" label="Maximum weight of a transposon gene in normalization calculations"/> - <conditional name="multiply"> - <param name="choice" type="select" label="Multiply fitness scores by a certain value?"> - <option value="no">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"> - <!-- do nothing --> - </when> - <when value="yes"> - <param name="factor" type="float" value="0.0" label="Multiply by" /> - </when> - </conditional> - <conditional name="reads"> - <param name="downstream" type="select" label="Are all reads downstream of the transposon?"> - <option value="no">No</option> - <option value="yes">Yes</option> - </param> - <when value="no"> - <!-- do nothing --> - </when> - <when value="yes"> - <!-- do nothing --> - </when> - </conditional> - </inputs> - <outputs> - <data format="csv" name="output" /> - <data format="txt" name="output2" /> - <data format="wig" name="output3" /> - </outputs> - <help> - -**What it does** - -This tool consolidates the fitness values of transposon insertion mutations generated by Tn-Seq, and is mostly useful for consolidating values produced by a 4-cycle looping trimming pipeline. - -</help> -</tool> \ No newline at end of file |