Repository 'microsatellite_ngs'
hg clone https://toolshed.g2.bx.psu.edu/repos/arkarachai-fungtammasan/microsatellite_ngs

Changeset 7:3c05abb4452e (2015-04-22)
Previous changeset 6:dccd7a3ee717 (2015-04-22)
Commit message:
add missing files
added:
Galaxy-Workflow-Estimate_minimum_informative_read_depth.ga
Galaxy-Workflow-TR_genome_profiling.ga
Galaxy-Workflow-microsatellite_profiling.ga
README.md
commandline_sample_STR-FM_estimate_mininum_informative_Read_Depth
commandline_sample_STR-FM_reference_profiling
commandline_sample_STR-FM_shortread_profiling
test-data/C_sample_fastq
test-data/C_sample_snoope
test-data/PCRinclude.allrate.bymajorallele
test-data/combineprob_out.txt
test-data/microsatcompat_in.txt
test-data/microsatcompat_out.txt
test-data/microsatellite_flanking_L.fastq
test-data/microsatellite_flanking_R.fastq
test-data/microsatpurity_in.txt
test-data/microsatpurity_out.txt
test-data/nice1tab.py
test-data/probvalueforhetero_in.txt
test-data/probvalueforhetero_out.txt
test-data/profilegenerator_in.txt
test-data/profilegenerator_out.txt
test-data/readdepth2seqdepth.out
test-data/samplePESAM_2_profile_C.txt
test-data/sampleTRgenotypingcorrection
test-data/sampleTRprofile_C.txt
test-data/samplefq.snoope
test-data/samplefq.snoope.new
test-data/sampleprofilegenerator_in
test-data/sampleprofilegenerator_out
test-data/samplesortedPESAM_C.sam
test-data/shifted.2bit
b
diff -r dccd7a3ee717 -r 3c05abb4452e Galaxy-Workflow-Estimate_minimum_informative_read_depth.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy-Workflow-Estimate_minimum_informative_read_depth.ga Wed Apr 22 12:22:50 2015 -0400
[
b'@@ -0,0 +1,342 @@\n+{\n+    "a_galaxy_workflow": "true", \n+    "annotation": "", \n+    "format-version": "0.1", \n+    "name": "Estimate minimum informative read depth", \n+    "steps": {\n+        "0": {\n+            "annotation": "See sample in  https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files", \n+            "id": 0, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "See sample in  https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files", \n+                    "name": "TR error rate"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 220, \n+                "top": 737\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"TR error rate\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "1": {\n+            "annotation": "replace \'A\' with motif of interest", \n+            "id": 1, \n+            "input_connections": {\n+                "input": {\n+                    "id": 0, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Filter", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 385, \n+                "top": 260\n+            }, \n+            "post_job_actions": {\n+                "HideDatasetActionout_file1": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "tool_errors": null, 
\n+            "tool_id": "Filter1", \n+            "tool_state": "{\\"input\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"header_lines\\": \\"\\\\\\"0\\\\\\"\\", \\"cond\\": \\"\\\\\\"c4==\'A\'\\\\\\"\\", \\"__page__\\": 0}", \n+            "tool_version": "1.1.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "2": {\n+            "annotation": "", \n+            "id": 2, \n+            "input_connections": {\n+                "error_profile": {\n+                    "id": 1, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Generate all possible combination of read profile", \n+            "outputs": [\n+                {\n+                    "name": "output", \n+                    "type": "tabular"\n+                }\n+            ], \n+            "position": {\n+                "left": 653.5, \n+                "top": 203\n+            }, \n+            "post_job_actions": {}, \n+            "tool_errors": null, \n+            "tool_id": "toolshed.g2.bx.psu.edu/repos/arkarachai-fungtammasan/microsatellite_ngs/Profilegenerator/2.0.0", \n+            "tool_state": "{\\"error_profile\\": \\"null\\", \\"MOTIF\\": \\"\\\\\\"A\\\\\\"\\", \\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"minprob\\": \\"\\\\\\"1e-08\\\\\\"\\", \\"Maxdepth\\": \\"\\\\\\"30\\\\\\"\\"}", \n+            "tool_version": "2.0.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "3": {\n+            "annotation": "", \n+            "id": 3, \n+            "input_connections": {\n+                "input": {\n+                    "id": 2, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Unique", \n+            "outputs": [\n+                {\n+                    "name": "outfile", \n+                    "type": 
"input"\n+                }\n+            ], \n+            "position": {\n+                "left": 676, \n+                "top": 322\n+            }, \n+            "post_job_actions": {\n+       '..b'        "microsat_error_profile": {\n+                    "id": 0, \n+                    "output_name": "output"\n+                }, \n+                "microsat_raw": {\n+                    "id": 6, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Evaluate the probability of the allele combination to generate read profile", \n+            "outputs": [\n+                {\n+                    "name": "microsat_corrected", \n+                    "type": "tabular"\n+                }\n+            ], \n+            "position": {\n+                "left": 708.5, \n+                "top": 913\n+            }, \n+            "post_job_actions": {\n+                "HideDatasetActionmicrosat_corrected": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "microsat_corrected"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "toolshed.g2.bx.psu.edu/repos/arkarachai-fungtammasan/microsatellite_ngs/heteroprob/2.0.0", \n+            "tool_state": "{\\"microsat_raw\\": \\"null\\", \\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"microsat_error_profile\\": \\"null\\", \\"expectedminorallele\\": \\"\\\\\\"0.5\\\\\\"\\"}", \n+            "tool_version": "2.0.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "8": {\n+            "annotation": "", \n+            "id": 8, \n+            "input_connections": {\n+                "input": {\n+                    "id": 7, \n+                    "output_name": "microsat_corrected"\n+                }\n+            }, \n+            "inputs": [], \n+      
      "name": "Sort", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 761, \n+                "top": 1133\n+            }, \n+            "post_job_actions": {\n+                "HideDatasetActionout_file1": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "sort1", \n+            "tool_state": "{\\"__page__\\": 0, \\"style\\": \\"\\\\\\"num\\\\\\"\\", \\"column\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"12\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"order\\": \\"\\\\\\"ASC\\\\\\"\\", \\"input\\": \\"null\\", \\"column_set\\": \\"[{\\\\\\"other_order\\\\\\": \\\\\\"ASC\\\\\\", \\\\\\"__index__\\\\\\": 0, \\\\\\"other_column\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"6\\\\\\"}, \\\\\\"other_style\\\\\\": \\\\\\"num\\\\\\"}]\\"}", \n+            "tool_version": "1.0.3", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "9": {\n+            "annotation": "", \n+            "id": 9, \n+            "input_connections": {\n+                "input": {\n+                    "id": 8, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Combine probability to generate read profile ", \n+            "outputs": [\n+                {\n+                    "name": "output", \n+                    "type": "tabular"\n+                }\n+            ], \n+            "position": {\n+                "left": 722, \n+                "top": 1264\n+            }, \n+            
"post_job_actions": {}, \n+            "tool_errors": null, \n+            "tool_id": "toolshed.g2.bx.psu.edu/repos/arkarachai-fungtammasan/microsatellite_ngs/combineproballelecom/2.0.0", \n+            "tool_state": "{\\"input\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"__page__\\": 0}", \n+            "tool_version": "2.0.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }\n+    }\n+}\n\\ No newline at end of file\n'
b
diff -r dccd7a3ee717 -r 3c05abb4452e Galaxy-Workflow-TR_genome_profiling.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy-Workflow-TR_genome_profiling.ga Wed Apr 22 12:22:50 2015 -0400
[
@@ -0,0 +1,191 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "", 
+    "format-version": "0.1", 
+    "name": "TR genome profiling", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "fasta reference file"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 200, 
+                "top": 250
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"fasta reference file\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {
+                "filePath": {
+                    "id": 0, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Microsatellite detection", 
+            "outputs": [
+                {
+                    "name": "stdout", 
+                    "type": "tabular"
+                }
+            ], 
+            "position": {
+                "left": 430, 
+                "top": 250
+            }, 
+            "post_job_actions": {
+                "ChangeDatatypeActionstdout": {
+                    "action_arguments": {
+                        "newtype": "tabular"
+                    }, 
+                    "action_type": "ChangeDatatypeAction", 
+                    "output_name": "stdout"
+                }, 
+                "HideDatasetActionstdout": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "stdout"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/arkarachai-fungtammasan/microsatellite_ngs/microsatellite/1.0.0", 
+            "tool_state": "{\"__page__\": 0, \"flankSetting\": \"{\\\"noflankdisplay\\\": \\\"False\\\", \\\"flankdisplay\\\": \\\"0\\\", \\\"__current_case__\\\": 0}\", \"filePath\": \"null\", \"minlength\": \"\\\"5\\\"\", \"hammingThreshold\": \"\\\"0\\\"\", \"period\": \"\\\"1\\\"\", \"surfix\": \"\\\"0\\\"\", \"prefix\": \"\\\"0\\\"\", \"__rerun_remap_job_id__\": null, \"inputFileSource\": \"{\\\"inputFileType\\\": \\\"fasta\\\", \\\"__current_case__\\\": 2}\", \"partialmotifs\": \"\\\"True\\\"\", \"multipleruns\": \"\\\"True\\\"\"}", 
+            "tool_version": "1.0.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "2": {
+            "annotation": "", 
+            "id": 2, 
+            "input_connections": {
+                "input": {
+                    "id": 1, 
+                    "output_name": "stdout"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Compute", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 688, 
+                "top": 250
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionout_file1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Add_a_column1", 
+            "tool_state": "{\"input\": \"null\", \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"int(c1+c2)\\\"\", \"round\": \"\\\"no\\\"\", \"__page__\": 0}", 
+            "tool_version": "1.1.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "3": {
+            "annotation": "", 
+            "id": 3, 
+            "input_connections": {
+                "input": {
+                    "id": 2, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Compute", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 916, 
+                "top": 250
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionout_file1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Add_a_column1", 
+            "tool_state": "{\"input\": \"null\", \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"len(c4)\\\"\", \"round\": \"\\\"no\\\"\", \"__page__\": 0}", 
+            "tool_version": "1.1.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "4": {
+            "annotation": "", 
+            "id": 4, 
+            "input_connections": {
+                "input": {
+                    "id": 3, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Cut", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "tabular"
+                }
+            ], 
+            "position": {
+                "left": 1144, 
+                "top": 250
+            }, 
+            "post_job_actions": {
+                "ChangeDatatypeActionout_file1": {
+                    "action_arguments": {
+                        "newtype": "interval"
+                    }, 
+                    "action_type": "ChangeDatatypeAction", 
+                    "output_name": "out_file1"
+                }, 
+                "ColumnSetActionout_file1": {
+                    "action_arguments": {
+                        "chromCol": "1", 
+                        "endCol": "3", 
+                        "nameCol": "", 
+                        "startCol": "2", 
+                        "strandCol": ""
+                    }, 
+                    "action_type": "ColumnSetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Cut1", 
+            "tool_state": "{\"columnList\": \"\\\"c6,c2,c9,c4,c1,c10\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", 
+            "tool_version": "1.0.2", 
+            "type": "tool", 
+            "user_outputs": []
+        }
+    }
+}
\ No newline at end of file
b
diff -r dccd7a3ee717 -r 3c05abb4452e Galaxy-Workflow-microsatellite_profiling.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy-Workflow-microsatellite_profiling.ga Wed Apr 22 12:22:50 2015 -0400
[
b'@@ -0,0 +1,764 @@\n+{\n+    "a_galaxy_workflow": "true", \n+    "annotation": "Profile length of microsatellites or short tandem repeats from short read data", \n+    "format-version": "0.1", \n+    "name": "microsatellite_profiling", \n+    "steps": {\n+        "0": {\n+            "annotation": "input raw read that you want to detect Tandem reepats (TRs)/microsatellites", \n+            "id": 0, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "input raw read that you want to detect Tandem reepats (TRs)/microsatellites", \n+                    "name": "single end fastq"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 200, \n+                "top": 274.765625\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"single end fastq\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "1": {\n+            "annotation": "need to be prepared by user using TR genome profiling", \n+            "id": 1, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "need to be prepared by user using TR genome profiling", \n+                    "name": "TR in genome"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 177, \n+                "top": 412.953125\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"TR in genome\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "2": {\n+            
"annotation": "https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files", \n+            "id": 2, \n+            "input_connections": {}, \n+            "inputs": [\n+                {\n+                    "description": "https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files", \n+                    "name": "error rate file"\n+                }\n+            ], \n+            "name": "Input dataset", \n+            "outputs": [], \n+            "position": {\n+                "left": 1412.875, \n+                "top": 384.671875\n+            }, \n+            "tool_errors": null, \n+            "tool_id": null, \n+            "tool_state": "{\\"name\\": \\"error rate file\\"}", \n+            "tool_version": null, \n+            "type": "data_input", \n+            "user_outputs": []\n+        }, \n+        "3": {\n+            "annotation": "", \n+            "id": 3, \n+            "input_connections": {\n+                "filePath": {\n+                    "id": 0, \n+                    "output_name": "output"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Microsatellite detection", \n+            "outputs": [\n+                {\n+                    "name": "stdout", \n+                    "type": "tabular"\n+                }\n+            ], \n+            "position": {\n+                "left": 441.953125, \n+                "top": 252\n+            }, \n+            "post_job_actions": {\n+                "ChangeDatatypeActionstdout": {\n+                    "action_arguments": {\n+                        "newtype": "tabular"\n+                    }, \n+                    "action_type": "ChangeDatatypeAction", \n+                    "output_name": "stdout"\n+                }, \n+                "RenameDatasetActionstdout": {\n+                    "action_arguments": {\n+                        "newname": "TR in raw reads"\n+                    }, \n+                    "action_type": 
"RenameDatasetAction", \n+                    "output_name": "stdout"\n+                }\n+            }, \n+            "tool_errors": null, \n+   '..b'          "HideDatasetActionout_file1": {\n+                    "action_arguments": {}, \n+                    "action_type": "HideDatasetAction", \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "Grouping1", \n+            "tool_state": "{\\"operations\\": \\"[{\\\\\\"opcol\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"6\\\\\\"}, \\\\\\"__index__\\\\\\": 0, \\\\\\"optype\\\\\\": \\\\\\"cat\\\\\\", \\\\\\"opround\\\\\\": \\\\\\"no\\\\\\"}, {\\\\\\"opcol\\\\\\": {\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"4\\\\\\"}, \\\\\\"__index__\\\\\\": 1, \\\\\\"optype\\\\\\": \\\\\\"cat_uniq\\\\\\", \\\\\\"opround\\\\\\": \\\\\\"no\\\\\\"}]\\", \\"__page__\\": 0, \\"input1\\": \\"null\\", \\"ignorelines\\": \\"null\\", \\"groupcol\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"UnvalidatedValue\\\\\\", \\\\\\"value\\\\\\": \\\\\\"8\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"ignorecase\\": \\"\\\\\\"False\\\\\\"\\"}", \n+            "tool_version": "2.1.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "19": {\n+            "annotation": "", \n+            "id": 19, \n+            "input_connections": {\n+                "input": {\n+                    "id": 18, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Filter", \n+            "outputs": [\n+                {\n+                    "name": "out_file1", \n+                    "type": "input"\n+                }\n+            ], \n+            "position": {\n+                "left": 1186, \n+                "top": 1405.953125\n+            }, \n+         
   "post_job_actions": {\n+                "RenameDatasetActionout_file1": {\n+                    "action_arguments": {\n+                        "newname": "TR profile"\n+                    }, \n+                    "action_type": "RenameDatasetAction", \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "tool_errors": null, \n+            "tool_id": "Filter1", \n+            "tool_state": "{\\"input\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"header_lines\\": \\"\\\\\\"0\\\\\\"\\", \\"cond\\": \\"\\\\\\"c2.count(\\\\\\\\\\\\\\",\\\\\\\\\\\\\\")>=4\\\\\\"\\", \\"__page__\\": 0}", \n+            "tool_version": "1.1.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }, \n+        "20": {\n+            "annotation": "", \n+            "id": 20, \n+            "input_connections": {\n+                "microsat_error_profile": {\n+                    "id": 2, \n+                    "output_name": "output"\n+                }, \n+                "microsat_raw": {\n+                    "id": 19, \n+                    "output_name": "out_file1"\n+                }\n+            }, \n+            "inputs": [], \n+            "name": "Correct genotype for microsatellite errors", \n+            "outputs": [\n+                {\n+                    "name": "microsat_corrected", \n+                    "type": "tabular"\n+                }\n+            ], \n+            "position": {\n+                "left": 1591.328125, \n+                "top": 456.8125\n+            }, \n+            "post_job_actions": {\n+                "RenameDatasetActionmicrosat_corrected": {\n+                    "action_arguments": {\n+                        "newname": "Genotype file"\n+                    }, \n+                    "action_type": "RenameDatasetAction", \n+                    "output_name": "microsat_corrected"\n+                }\n+            }, \n+            "tool_errors": null, \n+    
        "tool_id": "toolshed.g2.bx.psu.edu/repos/arkarachai-fungtammasan/microsatellite_ngs/GenotypeSTR/2.0.0", \n+            "tool_state": "{\\"microsat_raw\\": \\"null\\", \\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"microsat_error_profile\\": \\"null\\", \\"expectedminorallele\\": \\"\\\\\\"0.5\\\\\\"\\"}", \n+            "tool_version": "2.0.0", \n+            "type": "tool", \n+            "user_outputs": []\n+        }\n+    }\n+}\n\\ No newline at end of file\n'
b
diff -r dccd7a3ee717 -r 3c05abb4452e README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Wed Apr 22 12:22:50 2015 -0400
b
b'@@ -0,0 +1,103 @@\n+# *STR-FM*, a short tandem repeat profiling using a flank-based mapping approach\n+\n+## User manual and guide\n+We designed the STR profiling pipeline as a collection of tools which can be executed either from the command line or via a GUI on Galaxy. The easiest way to use the STR-FM pipeline is via the Galaxy platform. Currently, we have all tools in Galaxy main toolshed (See installation of STR-FM tools from toolshed below) and in Galaxy test website (STR-FM: microsatellite analysis).\n+\n+## Overview\n+\n+Our tools in \xe2\x80\x98str_fm\xe2\x80\x99 can be used to: \n+\n+**(1) profile STRs from short read data with STR-FM pipeline** (tools: \xe2\x80\x98STR detection\xe2\x80\x99, \xe2\x80\x98Read name modifier\xe2\x80\x99, \xe2\x80\x98Fetch bases flanking\xe2\x80\x99, \xe2\x80\x98Combine mapped faux paired-end reads\xe2\x80\x99, \xe2\x80\x98Check STR motif compatibility between reference and read STRs\xe2\x80\x99, \xe2\x80\x98Select uninterrupted STRs\xe2\x80\x99)\n+\n+This pipeline needs several tools on Galaxy to complete the process. It can be customized with different mapper or STRs detection algorithm. Either single-end or paired-end sequencing data can be utilized; for paired-end read data, each read is treated separately. The core of the pipeline consists of the following three procedures \n+\n+First, STR-FM runs a short-read STR detection tool using a string comparison algorithm (see publication details). The algorithm can detect exact (pure, or uninterrupted) STRs (mono- through hexanucleotide STRs greater than or equal to two repeats), incomplete motifs (e.g., ATATATA), interrupted STRs (e.g., AAAATAAAAA), or multiple STRs in a read. Reads that do not have sufficient upstream or downstream sequences flanking the STRs are discarded (we used a threshold of 20 bp on each side of an STR). Each read is split into two \xe2\x80\x9cpseudoreads,\xe2\x80\x9d containing the upstream and downstream flanks surrounding the STR. 
\n+\n+Second, these are mapped to the reference genome using a standard paired-end read-mapping algorithm, e.g., BWA, Bowtie, or Bowtie2, treating each pair of flanking sequences as a faux paired-end read. \n+\n+Finally, STR-FM runs a profiler tool, which groups all reads with STRs that are mapped to the same location in the reference genome. As a result, an array of all STR lengths from the reads mapping to a particular STR-containing locus is generated.\n+\n+**(2) genotype STRs with error correction** (tool \xe2\x80\x98Correct genotype for STR errors\xe2\x80\x99)\n+\n+This pipeline needs only one of our tools to complete the process. It will take STR-profile file and sequencing error rates file as inputs. The program will calculate the maximum likelihood of genotype for each STR locus in STR-profile file. Then it will report the most likely genotype and the log odds ratio between their probabilities, which can be interpreted as a confidence of genotyping (the more this value deviates from 0, the more confidence we have in this genotype).\n+\n+**(3) estimate the minimum informative read depth from error rates** (tools: \xe2\x80\x98Generate all possible combination of STR length profile\xe2\x80\x99, \xe2\x80\x98Evaluate the probability of the allele combination to generate read profile\xe2\x80\x99, \xe2\x80\x98Combine read profile probabilities\xe2\x80\x99)\n+\n+This pipeline needs other tools on Galaxy to complete the process. This pipeline will generate all possible read profiles from sequencing error spectrum, select the profiles that can distinguish heterozygote from homozygote, calculate the probability to produce such profiles from sequencing error spectrum, and report the probability that a certain sequence depth can distinguish heterozygote from homozygote under a given sequencing error rates (see publication details). 
We recommend that you should try to run with less than 10x depth for initial trial.\n+\n+**(4) convert informative read depth to locus-specific and genome-wide sequencing depth** (tool \xe2\x80\x98Convert informative read depth to sequencing depth\xe2\x80\x99).  \n+\n+This pipeline needs only one of our tools to complete process. It will convert *informative read depth* to *locus-specific sequencing depth* (given read length) and *genome-'..b'pth from the given informative read depth\n+The detailed description for each tool is embedded within the tool.\n+\n+## Citing *STR-FM*\n+Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research\n+\n+## Installation of STR-FM tools from toolshed\n+\n+\n+The installation can be done as follows\n+\n+\n+1 Install and set configuration of local Galaxy \n+\n+1.1 Download and install Galaxy (https://wiki.galaxyproject.org/Admin/GetGalaxy). Galaxy works on both Unix and Mac OS.\n+\n+1.2 From your Galaxy directory, add your E-mail as admin E-mail to the Galaxy configuration file. Depending on the Galaxy version, this file can be either universe_wsgi.ini or config/galaxy.ini (https://wiki.galaxyproject.org/Admin/Interface)\n+\n+1.3 Set directory for tool dependencies (step 2 in https://wiki.galaxyproject.org/Admin/Tools/AddToolFromToolShedTutorial). \n+\n+1.4 Run local Galaxy from the command line by running \xe2\x80\x98sh run.sh\xe2\x80\x99 from your Galaxy directory. 
\n+\n+1.5 Open your Galaxy from your browser at address http://localhost:8080 (https://wiki.galaxyproject.org/Admin/GetGalaxy)\n+\n+1.6 Register using your admin E-mail in the \xe2\x80\x98User\xe2\x80\x99 tab on the top.\n+\n+1.7 Refresh your browser\n+\n+\n+2 Install tools and dependencies\n+\n+2.1 From your local galaxy, click \xe2\x80\x98Admin\xe2\x80\x99 tab on the top.\n+\n+2.2 On the left panel, click \xe2\x80\x98Search and browse tool sheds\xe2\x80\x99 under \xe2\x80\x98Tool sheds\xe2\x80\x99. \xe2\x80\x98Accessible Galaxy tool sheds\xe2\x80\x99 will appear on main panel.\n+\n+2.3 Click on \xe2\x80\x98Galaxy main tool shed\xe2\x80\x99 and select \xe2\x80\x98Browse valid repositories\xe2\x80\x99. (https://wiki.galaxyproject.org/Admin/Tools/AddToolFromToolShedTutorial)\n+\n+2.4 Type \xe2\x80\x98str_fm\xe2\x80\x99 in the search box and press Enter.\n+\n+2.5 The \xe2\x80\x98suite_str_fm_0_1\xe2\x80\x99 repository that has \xe2\x80\x98arkarachai-fungtammasan\xe2\x80\x99 as the owner will appear. The user may click on this repository name and click \xe2\x80\x98Preview and install\xe2\x80\x99. The \xe2\x80\x98Install to Galaxy\xe2\x80\x99 button will appear on upper right corner. This button allows the user to install all our tools and workflows -- pipelines containing tools for specific purpose such as STR profiling from short read sequencing data, microsatellite detection of the reference genome, and estimating minimum informative read depth. None of our tools have any dependencies. However, some of the other tools that are used in our workflows (e.g. SAM flag filter, unique element selection, etc.) are not included in the standard Galaxy installation. For the user\xe2\x80\x99s convenience, we included all dependency tools for the workflows in this repository. Therefore, installing \xe2\x80\x98suite_str_fm_0_1\xe2\x80\x99 will be sufficient to operate all workflows we provided. 
\n+\n+2.6 After clicking on \xe2\x80\x98Install to Galaxy\xe2\x80\x99 and \xe2\x80\x98Install\xe2\x80\x99 button in confirmation page, all our tools, workflows, and test datasets will be downloaded to your local Galaxy. After the download is completed, all our tools will be available on your local Galaxy. If the user wants to use the workflows that we suggested (i.e. STR profiling from short read sequencing data, microsatellite detection of the reference genome, and estimating minimum informative read depth), please proceed to step 3.\n+\n+2.7 Refresh your browser\n+\n+\n+3 Install workflows\n+\n+3.1 Click on the \xe2\x80\x98Admin\xe2\x80\x99 tab at the top again.\n+\n+3.2 On the right panel, click \xe2\x80\x98Manage installed tool shed repositories\xe2\x80\x99 under \xe2\x80\x98Server\xe2\x80\x99. \xe2\x80\x98Installed tool shed repositories\xe2\x80\x99 will appear on main panel.\n+\n+3.3 Click to open \xe2\x80\x98str_fm\xe2\x80\x99 repository. \n+\n+3.4 Scroll down to \xe2\x80\x98Workflows\xe2\x80\x99 section and select the workflow that you want to install. The SVG graphic of the workflow will appear.\n+\n+3.5 Click on the \xe2\x80\x98Repository Actions\xe2\x80\x99 on the upper right corner and select \xe2\x80\x98Import workflow to Galaxy\xe2\x80\x99. If successful, the message \xe2\x80\x98Workflow <workflow name> imported successfully\xe2\x80\x99 will appear. Once the workflow is imported to your Galaxy, you can view and modify it from \xe2\x80\x98Workflow\xe2\x80\x99 tab on the top. \n'
b
diff -r dccd7a3ee717 -r 3c05abb4452e commandline_sample_STR-FM_estimate_mininum_informative_Read_Depth
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commandline_sample_STR-FM_estimate_mininum_informative_Read_Depth Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,35 @@
+## This is a sample PBS script for profiling STR from reference genome using STR-FM
+##   
+##requirement
+##1 STR error rates (can be downloaded from https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files) --> errorrate.bymajorallele
+##
+echo " "
+echo " "
+echo "Job started on `hostname` at `date`"
+cd /working/directory/
+echo ${MOTIF}
+echo ${OUTPUT}
+echo " "
+echo "Generate all possible combination of STR length profile" ## See detail in profilegenerator.xml on https://github.com/Arkarachai/STR-FM
+python  profilegenerator.py errorrate.bymajorallele ${MOTIF} 30 > ${OUTPUT}.30
+
+echo "remove duplicated profiles"
+cat ${OUTPUT}.30 | sort | uniq > ${OUTPUT}.30.sort
+
+echo "genotyping using error correction model" ## See detail in GenotypingSTR.xml on https://github.com/Arkarachai/STR-FM
+python  GenotypeTRcorrection.py  ${OUTPUT}.30.sort errorrate.bymajorallele ${OUTPUT}.30.prob 0.5
+
+echo "select only full motif different --> need to replace 4 with motif size (1-6)"
+cat ${OUTPUT}.30.prob | grep  hetero | awk '(($7-$8)==4) || (($8-$7)==4) {print $0}' > ${OUTPUT}.30.prob.screen
+
+echo "Evaluate the probability of the allele combination to generate read profile" ## See detail in  probvalueforhetero.xml on https://github.com/Arkarachai/STR-FM
+python heteroprob.py  ${OUTPUT}.30.prob.screen  ${INPUT} >  ${OUTPUT}.30.bino
+
+echo "formatting"
+cat  ${OUTPUT}.30.bino | sort  -k 12n,12 -k 6n,6 > ${OUTPUT}.30.bino.sort
+
+echo "Combine read profile probabilities" ## See detail in  combineprobforallelecombination.xml on https://github.com/Arkarachai/STR-FM
+python combinedprobforallelecombination.py ${OUTPUT}.30.bino.sort > ${OUTPUT}.30.bino.sort.plot
+
+
+echo "Job end on `hostname` at `date`"
b
diff -r dccd7a3ee717 -r 3c05abb4452e commandline_sample_STR-FM_reference_profiling
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commandline_sample_STR-FM_reference_profiling Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,25 @@
+## This is a sample PBS script for profiling STR from reference genome using STR-FM version 1.0.0 (April 20, 2014)
+##   
+##requirement
+##1 reference genome in FASTA format --> ${INPUT}.fa
+##
+echo " "
+echo " "
+echo "Job started on `hostname` at `date`"
+cd /working/directory/
+echo " "
+echo " detect STR in reference genome" ## See detail in microsatellite.xml on https://github.com/Arkarachai/STR-FM
+python microsatellite.py ${INPUT}.fa --fasta --period=1 --partialmotifs --minlength=4 --prefix=0 --suffix=0 --hamming=0 --multipleruns --flankdisplay=0  --splitbyvalidity  >${INPUT}.mono.out
+python microsatellite.py ${INPUT}.fa --fasta --period=2 --partialmotifs --minlength=6 --prefix=0 --suffix=0 --hamming=0 --multipleruns --flankdisplay=0  --splitbyvalidity  >${INPUT}.di.out
+python microsatellite.py ${INPUT}.fa --fasta --period=3 --partialmotifs --minlength=6 --prefix=0 --suffix=0 --hamming=0 --multipleruns --flankdisplay=0  --splitbyvalidity  >${INPUT}.tri.out
+python microsatellite.py ${INPUT}.fa --fasta --period=4 --partialmotifs --minlength=8 --prefix=0 --suffix=0 --hamming=0 --multipleruns --flankdisplay=0  --splitbyvalidity  >${INPUT}.tetra.out
+
+echo "formatting"
+cat ${INPUT}.mono.out | awk 'BEGIN{FS="\t";OFS="\t"};{print $6,$2,$2+$1,$4,$1,length($4) }' > ${INPUT}.mono.TR
+cat ${INPUT}.di.out | awk 'BEGIN{FS="\t";OFS="\t"};{print $6,$2,$2+$1,$4,$1,length($4) }' > ${INPUT}.di.TR
+cat ${INPUT}.tri.out | awk 'BEGIN{FS="\t";OFS="\t"};{print $6,$2,$2+$1,$4,$1,length($4) }' > ${INPUT}.tri.TR
+cat ${INPUT}.tetra.out | awk 'BEGIN{FS="\t";OFS="\t"};{print $6,$2,$2+$1,$4,$1,length($4) }' > ${INPUT}.tetra.TR
+
+
+
+echo "Job end on `hostname` at `date`"
b
diff -r dccd7a3ee717 -r 3c05abb4452e commandline_sample_STR-FM_shortread_profiling
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/commandline_sample_STR-FM_shortread_profiling Wed Apr 22 12:22:50 2015 -0400
b
b'@@ -0,0 +1,124 @@\n+## This is a sample PBS script for profiling STR from short read using STR-FM version 2.0.0 (April 20, 2015)\n+##   \n+##requirement\n+##1 fastq input in sangerfq Phred scale --> ${INPUT}.fastq\n+##2 index of mapping program (bwa, bowtie, etc) \n+##3 location of all STR in reference genome (use PBS script name "sampleSTR_reference_profiling.txt) --> /path/to/STR/in/reference/genome.TR (you can make 4 separated TR files for 4 types of STRs)\n+##4 reference genome in FASTA and in 2bit file --> /path/to/2bit/ref.2bit (use utility from UCSC genome browser to create 2bit file version of reference genome)\n+##5 local Galaxy (available from Galaxy website for Mac and Unix computer)\n+##6 STR error rates (can be downloaded from https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files) --> errorrate.bymajorallele\n+##\n+echo " "\n+echo " "\n+echo "Job started on `hostname` at `date`"\n+ref=/path/to/reference/sequence/and/bwa/index/ref.fa\n+export PYTHONPATH=/path/to/galaxy-dist/lib/\n+galaxydir=/path/to/galaxy-dist/tools\n+cd /working/directory/\n+echo " "\n+echo " detect STR in short read" ## See detail in microsatellite.xml on https://github.com/Arkarachai/STR-FM\n+python microsatellite.py ${INPUT}.fastq  --fastq --period=1 --partialmotifs --minlength=5 --prefix=20 --suffix=20 --hamming=0 --multipleruns  >${INPUT}.mono.out\n+python microsatellite.py ${INPUT}.fastq  --fastq --period=2 --partialmotifs --minlength=6 --prefix=20 --suffix=20 --hamming=0 --multipleruns  >${INPUT}.di.out\n+python microsatellite.py ${INPUT}.fastq  --fastq --period=3 --partialmotifs --minlength=9 --prefix=20 --suffix=20 --hamming=0 --multipleruns  >${INPUT}.tri.out\n+python microsatellite.py ${INPUT}.fastq  --fastq --period=4 --partialmotifs --minlength=12 --prefix=20 --suffix=20 --hamming=0 --multipleruns  >${INPUT}.tetra.out\n+\n+echo "change read name at " ## See detail in space2underscore_readname.xml on https://github.com/Arkarachai/STR-FM\n+python 
changespacetounderscore_readname.py ${INPUT}.mono.out  ${INPUT}.mono.new 6\n+python changespacetounderscore_readname.py ${INPUT}.di.out  ${INPUT}.di.new 6\n+python changespacetounderscore_readname.py ${INPUT}.tri.out  ${INPUT}.tri.new 6\n+python changespacetounderscore_readname.py ${INPUT}.tetra.out  ${INPUT}.tetra.new 6\n+\n+echo "start fetch flanking at `date`" ## See detail in fetchflank.xml on https://github.com/Arkarachai/STR-FM\n+python pair_fetch_DNA_ff.py ${INPUT}.mono.new ${INPUT}.mono_ff_L.txt ${INPUT}.mono_ff_R.txt 20 20\n+python pair_fetch_DNA_ff.py ${INPUT}.di.new ${INPUT}.di_ff_L.txt ${INPUT}.di_ff_R.txt 20 20\n+python pair_fetch_DNA_ff.py ${INPUT}.tri.new ${INPUT}.tri_ff_L.txt ${INPUT}.tri_ff_R.txt 20 20\n+python pair_fetch_DNA_ff.py ${INPUT}.tetra.new ${INPUT}.tetra_ff_L.txt ${INPUT}.tetra_ff_R.txt 20 20\n+\n+echo "BWA uniquely mapped no indel no deletion "\n+bwa aln -n 0 -o 0 ${ref} ${INPUT}.mono_ff_L.txt > ${INPUT}.mono_ff_L.sai \n+bwa aln\t-n 0 -o 0 ${ref} ${INPUT}.mono_ff_R.txt > ${INPUT}.mono_ff_R.sai\n+bwa sampe ${ref} ${INPUT}.mono_ff_L.sai ${INPUT}.mono_ff_R.sai ${INPUT}.mono_ff_L.txt ${INPUT}.mono_ff_R.txt  > ${INPUT}.mono.sam\n+samtools view -Sb -F 12 -q 1 ${INPUT}.mono.sam > ${INPUT}.mono.n.all.bam\n+bwa aln -n 0 -o 0 ${ref} ${INPUT}.di_ff_L.txt > ${INPUT}.di_ff_L.sai \n+bwa aln\t-n 0 -o 0 ${ref} ${INPUT}.di_ff_R.txt > ${INPUT}.di_ff_R.sai\n+bwa sampe ${ref} ${INPUT}.di_ff_L.sai ${INPUT}.di_ff_R.sai ${INPUT}.di_ff_L.txt ${INPUT}.di_ff_R.txt  > ${INPUT}.di.sam\n+samtools view -Sb -F 12 -q 1 ${INPUT}.di.sam > ${INPUT}.di.n.all.bam\n+bwa aln -n 0 -o 0 ${ref} ${INPUT}.tri_ff_L.txt > ${INPUT}.tri_ff_L.sai \n+bwa aln\t-n 0 -o 0 ${ref} ${INPUT}.tri_ff_R.txt > ${INPUT}.tri_ff_R.sai\n+bwa sampe ${ref} ${INPUT}.tri_ff_L.sai ${INPUT}.tri_ff_R.sai ${INPUT}.tri_ff_L.txt ${INPUT}.tri_ff_R.txt  > ${INPUT}.tri.sam\n+samtools view -Sb -F 12 -q 1 ${INPUT}.tri.sam > ${INPUT}.tri.n.all.bam\n+bwa aln -n 0 -o 0 ${ref} ${INPUT}.tetra_ff_L.txt > 
${INPUT}.tetra_ff_L.sai \n+bwa aln\t-n 0 -o 0 ${ref} ${INPUT}.tetra_ff_R.txt > ${INPUT}.tetra_'..b'operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.mono.RF.j ${INPUT}.mono.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f\n+python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.di.RF.j ${INPUT}.di.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f\n+python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.tri.RF.j ${INPUT}.tri.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f\n+python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.tetra.RF.j ${INPUT}.tetra.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f\n+\n+echo "remove incompatible motif (remove incorrect mapped reads given that there is no STR motif difference from reference genome)" ## See detail in microsatcompat.xml on https://github.com/Arkarachai/STR-FM\n+python microsatcompat.py ${INPUT}.mono.gop 4 10 > ${INPUT}.mono.fulltable1 \n+python microsatcompat.py ${INPUT}.di.gop 4 10 > ${INPUT}.di.fulltable1 \n+python microsatcompat.py ${INPUT}.tri.gop 4 10 > ${INPUT}.tri.fulltable1 \n+python microsatcompat.py ${INPUT}.tetra.gop 4 10 > ${INPUT}.tetra.fulltable1 \n+\n+echo "remove shifting flanking location (remove cases that come from STR interruption or flanking bases are misread as STRs)"\n+cat ${INPUT}.mono.fulltable1 | awk \'($19==$2) && ($20==$3) {print $0}\' > ${INPUT}.mono.fulltable2\n+cat ${INPUT}.di.fulltable1 | awk \'($19==$2) && ($20==$3) {print $0}\' > ${INPUT}.di.fulltable2\n+cat ${INPUT}.tri.fulltable1 | awk \'($19==$2) && ($20==$3) {print $0}\' > ${INPUT}.tri.fulltable2\n+cat ${INPUT}.tetra.fulltable1 | awk \'($19==$2) && ($20==$3) {print $0}\' > ${INPUT}.tetra.fulltable2\n+\n+echo "keep only column that are necessary for profiling" \n+cat ${INPUT}.mono.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.mono.cuttmp0\n+cat ${INPUT}.di.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > 
${INPUT}.di.cuttmp0\n+cat ${INPUT}.tri.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.tri.cuttmp0\n+cat ${INPUT}.tetra.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.tetra.cuttmp0\n+\n+echo "If you multiple analysis by splitting initial fastq, you should merge (cat) all results from the same sample after this step"\n+\n+echo "create genomic coordinate column and group by that column"\n+perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.mono.cuttmp0 ${INPUT}.mono.cuttmp1 "_" "no"\n+python ${galaxydir}/filters/mergeCols.py ${INPUT}.mono.cuttmp1 ${INPUT}.mono.cuttmp2 1 7 2 7 3\n+python ${galaxydir}/stats/grouping.py ${INPUT}.mono.cuttmp3 ${INPUT}.mono.cuttmp2 8 0 \'cat 6 0\' \'cat_uniq 4 0\'\n+perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.di.cuttmp0 ${INPUT}.di.cuttmp1 "_" "no"\n+python ${galaxydir}/filters/mergeCols.py ${INPUT}.di.cuttmp1 ${INPUT}.di.cuttmp2 1 7 2 7 3\n+python ${galaxydir}/stats/grouping.py ${INPUT}.di.cuttmp3 ${INPUT}.di.cuttmp2 8 0 \'cat 6 0\' \'cat_uniq 4 0\'\n+perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.tri.cuttmp0 ${INPUT}.tri.cuttmp1 "_" "no"\n+python ${galaxydir}/filters/mergeCols.py ${INPUT}.tri.cuttmp1 ${INPUT}.tri.cuttmp2 1 7 2 7 3\n+python ${galaxydir}/stats/grouping.py ${INPUT}.tri.cuttmp3 ${INPUT}.tri.cuttmp2 8 0 \'cat 6 0\' \'cat_uniq 4 0\'\n+perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.tetra.cuttmp0 ${INPUT}.tetra.cuttmp1 "_" "no"\n+python ${galaxydir}/filters/mergeCols.py ${INPUT}.tetra.cuttmp1 ${INPUT}.tetra.cuttmp2 1 7 2 7 3\n+python ${galaxydir}/stats/grouping.py ${INPUT}.tetra.cuttmp3 ${INPUT}.tetra.cuttmp2 8 0 \'cat 6 0\' \'cat_uniq 4 0\'\n+\n+echo "you may filter for minimum sequencing depth here"\n+\n+echo "genotyping using error correction model" ## See detail in GenotypingSTR.xml on https://github.com/Arkarachai/STR-FM\n+cat ${INPUT}.mono.cuttmp2 ${INPUT}.di.cuttmp2 ${INPUT}.tri.cuttmp2 ${INPUT}.tetra.cuttmp2 > ${INPUT}.step5\n+python 
GenotypeTRcorrection.py ${INPUT}.step5 errorrate.bymajorallele ${INPUT}.step5.result 0.5\n+## final output is ${INPUT}.step5.result\n+\n+echo "Job end on `hostname` at `date`"\n'
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/C_sample_fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C_sample_fastq Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,8 @@
+@IL2_40_2_1_735_755
+ATTTTCCAGCACCGTCATGTGGTTCCAGAGGTTAAAGTGCTGAAATAACAT
++
+IIIIIIIIIIIIIIIIIIIIIIII4IIIIIIIII5IIDI)'7%*8%%%%5*
+@IL2_40_2_1_919_700
+ATAAGGAAAAAAAAAAAAAAAACCAGGTCTTTTTTTTTTTTTTTTTGTTAT
++
+IIIIIIIIIIIIIIIIIIIIII@IIII2III4-II47I?CII>-%:C-;$&
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/C_sample_snoope
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C_sample_snoope Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,4 @@
+3 33 15 A 0 IL2_40_2_1_735_755_1_per1_2 ATTTTCCAGCACCGTCATGTGGTTCCAGAGGTTaaaGTGCTGAAATAACAT IIIIIIIIIIIIIIIIIIIIIIII4IIIIIIIII5IIDI)'7%*8%%%%5*
+3 42 6 A 0 IL2_40_2_1_735_755_1_per1_3 ATTTTCCAGCACCGTCATGTGGTTCCAGAGGTTAAAGTGCTGaaaTAACAT IIIIIIIIIIIIIIIIIIIIIIII4IIIIIIIII5IIDI)'7%*8%%%%5*
+16 6 29 A 0 IL2_40_2_1_919_700_1_per1_1 ATAAGGaaaaaaaaaaaaaaaaCCAGGTCTTTTTTTTTTTTTTTTTGTTAT IIIIIIIIIIIIIIIIIIIIII@IIII2III4-II47I?CII>-%:C-;$&
+17 29 5 T 0 IL2_40_2_1_919_700_1_per1_2 ATAAGGAAAAAAAAAAAAAAAACCAGGTCtttttttttttttttttGTTAT IIIIIIIIIIIIIIIIIIIIII@IIII2III4-II47I?CII>-%:C-;$&
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/PCRinclude.allrate.bymajorallele
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/PCRinclude.allrate.bymajorallele Wed Apr 22 12:22:50 2015 -0400
b
b'@@ -0,0 +1,997 @@\n+10\t10\t91456\tA\n+10\t9\t1259\tA\n+10\t11\t605\tA\n+10\t8\t16\tA\n+10\t12\t8\tA\n+10\t7\t2\tA\n+11\t11\t39657\tA\n+11\t10\t1211\tA\n+11\t12\t514\tA\n+11\t9\t54\tA\n+11\t13\t9\tA\n+11\t8\t3\tA\n+11\t14\t1\tA\n+12\t12\t18850\tA\n+12\t11\t986\tA\n+12\t13\t417\tA\n+12\t10\t73\tA\n+12\t14\t8\tA\n+12\t9\t1\tA\n+12\t8\t1\tA\n+13\t13\t10201\tA\n+13\t12\t885\tA\n+13\t14\t320\tA\n+13\t11\t83\tA\n+13\t15\t12\tA\n+13\t10\t8\tA\n+14\t14\t3649\tA\n+14\t13\t409\tA\n+14\t15\t151\tA\n+14\t12\t62\tA\n+14\t11\t6\tA\n+14\t16\t5\tA\n+14\t10\t1\tA\n+15\t15\t847\tA\n+15\t14\t140\tA\n+15\t16\t60\tA\n+15\t13\t20\tA\n+15\t17\t4\tA\n+15\t12\t3\tA\n+16\t16\t182\tA\n+16\t15\t60\tA\n+16\t17\t14\tA\n+16\t14\t12\tA\n+16\t13\t1\tA\n+16\t12\t1\tA\n+16\t18\t1\tA\n+17\t17\t11\tA\n+17\t16\t5\tA\n+17\t15\t2\tA\n+17\t18\t1\tA\n+18\t18\t4\tA\n+18\t17\t2\tA\n+5\t5\t10047169\tA\n+5\t6\t44\tA\n+6\t6\t2808071\tA\n+6\t5\t195\tA\n+6\t7\t69\tA\n+7\t7\t1097174\tA\n+7\t6\t313\tA\n+7\t8\t83\tA\n+7\t5\t6\tA\n+8\t8\t369496\tA\n+8\t7\t387\tA\n+8\t9\t248\tA\n+8\t6\t3\tA\n+8\t10\t2\tA\n+9\t9\t184958\tA\n+9\t8\t707\tA\n+9\t10\t486\tA\n+9\t7\t5\tA\n+9\t11\t4\tA\n+10\t10\t46\tC\n+10\t9\t3\tC\n+5\t5\t1354993\tC\n+5\t6\t7\tC\n+6\t6\t193431\tC\n+6\t5\t14\tC\n+6\t7\t2\tC\n+7\t7\t22171\tC\n+7\t6\t4\tC\n+8\t8\t2966\tC\n+8\t9\t3\tC\n+8\t7\t3\tC\n+9\t9\t638\tC\n+9\t8\t8\tC\n+9\t7\t1\tC\n+10\t10\t21211\tAC\n+10\t8\t3\tAC\n+10\t12\t1\tAC\n+11\t11\t15048\tAC\n+11\t9\t10\tAC\n+12\t12\t6043\tAC\n+12\t10\t15\tAC\n+12\t14\t1\tAC\n+13\t13\t5070\tAC\n+13\t11\t40\tAC\n+13\t15\t1\tAC\n+14\t14\t3093\tAC\n+14\t12\t44\tAC\n+14\t10\t1\tAC\n+15\t15\t2848\tAC\n+15\t13\t31\tAC\n+15\t17\t1\tAC\n+16\t16\t1273\tAC\n+16\t14\t30\tAC\n+16\t12\t2\tAC\n+17\t17\t1297\tAC\n+17\t15\t27\tAC\n+18\t18\t1269\tAC\n+18\t16\t43\tAC\n+18\t20\t2\tAC\n+18\t14\t1\tAC\n+19\t19\t679\tAC\n+19\t17\t17\tAC\n+19\t21\t1\tAC\n+20\t20\t645\tAC\n+20\t18\t34\tAC\n+20\t22\t2\tAC\n+20\t16\t1\tAC\n+21\t21\t723\tAC\n+21\t19\t28\tAC\n+21\t17\t1\tAC\n+21\t23\t1\tA
C\n+22\t22\t499\tAC\n+22\t20\t29\tAC\n+22\t18\t3\tAC\n+23\t23\t540\tAC\n+23\t21\t30\tAC\n+23\t19\t2\tAC\n+23\t25\t1\tAC\n+24\t24\t385\tAC\n+24\t22\t38\tAC\n+24\t26\t2\tAC\n+24\t20\t1\tAC\n+25\t25\t407\tAC\n+25\t23\t22\tAC\n+25\t27\t2\tAC\n+25\t21\t1\tAC\n+26\t26\t257\tAC\n+26\t24\t30\tAC\n+26\t22\t3\tAC\n+26\t28\t1\tAC\n+26\t20\t1\tAC\n+27\t27\t339\tAC\n+27\t25\t28\tAC\n+27\t23\t3\tAC\n+27\t29\t2\tAC\n+28\t28\t202\tAC\n+28\t26\t17\tAC\n+28\t30\t6\tAC\n+29\t29\t277\tAC\n+29\t27\t29\tAC\n+29\t31\t6\tAC\n+29\t25\t3\tAC\n+30\t30\t117\tAC\n+30\t28\t12\tAC\n+30\t32\t3\tAC\n+30\t18\t1\tAC\n+31\t31\t144\tAC\n+31\t29\t18\tAC\n+31\t27\t4\tAC\n+31\t33\t2\tAC\n+32\t32\t101\tAC\n+32\t30\t23\tAC\n+32\t28\t2\tAC\n+32\t34\t2\tAC\n+32\t26\t1\tAC\n+33\t33\t106\tAC\n+33\t31\t15\tAC\n+33\t35\t3\tAC\n+33\t29\t1\tAC\n+34\t34\t33\tAC\n+34\t32\t7\tAC\n+35\t35\t21\tAC\n+35\t33\t4\tAC\n+35\t31\t1\tAC\n+36\t36\t12\tAC\n+36\t34\t1\tAC\n+37\t37\t10\tAC\n+37\t35\t3\tAC\n+37\t31\t1\tAC\n+37\t39\t1\tAC\n+38\t38\t4\tAC\n+38\t36\t1\tAC\n+6\t6\t1521439\tAC\n+7\t7\t513952\tAC\n+8\t8\t134603\tAC\n+8\t6\t2\tAC\n+9\t9\t60741\tAC\n+9\t7\t3\tAC\n+9\t11\t1\tAC\n+10\t10\t21772\tAG\n+10\t8\t3\tAG\n+10\t12\t1\tAG\n+11\t11\t13880\tAG\n+11\t9\t10\tAG\n+11\t13\t1\tAG\n+12\t12\t5628\tAG\n+12\t10\t13\tAG\n+12\t14\t4\tAG\n+13\t13\t4494\tAG\n+13\t11\t17\tAG\n+14\t14\t1898\tAG\n+14\t12\t15\tAG\n+15\t15\t2427\tAG\n+15\t13\t18\tAG\n+16\t16\t1076\tAG\n+16\t14\t24\tAG\n+16\t12\t1\tAG\n+17\t17\t874\tAG\n+17\t15\t12\tAG\n+17\t19\t1\tAG\n+17\t13\t1\tAG\n+18\t18\t536\tAG\n+18\t16\t20\tAG\n+18\t14\t1\tAG\n+19\t19\t563\tAG\n+19\t17\t25\tAG\n+20\t20\t201\tAG\n+20\t18\t14\tAG\n+21\t21\t260\tAG\n+21\t19\t10\tAG\n+22\t22\t83\tAG\n+22\t20\t5\tAG\n+23\t23\t147\tAG\n+23\t21\t5\tAG\n+23\t25\t1\tAG\n+24\t24\t99\tAG\n+24\t22\t4\tAG\n+24\t18\t1\tAG\n+25\t25\t62\tAG\n+25\t23\t3\tAG\n+25\t27\t1\tAG\n+26\t26\t38\tAG\n+26\t24\t8\tAG\n+27\t27\t24\tAG\n+27\t25\t3\tAG\n+27\t23\t1\tAG\n+28\t28\t14\tAG\n+28\t26\t2\tAG\n+29\t29\t12\tAG\n+29\t27\t5\
tAG\n+29\t31\t1\tAG\n+30\t30\t7\tAG\n+30\t28\t2\tAG\n+31\t31\t7\tAG\n+31\t27\t3\tAG\n+31\t23\t1\tAG\n+32\t32\t4\tAG\n+32\t28\t1\tAG\n+6\t6\t1880822\tAG\n+7\t7\t684837\tAG\n+7\t9\t1\tAG\n+8\t8\t183381\tAG\n+9\t9\t75547\tAG\n+9\t7\t6\tAG\n+9\t11\t1\tAG\n+10\t10\t18179\tAT\n+10\t8\t7\tAT\n+10\t12\t4\tAT\n+11\t11\t8969\tAT\n+11\t9\t5\tAT\n+11\t13\t2\tAT\n+12\t12\t4888\tAT\n+12\t10\t8\tAT\n+12\t14\t2\tAT\n+13\t13\t2785\tAT\n+13\t11\t17\tAT\n+13\t15\t1\tAT\n+14\t14\t2310\tAT\n+14\t12\t40\tAT\n+14\t16\t4\tAT\n+14\t10\t2\tAT\n+15\t15\t1461\tAT\n+15\t13\t33\tAT\n+15\t11\t1\tAT\n+15\t17\t1\tAT\n+16\t16\t879\tAT\n+16\t14\t42\tAT\n+16\t18\t2\tAT\n+16\t12\t1\tAT\n+17\t17\t599\tAT\n+17\t15\t38\tAT\n+17\t19\t2\tAT\n+17\t13\t1\tAT\n+18\t18\t367\tAT\n+18\t16\t29\tAT\n+18\t20\t7\tAT\n+18\t14\t1\tAT\n+19\t19\t223\tAT\n+19\t17\t34\tAT\n+19\t21\t3\tAT\n+20\t20\t97\tAT\n+20\t18\t14\tAT\n+20\t16\t2\tAT\n+20\t22\t1\tAT\n+21\t21\t60\tAT\n+21\t19\t18\tAT\n+21\t17\t1\tAT\n+22\t22\t53\tAT\n+22\t20\t15\tAT\n+22\t24\t5\tAT\n+22\t18\t3\tAT\n+23\t23\t11\tAT\n+23\t21\t1\tAT\n+24\t24\t7\tAT\n+24\t20\t2\tAT\n+24\t22\t2\tAT\n+6\t6\t1671932\tAT\n+6\t8\t1\tAT\n+7\t7\t595145\tAT\n+8\t8\t195533\tAT\n+8\t10\t5\tAT\n+8\t6\t2\tAT\n+9\t9\t52576\tAT\n+9\t7\t3\tAT\n+10\t10\t17\tCG\n+11\t11\t17\tCG\n'..b'5\tAAGT\n+34\t34\t9\tAAGT\n+35\t35\t6\tAAGT\n+12\t12\t594\tAATC\n+13\t13\t205\tAATC\n+14\t14\t88\tAATC\n+15\t15\t112\tAATC\n+16\t16\t20\tAATC\n+17\t17\t81\tAATC\n+18\t18\t23\tAATC\n+21\t21\t13\tAATC\n+22\t22\t8\tAATC\n+24\t24\t19\tAATC\n+26\t26\t7\tAATC\n+28\t28\t9\tAATC\n+33\t33\t6\tAATC\n+12\t12\t2293\tAATG\n+13\t13\t1226\tAATG\n+14\t14\t678\tAATG\n+15\t15\t455\tAATG\n+16\t16\t222\tAATG\n+17\t17\t211\tAATG\n+18\t18\t104\tAATG\n+19\t19\t79\tAATG\n+20\t20\t40\tAATG\n+21\t21\t33\tAATG\n+22\t22\t73\tAATG\n+23\t23\t24\tAATG\n+24\t24\t16\tAATG\n+25\t25\t18\tAATG\n+26\t26\t15\tAATG\n+27\t27\t22\tAATG\n+27\t23\t1\tAATG\n+28\t28\t5\tAATG\n+32\t32\t17\tAATG\n+33\t33\t16\tAATG\n+12\t12\t2633\tAATT\n+13\t13\t1086\tAATT\n+
14\t14\t1052\tAATT\n+15\t15\t386\tAATT\n+16\t16\t393\tAATT\n+17\t17\t98\tAATT\n+18\t18\t104\tAATT\n+19\t19\t105\tAATT\n+20\t20\t34\tAATT\n+21\t21\t12\tAATT\n+22\t22\t20\tAATT\n+25\t25\t18\tAATT\n+26\t26\t25\tAATT\n+27\t27\t7\tAATT\n+29\t29\t7\tAATT\n+35\t35\t12\tAATT\n+12\t12\t1406\tACAG\n+13\t13\t964\tACAG\n+14\t14\t300\tACAG\n+15\t15\t130\tACAG\n+16\t16\t102\tACAG\n+17\t17\t49\tACAG\n+18\t18\t30\tACAG\n+19\t19\t88\tACAG\n+20\t20\t5\tACAG\n+23\t23\t5\tACAG\n+12\t12\t4868\tACAT\n+12\t15\t4\tACAT\n+13\t13\t3216\tACAT\n+14\t14\t957\tACAT\n+15\t15\t1052\tACAT\n+16\t16\t588\tACAT\n+17\t17\t422\tACAT\n+18\t18\t239\tACAT\n+19\t19\t238\tACAT\n+19\t15\t1\tACAT\n+20\t20\t25\tACAT\n+21\t21\t79\tACAT\n+22\t22\t20\tACAT\n+23\t23\t38\tACAT\n+27\t27\t42\tACAT\n+29\t29\t18\tACAT\n+31\t31\t5\tACAT\n+32\t32\t5\tACAT\n+35\t35\t6\tACAT\n+36\t36\t9\tACAT\n+41\t41\t14\tACAT\n+44\t44\t8\tACAT\n+44\t40\t1\tACAT\n+50\t50\t12\tACAT\n+12\t12\t833\tACCC\n+13\t13\t345\tACCC\n+14\t14\t190\tACCC\n+15\t15\t60\tACCC\n+16\t16\t12\tACCC\n+17\t17\t15\tACCC\n+19\t19\t8\tACCG\n+12\t12\t416\tACCT\n+13\t13\t123\tACCT\n+14\t14\t140\tACCT\n+15\t15\t69\tACCT\n+16\t16\t41\tACCT\n+17\t17\t45\tACCT\n+19\t19\t18\tACCT\n+20\t20\t27\tACCT\n+21\t21\t19\tACCT\n+22\t22\t6\tACCT\n+27\t27\t13\tACCT\n+28\t28\t7\tACCT\n+29\t29\t9\tACCT\n+30\t30\t7\tACCT\n+34\t34\t6\tACCT\n+45\t45\t5\tACCT\n+12\t12\t84\tACGC\n+13\t13\t52\tACGC\n+15\t15\t63\tACGC\n+12\t12\t433\tACGG\n+13\t13\t163\tACGG\n+14\t14\t38\tACGG\n+15\t15\t44\tACGG\n+16\t16\t7\tACGG\n+17\t17\t11\tACGG\n+19\t19\t6\tACGG\n+25\t25\t10\tACGG\n+12\t12\t1119\tACGT\n+13\t13\t509\tACGT\n+14\t14\t338\tACGT\n+15\t15\t16\tACGT\n+16\t16\t66\tACGT\n+17\t17\t7\tACGT\n+19\t19\t27\tACGT\n+12\t12\t2211\tACTC\n+13\t13\t685\tACTC\n+14\t14\t188\tACTC\n+15\t15\t151\tACTC\n+16\t16\t91\tACTC\n+18\t18\t17\tACTC\n+19\t19\t24\tACTC\n+20\t20\t23\tACTC\n+21\t21\t13\tACTC\n+23\t23\t19\tACTC\n+45\t45\t8\tACTC\n+12\t12\t161\tACTG\n+13\t13\t69\tACTG\n+14\t14\t7\tACTG\n+15\t15\t14\tACTG\n+16\t16\
t15\tACTG\n+12\t12\t3118\tAGAT\n+13\t13\t1216\tAGAT\n+14\t14\t1084\tAGAT\n+15\t15\t869\tAGAT\n+16\t16\t508\tAGAT\n+17\t17\t322\tAGAT\n+18\t18\t159\tAGAT\n+19\t19\t258\tAGAT\n+20\t20\t63\tAGAT\n+21\t21\t84\tAGAT\n+22\t22\t69\tAGAT\n+22\t14\t6\tAGAT\n+23\t23\t112\tAGAT\n+24\t24\t107\tAGAT\n+25\t25\t36\tAGAT\n+26\t26\t113\tAGAT\n+27\t27\t42\tAGAT\n+28\t28\t58\tAGAT\n+29\t29\t37\tAGAT\n+30\t30\t16\tAGAT\n+31\t31\t32\tAGAT\n+32\t32\t24\tAGAT\n+33\t33\t10\tAGAT\n+34\t34\t43\tAGAT\n+35\t35\t6\tAGAT\n+36\t36\t13\tAGAT\n+36\t32\t1\tAGAT\n+37\t37\t35\tAGAT\n+38\t38\t34\tAGAT\n+39\t39\t20\tAGAT\n+39\t35\t2\tAGAT\n+40\t40\t27\tAGAT\n+41\t41\t29\tAGAT\n+42\t42\t30\tAGAT\n+43\t43\t87\tAGAT\n+44\t44\t67\tAGAT\n+45\t45\t20\tAGAT\n+46\t46\t15\tAGAT\n+47\t47\t28\tAGAT\n+48\t48\t26\tAGAT\n+49\t49\t13\tAGAT\n+50\t50\t11\tAGAT\n+52\t52\t5\tAGAT\n+54\t54\t6\tAGAT\n+12\t12\t236\tAGCC\n+13\t13\t109\tAGCC\n+14\t14\t17\tAGCC\n+15\t15\t14\tAGCC\n+16\t16\t8\tAGCC\n+18\t18\t12\tAGCC\n+21\t21\t18\tAGCC\n+23\t23\t13\tAGCC\n+12\t12\t23\tAGCG\n+13\t13\t19\tAGCG\n+18\t18\t9\tAGCG\n+12\t12\t272\tAGCT\n+13\t13\t89\tAGCT\n+14\t14\t108\tAGCT\n+15\t15\t49\tAGCT\n+16\t16\t19\tAGCT\n+17\t17\t19\tAGCT\n+18\t18\t19\tAGCT\n+19\t19\t44\tAGCT\n+22\t22\t12\tAGCT\n+27\t27\t16\tAGCT\n+12\t12\t87\tAGGC\n+13\t13\t19\tAGGC\n+14\t14\t16\tAGGC\n+18\t18\t7\tAGGC\n+12\t12\t3610\tAGGG\n+13\t13\t1980\tAGGG\n+14\t14\t1095\tAGGG\n+15\t15\t624\tAGGG\n+16\t16\t159\tAGGG\n+17\t17\t59\tAGGG\n+18\t18\t43\tAGGG\n+19\t19\t60\tAGGG\n+20\t20\t49\tAGGG\n+21\t21\t12\tAGGG\n+23\t23\t10\tAGGG\n+12\t12\t531\tATCC\n+13\t13\t323\tATCC\n+14\t14\t221\tATCC\n+15\t15\t58\tATCC\n+16\t16\t78\tATCC\n+17\t17\t38\tATCC\n+18\t18\t12\tATCC\n+19\t19\t19\tATCC\n+20\t20\t17\tATCC\n+21\t21\t44\tATCC\n+22\t22\t12\tATCC\n+23\t23\t39\tATCC\n+24\t24\t11\tATCC\n+25\t25\t12\tATCC\n+27\t27\t10\tATCC\n+32\t32\t6\tATCC\n+39\t39\t8\tATCC\n+40\t40\t6\tATCC\n+48\t48\t7\tATCC\n+12\t12\t272\tATCG\n+13\t13\t89\tATCG\n+14\t14\t108\tATCG\n+15\t15\t49\tATCG\n+16\t16\t19\tA
TCG\n+17\t17\t19\tATCG\n+18\t18\t19\tATCG\n+19\t19\t44\tATCG\n+22\t22\t12\tATCG\n+27\t27\t16\tATCG\n+12\t12\t1119\tATGC\n+13\t13\t509\tATGC\n+14\t14\t338\tATGC\n+15\t15\t16\tATGC\n+16\t16\t66\tATGC\n+17\t17\t7\tATGC\n+19\t19\t27\tATGC\n+12\t12\t13\tCCCG\n+12\t12\t178\tAGTC\n+13\t13\t77\tAGTC\n+14\t14\t13\tAGTC\n+15\t15\t12\tAGTC\n'
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/combineprob_out.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/combineprob_out.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,7 @@
+read_depth allele heterozygous_prob motif
+2 10_11 0.485943568663 A
+2 11_12 0.472130683091 A
+2 9_10 0.494635026326 A
+3 10_11 0.71878954705 A
+3 11_12 0.688571908761 A
+3 9_10 0.73801798345 A
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/microsatcompat_in.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microsatcompat_in.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,3 @@
+15 64416346 64416378 AT 32 16 18 22 61 TA 0 ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@? ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 15 64416324 64416346 64416346 64416378 64416378 64416439 32 ATATATATATATATATATATATATATATATAT
+17 52191125 52191133 GA 8 4 8 26 67 AC 0 ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 17 52191099 52191125 52191125 52191133 52191133 52191200 8 ACACACAC
+17 52191125 52191133 AC 8 4 8 26 67 AG 0 ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 17 52191099 52191125 52191125 52191133 52191133 52191200 8 AGAGAGAG
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/microsatcompat_out.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microsatcompat_out.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,1 @@
+15 64416346 64416378 AT 32 16 18 22 61 TA 0 ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@? ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 15 64416324 64416346 64416346 64416378 64416378 64416439 32 ATATATATATATATATATATATATATATATAT
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/microsatellite_flanking_L.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microsatellite_flanking_L.fastq Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,4 @@
+@SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
+TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCT
++SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
+GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/microsatellite_flanking_R.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microsatellite_flanking_R.fastq Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,4 @@
+@SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
+TTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG
++SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
+GGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=9;(/=5'6=4:?>C*A<
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/microsatpurity_in.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microsatpurity_in.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,3 @@
+15 64416346 64416378 AT 32 16 18 22 61 AT 0 ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@? ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 15 64416324 64416346 64416346 64416378 64416378 64416439 32 ATATATATATATATATATATATATATATATAT
+15 64416346 64416378 AT 32 16 18 22 61 AT 0 ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@? ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 15 64416324 64416346 64416346 64416378 64416378 64416439 32 ATATATATATATATATATTATATATATATAT
+17 52191125 52191133 AC 8 4 8 26 67 AC 0 ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 17 52191099 52191125 52191125 52191133 52191133 52191200 8 ACACACAC
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/microsatpurity_out.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/microsatpurity_out.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,2 @@
+15 64416346 64416378 AT 32 16 18 22 61 AT 0 ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@? ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1 15 64416324 64416346 64416346 64416378 64416378 64416439 32 ATATATATATATATATATATATATATATATAT
+17 52191125 52191133 AC 8 4 8 26 67 AC 0 ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1 17 52191099 52191125 52191125 52191133 52191133 52191200 8 ACACACAC
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/nice1tab.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nice1tab.py Wed Apr 22 12:22:50 2015 -0400
[
@@ -0,0 +1,6 @@
+import sys
+fd=open(sys.argv[1])
+lines=fd.readlines()
+for line in lines:
+    temp=line.strip().split()
+    print '\t'.join(temp)
\ No newline at end of file
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/probvalueforhetero_in.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/probvalueforhetero_in.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,9 @@
+chr 9,10 A hetero -1.27220836321 10 10 9
+chr 10,11 A hetero -0.939119957032 11 11 10
+chr 11,12 A hetero -0.720375026792 12 12 11
+chr 9,9,10 A hetero -1.6841441619 9 9 10
+chr 9,10,10 A hetero -0.97233405327 10 10 9
+chr 10,10,11 A hetero -1.29451118958 10 10 11
+chr 10,11,11 A hetero -0.641022011041 11 11 10
+chr 11,11,12 A hetero -1.01921634129 11 11 12
+chr 11,12,12 A hetero -0.425116661902 12 12 11
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/probvalueforhetero_out.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/probvalueforhetero_out.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,9 @@
+chr 9,10 A hetero -1.27220836321 10 10 9 0.247317513163 2 0.494635026326 2
+chr 10,11 A hetero -0.939119957032 11 11 10 0.242971784331 2 0.485943568663 2
+chr 11,12 A hetero -0.720375026792 12 12 11 0.236065341545 2 0.472130683091 2
+chr 9,9,10 A hetero -1.6841441619 9 9 10 0.124528157268 3 0.373584471803 3
+chr 9,10,10 A hetero -0.97233405327 10 10 9 0.121477837216 3 0.364433511647 3
+chr 10,10,11 A hetero -1.29451118958 10 10 11 0.122575544751 3 0.367726634253 3
+chr 10,11,11 A hetero -0.641022011041 11 11 10 0.117020970932 3 0.351062912797 3
+chr 11,11,12 A hetero -1.01921634129 11 11 12 0.11865253007 3 0.35595759021 3
+chr 11,12,12 A hetero -0.425116661902 12 12 11 0.110871439517 3 0.332614318551 3
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/profilegenerator_in.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/profilegenerator_in.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,6 @@
+9 9 100000
+10 10 91456
+10 9 1259
+11 11 39657
+11 10 1211
+11 12 514
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/profilegenerator_out.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/profilegenerator_out.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,30 @@
+chr 9,9 A
+chr 9,10 A
+chr 9,11 A
+chr 9,12 A
+chr 10,10 A
+chr 10,11 A
+chr 10,12 A
+chr 11,11 A
+chr 11,12 A
+chr 12,12 A
+chr 9,9,9 A
+chr 9,9,10 A
+chr 9,9,11 A
+chr 9,9,12 A
+chr 9,10,10 A
+chr 9,10,11 A
+chr 9,10,12 A
+chr 9,11,11 A
+chr 9,11,12 A
+chr 9,12,12 A
+chr 10,10,10 A
+chr 10,10,11 A
+chr 10,10,12 A
+chr 10,11,11 A
+chr 10,11,12 A
+chr 10,12,12 A
+chr 11,11,11 A
+chr 11,11,12 A
+chr 11,12,12 A
+chr 12,12,12 A
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/readdepth2seqdepth.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/readdepth2seqdepth.out Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,2 @@
+repeat_length read_length informative_read_depth =locus_specific_sequencing_depth =genome_wide_sequencing_depth
+10 100 5 10 15
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/samplePESAM_2_profile_C.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/samplePESAM_2_profile_C.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,5 @@
+M01368:22:000000000-A4T24:1:1101:10010:3775_1:N:0:2_1_per1_1 shifted 540 713 713 719 719 759 6 GGGGGG
+M01368:22:000000000-A4T24:1:1101:10015:2849_1:N:0:2_1_per1_2 shifted 4007 4082 4082 4088 4088 4258 6 TTTTTT
+M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_1 shifted 1849 1930 1930 1936 1936 2100 6 CCCCCC
+M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_2 shifted 1849 2025 2025 2030 2030 2100 5 GGGGG
+M01368:22:000000000-A4T24:1:1101:10126:5433_1:N:0:2_1_per1_1 shifted 1428 1517 1517 1522 1522 1543 5 AAAAA
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/sampleTRgenotypingcorrection
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleTRgenotypingcorrection Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,2 @@
+chr1 14,13,13,13 A hetero -0.429451855856 13 13 14
+chr1 5,6,6,6,6,7,7,8,8 A hetero -14.8744881854 7 6 8
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/sampleTRprofile_C.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleTRprofile_C.txt Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,2 @@
+chr1 14,13,13,13 A
+chr1 5,6,6,6,6,7,7,8,8 A
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/samplefq.snoope
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/samplefq.snoope Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,1 @@
+6 40 54 G 0 SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1 TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCTggggggTTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG?FFDFGGGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=9;(/=5'6=4:?>C*A<
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/samplefq.snoope.new
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/samplefq.snoope.new Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,1 @@
+6 40 54 G 0 SRR345592.75000006_HS2000-192_107:1:63:5822:176818_1_per1_1 TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCTggggggTTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG?FFDFGGGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=9;(/=5'6=4:?>C*A<
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/sampleprofilegenerator_in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleprofilegenerator_in Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,6 @@
+9 9 100000
+10 10 91456
+10 9 1259
+11 11 39657
+11 10 1211
+11 12 514
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/sampleprofilegenerator_out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sampleprofilegenerator_out Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,30 @@
+chr 9,9 A
+chr 9,10 A
+chr 9,11 A
+chr 9,12 A
+chr 10,10 A
+chr 10,11 A
+chr 10,12 A
+chr 11,11 A
+chr 11,12 A
+chr 12,12 A
+chr 9,9,9 A
+chr 9,9,10 A
+chr 9,9,11 A
+chr 9,9,12 A
+chr 9,10,10 A
+chr 9,10,11 A
+chr 9,10,12 A
+chr 9,11,11 A
+chr 9,11,12 A
+chr 9,12,12 A
+chr 10,10,10 A
+chr 10,10,11 A
+chr 10,10,12 A
+chr 10,11,11 A
+chr 10,11,12 A
+chr 10,12,12 A
+chr 11,11,11 A
+chr 11,11,12 A
+chr 11,12,12 A
+chr 12,12,12 A
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/samplesortedPESAM_C.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/samplesortedPESAM_C.sam Wed Apr 22 12:22:50 2015 -0400
b
@@ -0,0 +1,10 @@
+M01368:22:000000000-A4T24:1:1101:10010:3775_1:N:0:2_1_per1_1 113 shifted 720 37 40M = 541 -46 TTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACC HHFG@IIHHHHHIHHFHHGFGGGGDBDDEDDDBBB????? XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:40
+M01368:22:000000000-A4T24:1:1101:10010:3775_1:N:0:2_1_per1_1 177 shifted 541 37 173M = 720 46 CTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAAC ::GECC:*:)D<GEGGGECCCEC?00E?::CCCCEEECC:C*GEC4'.>ACGGEC:CC?>><DCE?C:EC?GECE?:CCECGEEC*GEECEC:GEEGE?GGECC:ECA2CC*CCC8DEGGEGC=CGECEAEGEEDGGEDEGD=EBGGGFDHHHHHHHHEEHHHHHIIHFIIHH XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:173
+M01368:22:000000000-A4T24:1:1101:10015:2849_1:N:0:2_1_per1_2 113 shifted 4089 37 170M = 4008 -176 GCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAG GECGGGGGGGGGGGGEGEGGGGD>2GEGGGGGEEGGGGGGGGGGGGGEEECEGEAGGEEGEB>=GGFGEAGHHHEHHHFHFF?ED;HFIHHIIIIHIIHHHHIHHHHIHHHHHHHHIIIIHIHHHHIHHHHHIIHHIIHHIIHIIIIIGGGGGGDDDDDDDDBBB????< XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:170
+M01368:22:000000000-A4T24:1:1101:10015:2849_1:N:0:2_1_per1_2 177 shifted 4008 37 75M = 4089 176 TGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGC CEGGEEEECC?:EEGECGGGGECGGGGEEGGEEGCCGEGGGGGGGGGGDGGGGGE>EEGGGGGGGGGGGAGGGGE XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:75
+M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_1 129 shifted 1937 37 164M = 1850 -87 TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGT HHHHIHHHHHHHHHHHHHHHHHHHHHGGFGGGGGGGHGGGGGGGGGGGGEGGGGGGAEEGGGEGGGGGGEGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGECGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGCEGEGG XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:138T25
+M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_1 65 shifted 1850 37 81M = 1937 87 CCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGA ?????BBBEEDBBDDDGGGGGGIIIIIIIIIIIIIHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIHIHHHIIIIIIHGH XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:81
+M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_2 129 shifted 2031 37 70M = 1850 -181 TAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGT GGGGGGGGECGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGCEGEGG XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:44T25
+M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_2 65 shifted 1850 37 176M = 2031 181 CCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCGCTCCGGGCCCATAACACTT ?????BBBEEDBBDDDGGGGGGIIIIIIIIIIIIIHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIHIHHHIIIIIIHGHIIIHHHHHHHIHHHHHHHHHHHHHHHHHHHHHGGFGGGGGGGHGGGGGGGGGGGGEGGGGGGAEEGGGEGGGGGGEGEEGGGGGGGGGGGGGGGG XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:176
+M01368:22:000000000-A4T24:1:1101:10126:5433_1:N:0:2_1_per1_1 129 shifted 1523 37 21M = 1429 -94 GTCTTTAACTCCACCATTAGC GGGEGGEGGGGGCGGGGGEGG XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:21
+M01368:22:000000000-A4T24:1:1101:10126:5433_1:N:0:2_1_per1_1 65 shifted 1429 37 89M = 1523 94 CTATGCATCCAACGCGTTGGGAGCTCTCCCATATGGTCGACCTGCAGGCGGCCGCGAATTCACTAGTGATTTCCAAGGACAAATCAGAG ?????BBBDDDDDDDDGGGFGGFEHIIIIIIIHIIIHIHHHHHIIHFHHHHHHHHHHHHHHHHHHHHGGGGGGGGGGGGGGGGGGEGEE XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:89
b
diff -r dccd7a3ee717 -r 3c05abb4452e test-data/shifted.2bit
b
Binary file test-data/shifted.2bit has changed