From 387060661f1f9792ca99285b7b127adb092df811 Mon Sep 17 00:00:00 2001 From: Francesco Vezzi Date: Thu, 13 Mar 2014 16:10:07 +0100 Subject: [PATCH] Major changes in the project: the project is now a regular python package and extensive variation has been done on the MP pipeline in order to make it more general and flexibile --- .../config_de_novo.yaml | 0 .../config_de_novo_amanita.yaml | 0 .../config_de_novo_picea.yaml | 0 .../config_de_novo_uppmax.yaml | 19 +- de_novo_scilife/MP.py | 379 +++++++++++++++++ de_novo_scilife/MP.pyc | Bin 0 -> 14674 bytes de_novo_scilife/QCcontrol.py | 184 ++++++++ de_novo_scilife/QCcontrol.pyc | Bin 0 -> 7018 bytes de_novo_scilife/__init__.py | 0 de_novo_scilife/__init__.pyc | Bin 0 -> 162 bytes {script => de_novo_scilife}/align.py | 178 +++----- de_novo_scilife/align.pyc | Bin 0 -> 14125 bytes {script => de_novo_scilife}/assemble.py | 12 +- de_novo_scilife/assemble.pyc | Bin 0 -> 19602 bytes {script => de_novo_scilife}/common.py | 77 +++- de_novo_scilife/common.pyc | Bin 0 -> 7034 bytes .../deNovo_pipeline.py | 33 +- {script => de_novo_scilife}/evaluete.py | 4 +- de_novo_scilife/evaluete.pyc | Bin 0 -> 11251 bytes de_novo_scilife/pdf/__init__.py | 93 +++++ de_novo_scilife/pdf/__init__.pyc | Bin 0 -> 4843 bytes de_novo_scilife/pdf/common.py | 17 + de_novo_scilife/pdf/common.pyc | Bin 0 -> 559 bytes de_novo_scilife/pdf/theme.py | 57 +++ de_novo_scilife/pdf/theme.pyc | Bin 0 -> 2512 bytes de_novo_scilife/pdf/util.py | 18 + de_novo_scilife/pdf/util.pyc | Bin 0 -> 811 bytes de_novo_scilife/util.pyc | Bin 0 -> 803 bytes pictures/NGI.jpeg | Bin 0 -> 9957 bytes pictures/SciLifeLab.jpeg | Bin 0 -> 1539 bytes script/MP.py | 102 ----- script/QCcontrol.py | 128 ------ setup.py | 14 + utils/OpGen_util.py | 395 ++++++++++++++++++ ...aluation.py => produce_assembly_report.py} | 156 +++---- utils/reverse_complement.py | 51 +++ utils/run_MP_analysis.py | 117 ++++++ 37 files changed, 1575 insertions(+), 459 deletions(-) rename config_de_novo.yaml => config_files/config_de_novo.yaml (100%) rename config_de_novo_amanita.yaml => config_files/config_de_novo_amanita.yaml (100%) rename config_de_novo_picea.yaml => config_files/config_de_novo_picea.yaml (100%) rename config_de_novo_uppmax.yaml => config_files/config_de_novo_uppmax.yaml (81%) create mode 100644 de_novo_scilife/MP.py create mode 100644 de_novo_scilife/MP.pyc create mode 100644 de_novo_scilife/QCcontrol.py create mode 100644 de_novo_scilife/QCcontrol.pyc create mode 100644 de_novo_scilife/__init__.py create mode 100644 de_novo_scilife/__init__.pyc rename {script => de_novo_scilife}/align.py (75%) create mode 100644 de_novo_scilife/align.pyc rename {script => de_novo_scilife}/assemble.py (99%) create mode 100644 de_novo_scilife/assemble.pyc rename {script => de_novo_scilife}/common.py (55%) create mode 100644 de_novo_scilife/common.pyc rename {script => de_novo_scilife}/deNovo_pipeline.py (74%) rename {script => de_novo_scilife}/evaluete.py (96%) create mode 100644 de_novo_scilife/evaluete.pyc create mode 100644 de_novo_scilife/pdf/__init__.py create mode 100644 de_novo_scilife/pdf/__init__.pyc create mode 100644 de_novo_scilife/pdf/common.py create mode 100644 de_novo_scilife/pdf/common.pyc create mode 100644 de_novo_scilife/pdf/theme.py create mode 100644 de_novo_scilife/pdf/theme.pyc create mode 100644 de_novo_scilife/pdf/util.py create mode 100644 de_novo_scilife/pdf/util.pyc create mode 100644 de_novo_scilife/util.pyc create mode 100644 pictures/NGI.jpeg create mode 100644 pictures/SciLifeLab.jpeg delete mode 
100644 script/MP.py delete mode 100644 script/QCcontrol.py create mode 100644 setup.py create mode 100644 utils/OpGen_util.py rename utils/{run_assembly_evaluation.py => produce_assembly_report.py} (73%) create mode 100644 utils/reverse_complement.py create mode 100644 utils/run_MP_analysis.py diff --git a/config_de_novo.yaml b/config_files/config_de_novo.yaml similarity index 100% rename from config_de_novo.yaml rename to config_files/config_de_novo.yaml diff --git a/config_de_novo_amanita.yaml b/config_files/config_de_novo_amanita.yaml similarity index 100% rename from config_de_novo_amanita.yaml rename to config_files/config_de_novo_amanita.yaml diff --git a/config_de_novo_picea.yaml b/config_files/config_de_novo_picea.yaml similarity index 100% rename from config_de_novo_picea.yaml rename to config_files/config_de_novo_picea.yaml diff --git a/config_de_novo_uppmax.yaml b/config_files/config_de_novo_uppmax.yaml similarity index 81% rename from config_de_novo_uppmax.yaml rename to config_files/config_de_novo_uppmax.yaml index dd22149..5424818 100644 --- a/config_de_novo_uppmax.yaml +++ b/config_files/config_de_novo_uppmax.yaml @@ -1,10 +1,9 @@ Pipelines: - MP - QCcontrol - assemble - evaluete - align - user_define + QCcontrol: ["fastqc", "abyss", "trimmomatic"] + MP: ["fastqc", "abyss", "trimmomatic", "align"] + assemble: ["soapdenovo", "abyss"] + evaluete: ["qaTools", "FRC"] + align: [] Tools: jellyfish: bin: /home/vezzi/DE_NOVO_PIPELINE/assembly_pipeline/tools/jellyfish-1.1.11/bin/jellyfish @@ -14,7 +13,7 @@ Tools: options: [--threads, "16" , --outdir, fastqc] trimmomatic: bin: /home/vezzi/DE_NOVO_PIPELINE/tools/Trimmomatic-0.30/trimmomatic-0.30.jar - options: [] + options: [""] masurca: bin: /home/vezzi/DE_NOVO_PIPELINE/tools/MaSuRCA-2.0.4/ options: [""] @@ -23,7 +22,7 @@ Tools: options: [""] abyss: bin: /sw/apps/bioinfo/abyss/1.3.5/milou/bin/ - options: ["k=54", "np=16"] + options: [""] abyss_mergePairs: bin: /sw/apps/bioinfo/abyss/1.3.5/milou/bin/abyss-mergepairs options: ["-m 20", "-1 230", "-2 230"] @@ -52,8 +51,8 @@ Tools: bin: /sw/apps/bioinfo/samtools/0.1.19/milou/bin/samtools options: "" qaTools: - bin: /home/vezzi/DE_NOVO_PIPELINE/tools/qaTools/ + bin: /home/vezzi/DE_NOVO_PIPELINE/tools/qaTools/qaCompute options: "" trinity: - bin: ~/DE_NOVO_PIPELINE/tools/trinityrnaseq_r20131110/ + bin: $HOME/DE_NOVO_PIPELINE/tools/trinityrnaseq_r20131110/ options: [] diff --git a/de_novo_scilife/MP.py b/de_novo_scilife/MP.py new file mode 100644 index 0000000..c5aeef8 --- /dev/null +++ b/de_novo_scilife/MP.py @@ -0,0 +1,379 @@ +import sys, os, yaml, glob +import gzip +import subprocess +import pandas as pd +import re +import string +from de_novo_scilife import align +from de_novo_scilife import common +from de_novo_scilife import QCcontrol + +from de_novo_scilife import pdf +from de_novo_scilife.pdf.theme import colors, DefaultTheme + + +def run(global_config, sample_config): + sorted_libraries_by_insert = common._sort_libraries_by_insert(sample_config) + + + if "tools" in sample_config and len(sample_config["tools"]) > 0: + """If so, execute them one after the other in the specified order (might not work)""" + for command in sample_config["tools"]: + """with this I pick up at run time the correct function in the current module""" + command_fn = getattr(sys.modules[__name__] , "_run_{}".format(command)) + """Update sample config, each command return sample_config and if necessary it modifies it""" + sample_config = command_fn(global_config, sample_config, sorted_libraries_by_insert) + else: 
+ #run default pipeline for QC + sample_config = _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert) + sample_config = _run_fastqc(global_config, sample_config, sorted_libraries_by_insert) + sample_config = _run_abyss(global_config, sample_config, sorted_libraries_by_insert) + sample_config = _run_align(global_config, sample_config, sorted_libraries_by_insert) + + #sample_config = _run_FindTranslocations(global_config, sample_config, sorted_libraries_by_insert) + #now reverse complement + #sorted_libraries_by_insert = common._sort_libraries_by_insert(sample_config) + sample_config = _run_revcomp(global_config, sample_config, sorted_libraries_by_insert) + _run_report(global_config, sample_config,sorted_libraries_by_insert) + +def _run_fastqc(global_config, sample_config,sorted_libraries_by_insert): + return QCcontrol._run_fastqc(global_config, sample_config,sorted_libraries_by_insert) + +def _run_abyss(global_config, sample_config,sorted_libraries_by_insert): + return QCcontrol._run_abyss(global_config, sample_config,sorted_libraries_by_insert) + + + +def _run_align(global_config, sample_config,sorted_libraries_by_insert): + + if "reference" not in sample_config: + print "reference sequence not provided, skipping alignment step. Please provide a reference if you are interested in aligning the reads against a reference" + return sample_config + if not os.path.exists("alignments"): + os.makedirs("alignments") + os.chdir("alignments") + + sorted_libraries_by_insert = align._align_reads(global_config, sample_config, sorted_libraries_by_insert) # align reads + sorted_alignments_by_insert = align._merge_bam_files(global_config, sample_config, sorted_libraries_by_insert) # merge alignments + sorted_alignments_by_insert = align.picard_CGbias(global_config, sample_config,sorted_alignments_by_insert) # compute picard stats + sorted_alignments_by_insert = align.picard_collectInsertSizeMetrics(global_config, sample_config,sorted_alignments_by_insert) + sorted_alignments_by_insert = align.picard_markDuplicates(global_config, sample_config,sorted_alignments_by_insert) + + os.chdir("..") + sample_config["alignments"] = sorted_alignments_by_insert + + return sample_config + + +def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert): + program = global_config["Tools"]["trimmomatic"]["bin"] + program_folder = os.path.dirname(program) + if "adapters" not in sample_config: + sys.exit("running MP pipeline, adapter sequences to be used in trimming are needed for Trimmomatic. 
Please specify them\ + in the sample configuration file and rerun") + adapterFile = sample_config["adapters"] + if not os.path.exists(adapterFile): + sys.exit("Trimmomatic cannot be run as adapter file is not specified or points to unknown position: {}".format(adapterFile)) + + mainDirectory = os.getcwd() + trimmomaticDir = os.path.join(mainDirectory, "Trimmomatic") + if not os.path.exists(trimmomaticDir): + os.makedirs(trimmomaticDir) + os.chdir(trimmomaticDir) + #now I am in running dir, I need to process one by one the libraries + threads = 8 + if "threads" in sample_config: + threads = sample_config["threads"] + + for library, libraryInfo in sorted_libraries_by_insert: + read1=libraryInfo["pair1"] + read2=libraryInfo["pair2"] + orientation = libraryInfo["orientation"] + if orientation=="outtie": # this must be done only with MP + if read2 is not None: + read1_baseName = os.path.split(read1)[1].split(".")[0] + read2_baseName = os.path.split(read2)[1].split(".")[0] + output_read1_pair = os.path.join(trimmomaticDir, "{}.fastq.gz".format(read1_baseName)) + output_read1_sing = os.path.join(trimmomaticDir, "{}_u.fastq.gz".format(read1_baseName)) + output_read2_pair = os.path.join(trimmomaticDir, "{}.fastq.gz".format(read2_baseName)) + output_read2_sing = os.path.join(trimmomaticDir, "{}_u.fastq.gz".format(read2_baseName)) + command = ["java", "-jar", program, "PE", "-threads", "{}".format(threads), "-phred33", read1, read2, output_read1_pair ,output_read1_sing , output_read2_pair, output_read2_sing ,"ILLUMINACLIP:{}:2:30:10".format(adapterFile), "LEADING:3", "TRAILING:3", "SLIDINGWINDOW:4:15", "MINLEN:30" ] + common.print_command(command) + if not common.check_dryrun(sample_config) and not os.path.exists("{}.fastq.gz".format(read1_baseName)): # do not execute is files have been already gennerated + stdOut = open("{}_trimmomatic.stdOut".format(read1_baseName), "w") + stdErr = open("{}_trimmomatic.stdErr".format(read1_baseName), "w") + returnValue = subprocess.call(command, stdout=stdOut, stderr=stdErr) # run the program + if returnValue != 0: + print "error while running command: {}".format(command) + libraryInfo["pair1"] = output_read1_pair + libraryInfo["pair2"] = output_read2_pair + libraryInfo["trimmomatic"] = os.path.join(trimmomaticDir, "{}_trimmomatic.stdErr".format(read1_baseName)) + os.chdir(mainDirectory) + return sample_config + + +def _run_revcomp(global_config, sample_config, sorted_libraries_by_insert): + currentDir = os.getcwd() + workingDir = os.path.join(currentDir, "reverse_complemented") + if not os.path.exists(workingDir): + os.makedirs(workingDir) + os.chdir(workingDir) + + common.print_command("reversing complementing reads, internal function") + for library, libraryInfo in sorted_libraries_by_insert: + pairs = [libraryInfo["pair1"] , libraryInfo["pair2"]] + for pair in pairs: + base_name = os.path.split(pair)[1].split(".fastq")[0] + output_name = "{}_rc.fastq".format(base_name) + if not os.path.exists("{}.gz".format(output_name)): + rev_compl_file = _rev_comp_file(pair, output_name) + libraryInfo["pair1_rc"] = os.path.join(workingDir, rev_compl_file) + os.chdir(currentDir) + return sample_config + + +def _rev_comp_file(input_fastq, output_fastq): + proc = subprocess.Popen(["zcat", "{}".format(input_fastq)], stdout=subprocess.PIPE, bufsize=1) + with gzip.open('{}.gz'.format(output_fastq), 'wb') as output_f: + for header in iter(proc.stdout.readline, b''): + header = header.rstrip() + sequence = proc.stdout.readline().rstrip() + comment = proc.stdout.readline().rstrip() + 
quality = proc.stdout.readline().rstrip() + + sequence_rc = rc(sequence) + quality_rev = reverse(quality) + + output_f.write("{}\n".format(header)) + output_f.write("{}\n".format(sequence_rc)) + output_f.write("{}\n".format(comment)) + output_f.write("{}\n".format(quality_rev)) + + proc.communicate() + return "{}.gz".format(output_fastq) + +def reverse(quality): + seq_rev = quality[::-1] + return seq_rev + + +def rc(dna): + complements = string.maketrans('acgtrymkbdhvnACGTRYMKBDHVN', 'tgcayrkmvhdbnTGCAYRKMVHDBN') + rcseq = dna.translate(complements)[::-1] + return rcseq + + + + +def _run_report(global_config, sample_config, sorted_libraries_by_insert): + """This function produces a pdf report and stores the important resutls in a single folder""" + + ### retrive all info needed to write the report + sampleName = "sample" + if "output" in sample_config: + sampleName = sample_config["output"] + projectName = "anonymous_project" + if "projectName" in sample_config: + projectName = sample_config["projectName"] + + + currentDir = os.getcwd() + workingDir = os.path.join(currentDir, "report") + if not os.path.exists(workingDir): + os.makedirs(workingDir) + os.chdir(workingDir) + + PDFtitle = "{}.pdf".format(sample_config["output"]) + + TABLE_WIDTH = 540 # this you cannot do in rLab which is why I wrote the helper initially + class MyTheme(DefaultTheme): + doc = { + 'leftMargin': 25, + 'rightMargin': 25, + 'topMargin': 20, + 'bottomMargin': 25, + 'allowSplitting': False + } + # let's create the doc and specify title and author + doc = pdf.Pdf('{} {}'.format(projectName, sampleName), 'NGI-Stockholm, Science for Life Laboratory') + + # now we apply our theme + doc.set_theme(MyTheme) + + # give me some space + doc.add_spacer() + + # this header defaults to H1 + scriptDirectory = os.path.split(os.path.abspath(__file__))[0] + logo_path = os.path.join(scriptDirectory, '../pictures/SciLifeLab.jpeg') + doc.add_image(logo_path, 180, 67, pdf.CENTER) + logo_path = os.path.join(scriptDirectory, '../pictures/NGI.jpeg') + doc.add_image(logo_path, 180, 67, pdf.CENTER) + # give me some space + doc.add_spacer() + + doc.add_header('NGI-Stockholm -- Science For Life Laboratory') + doc.add_header('Mate Pair Best Practice Analysis Report') + doc.add_header('{} -- {}'.format(projectName, sampleName)) + # give me some space + #doc.add_spacer() + #doc.add_paragraph("NGI-Stockholm and Science For Life Laboratory bla bla bla [Mission statement]") + + doc.add_spacer() + doc.add_paragraph("For sample {} belonging to project {} NGI-Stockholm best-practice-analysis for Mate Pairs produce with the Nextera MP \ +protcol has been run. 
For more informations about the protocols and the general guidelines for analysis refer to \ +http://res.illumina.com/documents/products/technotes/technote_nextera_matepair_data_processing.pdf".format(sampleName, projectName)) + doc.add_spacer() + tools = ["trimmomatic", "fastqc", "abyss", "align"] + if "tools" in sample_config and len(sample_config["tools"]) > 0: + tools = sample_config["tools"] + doc.add_paragraph("The following tools have been employed (tools are listed in order of execution):") + bollet_list = [] + for tool in tools: + if tool != "align": + program_path = global_config["Tools"][tool]["bin"] + bollet_list.append("{} : {}".format(tool, program_path)) + else: + bollet_list.append("{} : {}".format(tool, global_config["Tools"]["bwa"]["bin"])) + bollet_list.append("{} : {}".format(tool, global_config["Tools"]["samtools"]["bin"])) + bollet_list.append("{} : {}".format(tool, global_config["Tools"]["picard"]["bin"])) + doc.add_list(bollet_list) + doc.add_spacer() + doc.add_paragraph("For each tool results are reported (tables, pictures). Moreover you will find all the results and commands that have been\ +in the result delivery folder on Uppmax") + + + for tool in tools: + doc.add_header(tool , pdf.H2) + if tool == "trimmomatic": + doc.add_paragraph("As a result of the Illumina Nextera Mate Pair protocol many reads sequenced will contain the adapter sequence. Illumina reccomends\ +to remove the adaptor before use the reads in any downstream analysis.") + doc.add_paragraph("Adapter sequences removed are:") + adapter_file = sample_config["adapters"] + adapters = [] + with open(adapter_file) as file: + lines = file.readlines() + adapters = [lines[1].rstrip(),lines[3].rstrip(),lines[5].rstrip()] + doc.add_list(adapters) + doc.add_spacer() + + trimmomatic_table_part1 = [[sampleName, "#orig_pairs", "#survived_pairs"]] # this is the header row + trimmomatic_table_part2 = [[sampleName,"#survived_fw_only", "#survived_rv_only", "#discarded"]] + + total_orig_pairs = 0 + total_survived_pairs = 0 + total_survived_fw_only = 0 + total_survived_rv_only = 0 + total_discarded = 0 + + for library, libraryInfo in sorted_libraries_by_insert: + runName = os.path.basename(libraryInfo["trimmomatic"]).split("_1_trimmomatic.stdErr")[0] + with open(libraryInfo["trimmomatic"]) as trimmomatic_output: + lines = trimmomatic_output.readlines() + result_line = lines[-2].rstrip() + match_string = re.compile('Input Read Pairs: (\d+) Both Surviving: (\d+) \(.+\) Forward Only Surviving: (\d+) \(.+\) Reverse Only Surviving: (\d+) \(.+\) Dropped: (\d+) \(.+\)') + read_pairs = int(match_string.match(result_line).group(1)) + survived_pairs = int(match_string.match(result_line).group(2)) + survived_fw_only = int(match_string.match(result_line).group(3)) + survived_rv_only = int(match_string.match(result_line).group(4)) + discarded = int(match_string.match(result_line).group(5)) + read_pairs_perc = "({0:.0f}%)".format((float(survived_pairs)/read_pairs) * 100) + survived_fw_only_perc = "({0:.0f}%)".format((float(survived_fw_only)/read_pairs) * 100) + survived_rv_only_perc = "({0:.0f}%)".format((float(survived_rv_only)/read_pairs) * 100) + survived_discarded_perc = "({0:.0f}%)".format((float(discarded)/read_pairs) * 100) + + total_orig_pairs += read_pairs + total_survived_pairs += survived_pairs + total_survived_fw_only += survived_fw_only + total_survived_rv_only += survived_rv_only + total_discarded += discarded + trimmomatic_table_part1.append([runName,read_pairs, "{} {}".format(survived_pairs, read_pairs_perc)]) # 
these are the other rows + trimmomatic_table_part2.append([runName, "{} {}".format(survived_fw_only, survived_fw_only_perc), \ + "{} {}".format(survived_rv_only, survived_rv_only_perc), "{} {}".format(discarded, survived_discarded_perc)]) # these are the other rows + + survived_pairs_perc = "({0:.0f}%)".format((float(total_survived_pairs)/total_orig_pairs) * 100) + survived_survived_fw_only_perc = "({0:.0f}%)".format((float(total_survived_fw_only)/total_orig_pairs) * 100) + survived_survived_rv_only_perc = "({0:.0f}%)".format((float(total_survived_rv_only)/total_orig_pairs) * 100) + survived_discarded_perc = "({0:.0f}%)".format((float(total_discarded)/total_orig_pairs) * 100) + + + trimmomatic_table_part1.append(["total", total_orig_pairs, "{} {}".format(total_survived_pairs, survived_pairs_perc)]) # last row is the sum + trimmomatic_table_part2.append(["total", "{} {}".format(survived_fw_only, survived_fw_only_perc), \ + "{} {}".format(survived_rv_only, survived_rv_only_perc), "{} {}".format(discarded, survived_discarded_perc)]) # last row is the sum + + doc.add_table(trimmomatic_table_part1, TABLE_WIDTH) + doc.add_spacer() + doc.add_table(trimmomatic_table_part2, TABLE_WIDTH) + + + if tool == "fastqc": + fastqc_dir = sample_config["fastqc"] + for fastqc_run in [dir for dir in os.listdir(fastqc_dir) if os.path.isdir(os.path.join(fastqc_dir, dir))]: + doc.add_header("{} -- Per Base Quality".format(fastqc_run) , pdf.H3) + fastqc_run_dir = os.path.join(fastqc_dir, fastqc_run, "Images") + doc.add_image(os.path.join(fastqc_run_dir,"per_base_quality.png"), 400, 180, pdf.CENTER) + doc.add_header("{} -- Sequence Length Distribution".format(fastqc_run) , pdf.H3) + fastqc_run_dir = os.path.join(fastqc_dir, fastqc_run, "Images") + doc.add_image(os.path.join(fastqc_run_dir,"sequence_length_distribution.png"), 400, 180, pdf.CENTER) + if tool == "abyss": + doc.add_paragraph("kmer profile with k={}.".format(sample_config["kmer"])) + kmer_1_200 = os.path.join(sample_config["abyss"], "kmer_coverage_1_200.png") + doc.add_image(kmer_1_200, 500, 300, pdf.CENTER) + if tool == "align": + alignments = sample_config["alignments"][0] + alignment_path = alignments[1] + alignment_prefix = alignments[2] + align_dir = os.path.split(alignment_path)[0] + doc.add_header("{} -- Collect Insert Size Metrics".format(sampleName) , pdf.H3) + with open(os.path.join(align_dir, "{}.collectInsertSize.txt".format(alignment_prefix) )) as collectInsertSize: + lines = collectInsertSize.readlines() + line = lines[6].rstrip().split("\t") + insertSize_table = [[line[7], line[6], line[4], line[5]]] # this is the header row + line = lines[7].rstrip().split("\t") + insertSize_table.append([line[7], line[6], line[4], line[5]]) # this is the header row + line = lines[8].rstrip().split("\t") + insertSize_table.append([line[7], line[6], line[4], line[5]]) # this is the header row + line = lines[9].rstrip().split("\t") + insertSize_table.append([line[7], line[6], line[4], line[5]]) # this is the header row + doc.add_table(insertSize_table, TABLE_WIDTH) + doc.add_spacer() + full_path_to_pdf = os.path.join(align_dir, "{}.collectInsertSize.pdf".format(alignment_prefix)) + doc.add_paragraph("Insert size plot can be find in the result directory: {}".format(os.path.join(full_path_to_pdf.split(os.sep)[-3], full_path_to_pdf.split(os.sep)[-2], full_path_to_pdf.split(os.sep)[-1]))) + doc.add_spacer() + + doc.add_header("{} -- Duplicate Metrics".format(sampleName) , pdf.H3) + with open(os.path.join(align_dir, 
"{}.markDuplicates.txt".format(alignment_prefix) )) as collectInsertSize: + lines = collectInsertSize.readlines() + line = lines[6].rstrip().split("\t") + duplication_table_part1 = [line[0:3]] # this is the header row + duplication_table_part2 = [line[4:6]] # this is the header row + duplication_table_part3 = [line[7:9]] # this is the header row + line = lines[7].rstrip().split("\t") + duplication_table_part1.append(line[0:3]) # + duplication_table_part2.append(line[4:6]) # + duplication_table_part3.append(line[7:9]) # + doc.add_table(duplication_table_part1, TABLE_WIDTH) + doc.add_spacer() + doc.add_table(duplication_table_part2, TABLE_WIDTH) + doc.add_spacer() + doc.add_table(duplication_table_part3, TABLE_WIDTH) + doc.add_spacer() + full_path_to_bam = os.path.join(align_dir, "{}_noDup.bam".format(alignment_prefix)) + doc.add_paragraph("Bam file with makeked duplicated reads can be faound at: {}".format(os.path.join(full_path_to_bam.split(os.sep)[-3], full_path_to_bam.split(os.sep)[-2], full_path_to_bam.split(os.sep)[-1]))) + doc.add_spacer() + + + doc.render(PDFtitle) + + os.chdir(currentDir) + + + + + + + + + + + diff --git a/de_novo_scilife/MP.pyc b/de_novo_scilife/MP.pyc new file mode 100644 index 0000000000000000000000000000000000000000..701e20e37644db72e2526c912e388c4e8512f7f0 GIT binary patch literal 14674 zcmcgzOLH4ncD@bpAyRyak|@!V<<=XrB~YSl*`{TWB~q3+mMFTZ5p9+gR0G`vngq~b zcS9s4AgPQdnZ=~yR3*DCGF90mtJGAbDp@3{%FaI_o7tqYNYyNpZ64ouZg&G9WyOi@ z2t?!dxvz82J+FK2QTE?PhiAV&y;4%~zYKmq#y2z#r5yYjs;bmlS~k?0k-#%*Et8b9 zYAu_T`_x)rQtnr4{YiO1tqmsS18VI+QXW!kLsHJD>aeOERBH#7!MU^YKBCq}^n0JY zkE*p%{oXI{V`^u6pIVejUJT z$*Jo&s3Bl9FS?$IT^LtN9fKUCeVc^@2YlIMfmGe8~vPo z8U)`MRIMzBdy!G?KGnjXVwYA%y~wJzp|)hLd-@xEqFDuVf9`S{Wj6pMpCYo$i6#0Ny!nb2N%ax#}0~Y5@$h?v(`+ z+O=+&i%!uJ2Q@3sQXX-LRj*u-nzT*{lRj+u`6T*WyUdJIK>H!@$8w*q0AeZ z>FlH-pZ-as#2w(5TPlov7=@UTyI5^S8+c)e$%uA*Yu#jp(0!dB|QYkiYhLe+fHcOWgFTi>h3P` zm(CW-{^avHDh?=WrTh?D7?!|}Dxz)N9WRWu7OL5suH(TZP%V@yC|W2etwv}k-z8!TE}Xrv&4?h{XyiDmUo8pp2S0CMvQ)=4?1ST7|k3t;!kGO z7|w{kIgU0|Goo+u&>>(_nnticjovg_QcNiXK$}0Ot)vc4Ks|=L6l&A zQ5r!BW_OgH&t^sizfr9bdSshJDwxj3OGMLyL<>&Q^aiTOu2JR=1Kn2iei? z*CV=L;<0o)K#(X^gjzg+U)!-8ksE~JpW+4nK%L6*!Lr%#8gA9AyO+(Rg-I1Mb`m*&^? 
zbZ~~Crh!-ZCGhJS)-2g|nh79-TGNIe*DKNeyikmYp3?(XK&s(m%^_FNtZ&x+?K&W# z$GPUr=WU@<*90D^BTi;i(Hff?y$DFza2;5Vblb3Jiy`N(Vk zm)7HXJ1&pE_R<15CEj(PEdwcHMJJJVZ-M_PhK|hA=BkONIg z5uSulbb}zIO~Ws?WV=G1O4u&;c)1%n>VJ#Y!g?JK*$3ECdiWNXAhd$njjU5_Lt4rV zI3$#|9ZgAWx5cR}leMJjl3#aa^Fmn2NFPANaZ8&8C)fpU($#NZ zk2!PLEW$N`@1lKxl3lIlXt4I0S-SIRSPtx3G}b8>*8M7oAdC}p>?fdsSpFIukp(Y+ zjrW6H>C{DXR9KfWj*vp1m3C#6xV%_j_hoHlR?VU;&04g(GMH4r*$CYwkTC_#c0e@K z*sDe}5+~(aflLn7wg(zQ;`c&lU)pxIz_u3IGhqO-P0S1A5K2YhM$Mr9*seC+AH@40 z`4{jF#TApY&4@8z4C6CtOv4*F8v7yx#t1x;!%{Ph@`!O3pHuN?$rv>*8^`fEftn-I z>M~&Bvyf@1AjD@m1G&&DobH46Rx)Y>{xIxXa#&{r0OiyUixkdF7kCredxm9T3%Kq2 z9s3T?9IBtDZ0A=z;x9b;OGRg)pDqeL6Zl5_fvZyncu?*9KF#;Sli#N25zhzzLtE2+ z_yq{$h~0osGYy{uE(%S_5a)pZkzxp^h45j*iRv#D_6@@jf{6|$EwMl>2W|(<6z~{^ znfiso-|Cj5?G(_&QGsUz+9~e+Qgj4QoULJlcv9wpnJ&KTgjdrFT9{51-$Wx{rf=^; zDOAn%X1zoOY+XSoE02fh-;kW96>PT>E(9eF47p`c%H?Nb3`M)6GRNK2+UI?isDi9H zK1AE3N<}n?^&u;6@*!%7+y?P^DvJ@nUJu80c8z@p^{`8DxS5Qs#GMvcP|j!_8C zI0R+fI7boTsfpT?mP|xsJ{1uN1h|^6ydrCWbHT@CJ;+Mz#AkS1mADt!l`Vx0wIlx3 zKU2bX{)q|(GJzv4#RM!)`S`~;Dh5WUB!W4U7I=+;khuN8uCY2T;qpTY z^2N@9^$wt7D2mPX5GO?~3>{|Yc5D|xWb9uSkLT@S9h3VQFT!%mG_f!b3o3dyw$|1H`1mvX#|m%Y2!LdZw_O8 z?=tfkWB#1TQH?0$3Jl9hksKU>VTMLBQwBd~X4ojvwc<8$we$Fjj|UNP^q--Ak0)UX zA+Jc-ZAn7g8gF*AD4nuoEb|9j64fe?4&t*ah`u0Bo=Op)B_0qr$hn9*O$P7=B$Z^n zrmS8QpqVs?%%G`VDo4R?ZL{c9p4M;8-+g3#@!-?j3-=x`MXZj>C3`p6tUawb#rmVW z^S8dRK7H``-ooveB19&3IpF7&MSU_%69qQnK2)Yssv=?{p+5*$IcB z`Xi}k_@j^2dM92$8|r8X^a{*2ZhfHFurWH;0U&gxD5vmur3^7z8NYz01ij%(z%^he%mH@*3SV3X9r4(JS{40gz`KA_ zML&Z$i~frXh+ycD0S0F4)-i8X{8sz_7dFS%EeOxmc32Vn1^#*mHQNeJP2>ZyL=)8Pl;rn z)^|iBaz}I`6NAteI}^|Gnv7e<9*?P=zhR_k;mKdAHi94H3PUp%(acbnoPfhPj-c8} z5D}CbQyYj~ye^ACW1$Q&+zZ+~|#O)5z>;gc(=4m? zrv<$8)sU?2=R;hb>d_L_F;yZHm1dazz!h9a~_^ENDOl0_Iol!@f zx786Mb+N1R9D8(E=DI3n8QqoGpLAHkv%B(5hV}q>mq9*o%vqH11BpF{5`JD&t@9}1 z2b*yLr4tE99HpSOK*uwQDm<%NSWJe@00a$04|R)JQ=U_;9INBk^Qv{J>-B}BHy*)t=S(fvTggn9Y&~u>2 zTlz4^{=xyO$SE*>UhNDRkZ@WN&|4&8 z;mH-%o=JoTET~Nl^mAUdW;hQB6f6y*{975dwKTZ!n(nrV6wb8w%ocGwld7l=lW1hG$n3=inIQ9oG8++?_23KS9924^V*#S-m zRd`dT1F1d~ok1?_ECk?VP|zQ7$o6c!lGdzpex#g_yHJmZhK<3!#NjL{e+&JncCM?| zbTP!fsgqVDme6E^9Fa^|JcJM67@>#Ji%`(4#~ zw^yMpBnpkn@D5wI-&3vkl=BH}AbwEKpIQ9_MILM5_eIO2kyiOVz26rhef1cmBkjc9 z?>xf2?=(Wk7C}<5sJ$}O*=x{rv4^JZIoS~K$d3u{v;>Ea%B$!`=Tp#b4+v&tFX%ql zOSV)f(<8%R`Ul@;%a?l4!#H34O8sxP93J5Xb(;3|17QL1@q&K88MB0QPrrZoirxPp z*?m2nEC)ga{Um(EAqDnf0{cX9Si*WX132v6j9AqHn{TKAUVCdg+ky>z*h8T<&H!4- zmwPwxqx~DW2gD8B1Nw#y+&Y}apLlH-PwIr_owBT4Q;EBTkj z^ESTWH9TA-t{KWp(&ujQoSsoPa2i7tOWyj`4HwFOK;1y{owbr1L@$r|PZIpAy6e#c zJ18ThDKT-cT#4VkBWxq*RKF27VSuveN0DEP0b>B*D#G7h;nhY2lIY8SHk9!T1)VH~ z1S!UWKC!Ezt53RqsdJu{()X8TWc>_N{XM=R@fa|&#$f!3Bd@sX3ExE3(%r=?E0JH? 
ztoYU1Wpky3D>Wp7@GkK_a+u8fcG1VZAnrvNa8MzW81i{$LgDJb4QJ4S?a(a0(QwP& zU!&feXG;>VyCr6lCgEjN^h|4Bxsp!&$^MDMi|FtGK>~9bw<6|kq-&VVfsL>;5WR&U z^lk`_wuE!#cJ6tbBVvJzh~;p=_?i>&qFeRr*lOgV_~d0J47MciRjbXKSGV(s zzRfs(smTyrID^3<=_o{Qse&6^Y+h0-)HUV>pyVZNN759Jr(<@s|n%aqQT$A#c)ni6eUxriYy*La4GEvh$EW* zy6Nt?r6%cfY0l#QTbJ;VR7UKUTpLpf6t`_1bi`es+_)17oskp%5=9c+wM*C@)Qa+| z;v$;KNm|yGK$mgxu37q0-h6;P@^LXDsZ86TC+@bP5h3QXO;mz14q%LMLR(T# z+EHhj@%_4PX>#i@+-|HYOu(E#<|$1Xpjx$pFcGVLURZ7r1j4RgT2OJ#HVAHdH}KwR;wq`DMAv3`1rx zo(-G9QxMIGVZ00Nz}B}5ejRe(3k{xXC`0pS9WN{)oeS4lyhSfu+k1P= z4@QFm2%R?IfD$j?M1ma^q@=Ehg53NNR$cBcSeG`!WgR2~7-ENJuU-|5{v8;3m+`P9 zW$tu}+q_Q1Gj%4O$y7Wu&czFv3V<2*`J?Wm zQy!U$;RnFxHUzFiQwGzK^=LXt;vsW(s1OnyZNEvq6-j0X_4l{ATV>4KqGVc>QE^q4 zoptep;JFl_pK#ep8M7Uy5H{=*uA*+TB}r=+0YP>qDO`|CQ20HhG#h(lMKK}sca|RA zvC>iE`yv8w{G9RQ&?SG zcyv#^I_nSEj5;zb@uCm6)^XwrQLXxAAM}eVVvl8jBBh3D{va9E32mWV?m<48B_quR z5E~2GHc{2mp8O_@71~CUQ_@K@D+pn@Wzn@))XhrXtthNQN7YLXf(G{L`-nUB~3Ev`$u_ z;k*`2;b|d8{#8wSW+rX4ZwRBH|Gx--yTDB9BFW-bc$JoHL93u`fM|VbmZnK-79|n8 zK=6IN9W9^y4asK59(&4;urLk2PtW!TTtBkD@>BMlW|>_n$)vR((X`p zG?P0C%1Oqfm`q`zUAtD50{_F2MK8?y7!T3+4IBdSzXoUt8jkhntlMQB_4JYz4_$e&wiea4Hpy%!|LevgFI(hyixMSiLDplIJU%LXkPkWxCZ+E$ s#2}XHrjBiV#MOkH;40{!e1yu_y{YWb@uBQc|IpE)nV}>2^bZ~QZ|c85%K!iX literal 0 HcmV?d00001 diff --git a/de_novo_scilife/QCcontrol.py b/de_novo_scilife/QCcontrol.py new file mode 100644 index 0000000..6c8980b --- /dev/null +++ b/de_novo_scilife/QCcontrol.py @@ -0,0 +1,184 @@ +import sys, os, yaml, glob +import subprocess +import pandas as pd +from matplotlib import pyplot as plt +from de_novo_scilife import common + + +def run(global_config, sample_config): + sorted_libraries_by_insert = common._sort_libraries_by_insert(sample_config) + if "tools" in sample_config: + """If so, execute them one after the other in the specified order (might not work)""" + for command in sample_config["tools"]: + """with this I pick up at run time the correct function in the current module""" + command_fn = getattr(sys.modules[__name__] , "_run_{}".format(command)) + """Update sample config, each command return sample_config and if necessary it modifies it""" + sample_config = command_fn(global_config, sample_config, sorted_libraries_by_insert) + else: + #run default pipeline for QC + sample_config = _run_fastqc(global_config, sample_config, sorted_libraries_by_insert) + sample_config = _run_abyss(global_config, sample_config, sorted_libraries_by_insert) + sample_config = _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert) + + + +def _run_fastqc(global_config, sample_config, sorted_libraries_by_insert): + mainDir = os.getcwd() + FastqcFolder = os.path.join(os.getcwd(), "fastqc") + if not os.path.exists(FastqcFolder): + os.makedirs(FastqcFolder) + + program=global_config["Tools"]["fastqc"]["bin"] + program_options=global_config["Tools"]["fastqc"]["options"] + for library, libraryInfo in sorted_libraries_by_insert: + command = [program] + for option in program_options: + command.append(option) + read1=libraryInfo["pair1"] + read2=libraryInfo["pair2"] + command.append(read1) + if read2 is not None: + command.append(read2) + common.print_command(command) + folder_output_name = os.path.join(FastqcFolder, os.path.basename(read1).split(".fastq.gz")[0]) + if not common.check_dryrun(sample_config) and not os.path.exists("{}_fastqc.zip".format(folder_output_name)): + fastq_stdOut = open(os.path.join(FastqcFolder , "{}_fastqc.stdout".format(library)), "a") + fastq_stdErr = open(os.path.join(FastqcFolder , "{}_fastqc.stderr".format(library)), "a") + 
subprocess.call(command, stdout=fastq_stdOut, stderr=fastq_stdErr) + sample_config["fastqc"] = FastqcFolder + return sample_config + + +def _run_abyss(global_config, sample_config, sorted_libraries_by_insert): + mainDir = os.getcwd() + ABySS_Kmer_Folder = os.path.join(os.getcwd(), "abyss_kmer") + if "kmer" not in sample_config: + sys.exit("error in _run_abyss QCcontrol: kmer must be present in sample_config.yaml") + + kmer = sample_config["kmer"] + if not os.path.exists(ABySS_Kmer_Folder): + os.makedirs(ABySS_Kmer_Folder) + + os.chdir(ABySS_Kmer_Folder) + + program = global_config["Tools"]["abyss"]["bin"] + program = os.path.join(os.path.dirname(program), "ABYSS-P") + program_options=global_config["Tools"]["abyss"]["options"] + if "abyss" in sample_config: + program_options=sample_config["abyss"] + + threads = 16 # default for UPPMAX + if "threads" in sample_config : + threads = sample_config["threads"] + + command = "mpirun -np {} {} ".format(threads, program) + command += "-k {} ".format(kmer) + command += "--coverage-hist=histogram.hist -o preUnitgs.fa" + for library, libraryInfo in sorted_libraries_by_insert: + read1=libraryInfo["pair1"] + read2=libraryInfo["pair2"] + orientation = libraryInfo["orientation"] + if orientation=="innie" or orientation=="outtie": + command += " {} ".format(read1) + if read2 is not None: + command += " {} ".format(read2) + if orientation == "none": + command += " {} ".format(read1) + + common.print_command(command) + if not common.check_dryrun(sample_config) and not os.path.exists("histogram.hist"): + ABySS_Kmer_stdOut = open("ABySS_Kmer_Folder.stdOut", "a") + ABySS_Kmer_stdErr = open("ABySS_Kmer_Folder.stdErr", "a") + returnValue = subprocess.call(command, shell=True, stdout=ABySS_Kmer_stdOut, stderr=ABySS_Kmer_stdErr) + if returnValue > 0: + print "ABySS kmer plotting failed: unkwnown reason" + else : + subprocess.call(("rm", "preUnitgs.fa")) + _plotKmerPlot(1,200, kmer, "kmer_coverage_1_200.png") + _plotKmerPlot(1,500, kmer, "kmer_coverage_1_500.png") + _plotKmerPlot(15,200, kmer, "kmer_coverage_15_200.png") + _plotKmerPlot(15,500, kmer, "kmer_coverage_15_500.png") + + os.chdir("..") + sample_config["abyss"] = ABySS_Kmer_Folder + return sample_config + +def _plotKmerPlot(min_limit, max_limit,kmer, output_name): + Kmer_histogram = pd.io.parsers.read_csv("histogram.hist", sep='\t', header=None) + Kmer_coverage = Kmer_histogram[Kmer_histogram.columns[0]].tolist() + Kmer_count = Kmer_histogram[Kmer_histogram.columns[1]].tolist() + Kmer_freq = [Kmer_coverage[i]*Kmer_count[i] for i in range(len(Kmer_coverage))] + kmer_freq_peak = Kmer_freq.index(max(Kmer_freq[min_limit:max_limit])) #coverage peak, disregarding initial peak + kmer_freq_peak_value=max(Kmer_freq[min_limit:max_limit]) + + xmax = max_limit + ymax = kmer_freq_peak_value + (kmer_freq_peak_value*0.30) + + plt.plot(Kmer_coverage, Kmer_freq) + plt.title("K-mer length = {}".format(kmer)) + plt.xlim((0,xmax)) + plt.ylim((0,ymax)) + plt.vlines(kmer_freq_peak, 0, kmer_freq_peak_value, colors='r', linestyles='--') + plt.text(kmer_freq_peak, kmer_freq_peak_value+2000, str(kmer_freq_peak)) + plotname = "{}".format(output_name) + plt.savefig(plotname) + plt.clf() + return 0 + + + +def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert): + print "running trimmomatic ..." 
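+ # Note on the Trimmomatic step below: each innie (paired-end) library is trimmed in PE mode; ILLUMINACLIP:<adapters>:2:30:10 clips adapter read-through (seed mismatches : palindrome clip threshold : simple clip threshold, per the Trimmomatic manual) and MINLEN:30 drops reads shorter than 30 bp after trimming. + # Surviving pairs and orphaned single mates go to separate gzipped FASTQ files, and each library entry is updated to point at the trimmed pairs.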
+ + program = global_config["Tools"]["trimmomatic"]["bin"] + program_folder = os.path.dirname(program) + adapterFile = sample_config["adapters"] + + if not os.path.exists(adapterFile): + print "Trimmomatic cannot be run as adapter file is not specified or points to unknown position: {}".format(adapterFile) + return sample_config + + rootDir = os.getcwd() + trimmomaticDir = os.path.join(rootDir, "Trimmomatic") + if not os.path.exists(trimmomaticDir): + os.makedirs(trimmomaticDir) + else: + print "trimmomatic assumed already run" + return sample_config # + + os.chdir(trimmomaticDir) + #now I am in running dir, I need to process one by one the libraries + threads = 8 + if "threads" in sample_config: + threads = sample_config["threads"] + + for library, libraryInfo in sorted_libraries_by_insert: + read1=libraryInfo["pair1"] + read2=libraryInfo["pair2"] + orientation = libraryInfo["orientation"] + if orientation=="innie": + if read2 is not None: + read1_baseName = os.path.split(read1)[1].split(".")[0] + read2_baseName = os.path.split(read2)[1].split(".")[0] + output_read1_pair = os.path.join(trimmomaticDir, "{}.fastq.gz".format(read1_baseName)) + output_read1_sing = os.path.join(trimmomaticDir, "{}_u.fastq.gz".format(read1_baseName)) + output_read2_pair = os.path.join(trimmomaticDir, "{}.fastq.gz".format(read2_baseName)) + output_read2_sing = os.path.join(trimmomaticDir, "{}_u.fastq.gz".format(read2_baseName)) + command = ["java", "-jar", program, "PE", "-threads", "{}".format(threads), "-phred33", read1, read2, output_read1_pair ,output_read1_sing , output_read2_pair, output_read2_sing ,"ILLUMINACLIP:{}:2:30:10".format(adapterFile), "MINLEN:30" ] + #print ' '.join(map(str,command)) + stdOut = open("{}_trimmomatic.stdOut".format(read1_baseName), "w") + stdErr = open("{}_trimmomatic.stdErr".format(read1_baseName), "w") + + returnValue = subprocess.call(command, stdout=stdOut, stderr=stdErr) # run the program + if returnValue != 0: + print "error while running command: {}".format(command) + else: + libraryInfo["pair1"] = output_read1_pair + libraryInfo["pair2"] = output_read2_pair + + os.chdir(rootDir) + print "trimmomatic done" + print sample_config + return sample_config + + diff --git a/de_novo_scilife/QCcontrol.pyc b/de_novo_scilife/QCcontrol.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6fc82921b2ea9144c7784bba5c00e27270e42ebb GIT binary patch literal 7018 zcmcIp%W~Yt6>VVn=I|v^4@$NLTYiiab0|@0x4L>;t=nC>qSh;2c|ffXbmc*{J|wcO z;$hVsQR}11V!IU;kE!OkS|7*zfcgzOsj6U5gb5W4i7=^xVG*WOFe1VIn>g{_K;+8I#ApqdM8wKJsJ6;^y=tImLGv#OZ;Z&c`|>SY2HNXaq8LErmu^=M#28% z-QPIWx|r_17I}pEMUALY6H6zMvlXWse(W{UWHZ_-sL_5ii^Hzw0yVHW44!K9>@<&7_8kN)mks(n1zB{DtkS3;W@tN72IVWpDM<&p-9(dNCGo%hy zEV$S02RE!Q;B>>P15X-MTX5b{b#PI&hmdz#b;eYCRQz*H8sGb^y5An*uA1wND@sF< zs@+24QmBn>r_}DK%KzETsCG?*lkC7lN9Da7Nrk4DGWwH7cE-6;F&^yFIgWcmu_}6! 
z9ahbYZVdnfESym72^Pm!@+Q04(s(c?C8R+#7N83>i89BERZl85rHU!FGfkrdbOQvL zRP8DBWLtZM!uLKmtZeY>_*tt>VNqAj1~3j!XTHa6CyucZC9-YOtccP?%cjiyD8Jg1 z7B$hRF59KP^(exN(scdDopKM=A4QpX){#aRLE0(`?qmPitT4~TqOugx4I;F)8HfNR zrBW>=u)`Yx2i9F3e7E`nzEe<))IKWg&JLE3KwUXwZZfXE|zO zubLf5ci-@JNQ{ut>MV|mq6&3mJ8bNFL4JsR?#wTe;uFlF~Slz3xg7;0*8`3TTz`O0t={ zEH6`F3XAo5=m%!iC@mJJP-auQ@~{+Hs}L|0J7g_AO!=o+Bih_kEMpDu{(_*$r6&>A zb)B79RqG<~XWAwTowi1;32Vk0!)F3H(@5*qJJwtHIM4}7aZGGOQ0lMnlO7;3kUbxz z06^FbyTdBK#iiUCWPtAC6E+L*3Aj`?fcPiirBl%$%=NI0B>+w4oiGOYBK!#O40Rjm znF4GXVmAa2Y;iQ)R|QZ4q(g05hS2;E*|x;8VFlO(jsOLL6=n8+Bzp|mBa%I?c8~{S z&;!~Q7z+RyP!6)s_e<4;NC$sp{J#C*_lf{JrihmSwMjGw6oWJc33vi&8WJ!C$TFjL zZKd}t%BLvjUt7eT)6$EesPqCpV-U7KTL6&RdtXZwKpV5t#==Q$>@8UM-iI?1JWD{v z3k)e?^iOtgd4e+>U^Ryd#ObUG&dC%AGzRCpuhR;_d}l_Ga!z481Q&>u0m8{N>Q_3* zno+?^YIi{8t{uFrKKghMO-iNUZFXT5r&Zvn;8o6P@S1uu&{JI&)$2Xg8{JZG_Nz>( zw?u_CHSQDCr0o)?2bIC;@4>BC1zm)Qc)Lwt1kr^8!Ybe{&(qwA66c7nIK4o>4T zm@bVGFpT9*O?>SibHtz+{RN5Uz`1M)NoLNxtKQ<3EA=edivEpaCl+|OEWj8bBRST> zyWJ*E&Md2IT34^v_#^TOme%MP?nNdqF?pHED@=qCb1@Kt3-=OL4icHW#x?@6I1YXp zTP1+Hudxt^?p|i{CX=_AFy6RtBaubDm$yP8)*k1V3wW2O9s>Wd=<0IE6m-gG7TbNF z#ou8v&*XzOv2^_0_Urcs@8-(Bq_t_m^=)l=OIwR&$7B{F(igt@bwL03hTXh{zx(C(u_>317sN<652! zkb$5EVT|#1_8bBmR*EDY9gHwSSbRsYLc@qFIL=CQqD5Rlsd2R_T2bLB4pp^ZWr7K` z=mK#NDdHvKV8xairxduT4fAGf@EUqwHn`-Xk5&^$2}Xt@ zjRE_D3%Q?w%~zm=Q6;xVgn$eKZv8I2!|Bd!M+&)J-9$) zW}0v%fI;AFz8~_O&ORIQW{oahj3Q}93C`zDkbQac56qjuhV1Ts!pvF{NP1yL@JUo% z+?J^>GJFA`5@j_X!}pSddq=hSMxD39rBzL$*;Kuz?#m zL$HA_2DH$K9Iy44q5;UrtCe8aj?l8QEg*}@IL@}eeVT0_d~5C}zLEP0FmnSl<1kz> z2OuCsTK2vWX~4{+(B1EdApzDIHxOS5hlSCoE*A@=-FN}#1{Q&DlX;gzKoMcs&lUmC zHKP>71T*I(O$wTU+k^J@=Yp&E=gvHxYwGg)Kp6C?r*lo+UM{%stOEDO+8NRfuFb}Q ztuTI$LxRa5*PzvHU?Iw1I9fpP?yV`+nI-yxboZ{RrK0jDHb^?#oTV`!@?Y9CL=gHqje%5E*Z01%_r6p3-sdEl!xLye zsJ-}vag5q!c=4)mFL2$26tC|x(5ct!hCq`B^koQ*mVN36epUb<^%p2{@8}-UY4}N! 
z3O^@__O(-%bT+{tok%;()mhkxHlr|b!1goVb!ewZ$u5OlW~q)yke3MQG@-8pO(xc7 z61vc>CR*}i!t|koZX+UzVMIOhEfmQ85i%r@NO-7AVA1U1ai@3t)4lj<^)v!eJN~{e zWy1>R_-o6?kp`XUAwY$ z^@`j%L4Rj?6$;G*2`-oVhF0=3m^iC?f=&|h4byZ2L8MqCXc0Yz#!}FqUoan#5#-m+!Vhze&(-_i8Yrw8rXM{)( zLhe#%5@>#^_*};4mGZM}pSRB27onX2{hkGJ9>hm3Eaf;P9%AA(j@TmbzXfu<5vG#i z*5B9eD$EeNfPXD8pyOIkOmoTbHq)VgAsIc e-TYJK$E1y#N3kn1J=4~7Wprlr;^=#$=l=&Ur|l*H literal 0 HcmV?d00001 diff --git a/de_novo_scilife/__init__.py b/de_novo_scilife/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/de_novo_scilife/__init__.pyc b/de_novo_scilife/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d8841b23e174b48b18b888c018fbc7bd2b417f4 GIT binary patch literal 162 zcmZSn%**v=jX-cR0~9auj+xkvdqiJVY=n?!n*-!73p z<#$Nrr1Co@(y#n3iJVe?w?qb%-y@Mh&cZ*TzJ%~+7|)ONVs_zwa`V{FlwcD1n3?$5I>=IdesXvT9>-L zepWq(NcoX&NV^cXBdmL2G@ivK;uFZLMPX2ly*R8@;|`?5st%$UePf=nA9BudCxg`} z@O&M!H?JI)sueUFX=BN01j~)E>=&Y7C5VD*F(@o=deU97R;i$8u4ZvHC_XFr(RS3R z##~S7hBXV7pfbW7$u5;^%U-!qtW{USlH_QwQZEN-&KszxYf&8dg>twYd6SQ z$thqahlp-!ZOe3%_*`|RCJVtynI&Ot!sYIEIeC{;bGd3V4B8And^Q~sMa=W1zilw;Yx5IEekMK-Ph!+C9I(!y@z-t16YKR4U_cV(M*DsE() zOzuf+WW>4<>ycPDV%U<%>p`qnVkZ#mlUOfeC%IvPHxHy_Dn)ueGrb#X;U<&a_``{i}rz)*z+A;rnNchp$)d;)I`6Yd8ux5I95w%Jlcl zR?)b{)#=CZ19FHu)moQYgYW|~@w|mCvxXT+k`red^Dt@uR7T7mz_G6UhN6zD@EVw5T#>6$iN?faz zwE)4A#BhYvGJpg=4iE9qP%W=|{w;Z6CQ?12LJqk1c(Fvrt&9MCEO4a)6Ko z7GwL+N${ElnlV6I;rXthRjm=%8?h#gnB1{tuOgM6?@E*%jg4V@ph8vi*QnNS;+IVN zS~6>0TZx@=SbfI1qiaGA%hqlBZE+|XTi1}A>g`Hn=Un) zD>{pl{AhAsW-gn{9YT(bSZ!41M4xK6pY=~M7+^5S;E>OWBh>6|o@?!k-ZIKDWKEJ8N*oO+aC`yP6J?rd^;T`l^~20Z@as4f z9~T7LAJZ&AqJpfUhy+Rr<=0{L*d5j>YY1xZ6k@HautyYOpFM>9;Y2MCqHdo}6?xVk zw9Z0NcF1q$ILPm_25jV^Hgn!YjR9%>wo#}2ru?K+_|bocg9R-Lb!s#tIGIt7(dhmh z)Tv&fex;U$LZy~<(*)Hn)aWoPDYZ3e2~Yg7^(i1%cp{V>M?mWifh#83sMA0o0bfvK z09!;v%XU)5g1$njg0Ci3h5z}`br7+S`iv^>q#9-;0Hu&k#-Y3r@08A?*ib3%)INa+0YX%g}p1%WY! 
z)edIx23hM1%O-iiNS&g#zOeiYYWCjx7uNb$qJU*_{5NH|^-mI3O9WbgkT_{PkBf6t zlkRlk-h=rWIlN?Mk?)>Gk$Vn-Cgu?O1X2Zo*1Qc*6DvlaRJO+7FKHgp(I-!CEe25~ zAATO(nJ7uK(KRp9)E!f`aycl*v&D}>PtON&6c+Uu3whw(nY%OY%)-=6AwTo8hm72L zzAJd3i4b!Oiw~EOqq)5wJX|uVv0`hNWARq3-{C;_Cf%jNQMKy+iY9(c=JOBdG28=p zX=eIP95oCsKbpKhH$AyD_h6xrUvlRbW+BTbcka(D%*`w;X`Z;H`NhKYoO|cGE=FEl zT-9fg3C*xq7!JdNy}2+79yDUuC`xS}UOW>;20vpTdMJ%Tv53|H0xy??3iW7}3M1`( zifbyGXl!gmaFPolSQS(^!U#qLv8WWp?iKuY-(@Kmb{U$q>Qw^KGrC@eTH&`|tA$n9 zLBzepfHmFUU?BL;8c?xSm%!$}hd>Oz7HIzw3vM&uwh%cIkeB_oS&OI@3)DA+45M)i zt1u#h$o(PP-DEo|=48o|D3*tV9tWCI5VP?MQ?tuh&jLs1tO=!qfKfszVez7M%esoZ z3y2d8`w{E4PFe#72-#5vgnxcKKsalF@Tad0ggY>Ay3`pWR*HwfKtKY~(?DXMKq5dV zgFrX|jz&RN6I5%$p`a2hnxxxfe>L&$325S;=J;lcnM(qOS^ zK>-~N9R4DK!(UkIU%vzn-COu>?oSxV&OHMCLg5{EQ0>*>kKAy|V3lDs1jtqh-z3AC=13aPV z#et!3UC7R`i{8u(ec-=CT``$?uI6VzsHXA>e%;s|tL*J8MW*kLnQ@ao-)#|t)F=un zwT*y+wW830lD+8jHX80npS^2wteC@zyej7WI$?&?J%O3MAM+a&9KoI52U|LnJ5jN6 zqNI8PEmO#$F-C8!*pILR_j+T0c)%MAJ8_>kcIkjO78Dl-B0ZequSMTU4tS68;C_>~ z>5xM%nK6D8)c92V#k5Gx05J2!eati8nPy@PK^OgRk^}2cl4;j) zTbguJB(Y!hjAaN9hz+0(1H#~i3lmZ$ZscyU|4_~No_h=~%%mM-!l^tom0>@(OdHlP zAuye%?zCfoAFfY?1Ja;H5_dr+iTUC~z)@(6>(f}$@MIFRltr=!AEU8p(UGBPAZT!Z zf~=t)MYH!v-Ih9A2lvQft+(0hoi_c*W<@dgeUeV3EyaVIP8O>uOG71+7jIhPxy)g2 z!Ry#!+J1=~m{&(RE9s&PtA4O`+}=tdI3mb3C%b_Wk02a^j$eagkgihWymRSF6T}(S zVw3BMBd3x1AzQlD!Fq!?O_^77GZx#H*GV$l1K7By1lTABqf~9-Xz!;&q&`roKtuK5 zj5b3!d9&3`M>lKjtaxy*`6JXO#`vUKb4jCJl2n)0tr)TH_ZXvg5&k9?Bdpxwrr*Ns zF0)v8U&i7Lsmn~-{r0PPx#`^i=9eOPy6Gh;HX>Zl#bUM64yKE=AjUIOw4_answ7*k z!b+`7qxL2;#rMdP#HQ^@Dk4d3L%a8w_@K+Ve2X7V4y^1RAaLp!9E5E`2jHzw)E6U6Z8j4Pn|7236s-*n)YOzf$Yg#v*8BlZ&MHf z?#s9^bAhCQELgz2RHj&&X2@y?jwUsd;?&U9Ig2@Rch{mF0FQutE!o}6ebXY8dAimzTVSJ+#cplf# z3Q7WNYT|O`$ZF6x{WRI(iS*wR7JN#QZ>Pxt@e)9(^otgO=0v|xJ8MuF@H^n!z|rc+ z{oOm}LNRBpZ&Ei<%y*T4i3~fNiho)8S12IXl(Ru&BavJZQkHZP&D!aq9HskSDvsO? z`d8DWxvkqH9U2^z4xV60cLq~{bVE5vw{M*2L9Ur2%f}q)$RDA+B~ikuB74b)9C;7t zZ^2ze*rL+DK=LIj4Q~dHTcOEC&i|FWK(UC#m0TDfTk&+f9%7Nq)t^aAA&iB%hI>HK zi<0kUDiU{qbY{ZS(WshBAF*uq=izoUF$KisiiI|MdfxnsUYG=I~l*>)+EF)5IGscJJS4FM}$7f}3 zkiEbfNzVuJ*fUvrRCyKEOub;Vu;}Ne7Is)5id!5ncbODboT+D#bqPNC5h@vD`803- zPBO2Z0SSV5m*x5mYL?YSStn0bK7v-3bOkr1AR>~cx`sR2d_*ZlD*dQ(mztlPpXb%8 zAM^84W#c=hmz*&NNiVSr{Dx_XF}Z_C>Um^`2dR91X=BeWp;7qS7Wbg}$!QJ56(N#i9B)i#(6KC!hrs_?O|)X$z! z*JF0SvYVo`h)7XT1W~|5UMh-pv?%}h3Zkt4N~}^02JU7C>$3Px3-bMbj`JSojRZIc z=Z(#Q4I$E(y2+rhYYh?d!GA82*N|C(lwjGwcTnM+#7R~xJjhD&!GpjqKo&qVl2x?V zKsMlvKr6{#gHI9Xy}BWb|86=wRvwi4*D!GQrIH(C*uT>XDVhlEIs845&23f_~ji$Vy0zIt6UL~<}T zmbM-r(_WY+CYX>LAK^t$ z>1fgX((Z3FpY&=pznbu>yE5UM?x)B#?eIFcn)aXr*JB$>k5>o0_Z^YjKU)1S@jp=C z8It2Lc8Nig=?KXYe$%8j6nl}^=R$J$+*RgoTnuz0upF|E-i{)|oD8;fVtB*^S`UH+)3st%& z_w&c(qG-O2=A-5_*oC@|!4IV__C*kq>mVQWMHu3e&=yb@b01IY(raBoy$h(v_y~S? 
zSnt5kao)O&2T?;w?5uqjZHDnYO2`a1K?i;^&&J;%0YC?Qr!)pwWfI6Xo(Q|e6Jcn~ zbn1!7Xbe;XX$`OrOaSZwn`^&5rUQ8MF65C-R^W`4N6YeFH&{gFkqm!?%IiD@>;jS6id* z)T)l>(4raTJ2I(iwpQ~V{EyJb(RO9Zh;KE!{%+rPrUT%)sCnR-JxkKOlLfcl2qLc( zj1~2UreO@Ys3fyPbjR^FpE*~b%Y#Md^29ag^3C0=Vpq8&$xTbi;38eDQ?*9fJVV8c zQdzd7LqW*}d3JvoK!{Be1@*F548$?%%J~QTRqir_1_E(b=9i{%UnWqn>EZPadAoUf zSgiTM2zR0}f9P8^<_)WuX~`qRX0lMM`0g%BkL!pM4-0sER4~IY@sRR@Uv;F`1)s@W zwR&)%+U$0#m>wp&AE)r6{}TarJ6_A;2(8J%g1YCy+O>pDiz$HTd^jpRWH==0nZaoh zCSc>@0%ry%2h#RhzLr$f&XcRxdp=m~nEwPe@v+E*aGwjnxiOTulas;-^-f~cu=$466MaE%_7q%Qpdm91% zGmAM3U$KZ=o|+$TllgXyyUbrPGM1%@w^1S-BqL88*Lfi9aJm`v;QxRmC!(zbuV`UI zTW?r5pKC5Kz;cBSSw%E1R%5~Upk cH=u;Gzq#&#?&3&`3N`0VCi6p&|q?2nlHff(OLYAb6=zh307l-}hf@ zpNnsk4Czp6XM5e(+H38-*6;tVwVV8+H#PXhe|iOF|4ZQSd-$inkBg6AN0pSi*Kj-P zo)d>B)V)OPPO5v!*xjM-b;RyYb+0pax0chT?sX|Yp}vX0UCK|2W4H1<#L=z%PI07^ z-zAPc%HJi99_8;AN3Zg`#j#iUDRJy$Yv+dcpykE?#(&cV-c6_dcLRU*C%8mPJ#f^T zqn2(MN~#APD(X~eH1|Q5T1ztH(}Y^@P;2;A z%(9kHPm^lhQS~$GZsnkgc5RG6U(O9BQT;TELL;ixlGeN6%7x37+>`a8-3;x*rBE%G ztCc9tI9&~+T&Z|J%!kE5=kBlOiWMD%5z1F;F^KX}6rzE+vsEo&<*L723UtKMa=A*r z9OQCR>OwUvBlSLpvqoLb*ZiPTeN-Ki_VlvAQuThmlq*y#3&q8VAv#~Km4Z0r2t&}K zz~8n_Y<#}rM=2z=?%YCU2tFkjeI2ghTCtEHUb-~=DEQ=);_%pbZhGd!ncVEe?D*uw z^!TtJ*;Rwq*^_D#Iy1bY{}Tif2Y2I#;4T!9<_!Srw4`ih^rT_ zUUBV(Yp=NW!L?6Z`{CNJ*6?B}^>DWe@5S{VXszm?dXQ3UJq%*g>i?nE@CNp&hY1z_ zL7d~z4ms@29NwIyx1~_OAH&7&Mg@0mmVwqR(&bKs`x{h#T3*Nliryo!_aHm**6pZA zJ-|I3m7$d`^pB~v1Im9%tsPYUaTV=FxykiDc>-isl0M%slqbwkZWtf8wMi#iJzI?2 z%HNpr`HL4YKHVz(cB0wyaGM8abIBkT zRO=0&ZuPtpd!DENH@-b*w(Rn=anXD~e#Tw?<7BhT;r`il`Hz$JA1B%6&(oJ?mhy+B zv+OHyPWk7R|Ej#e9gJ9%wajToUr>H#hv=;Ghc`!?`LNRr3s>Nan0KdjAFi09E5*v9 z8->M6Fw1Tx0MM(?nnkwsP-s^O3JdItV;n!+)JRjL}xOkU?Gy_JCg~Ei%XGy7s2+f-g0%d94+y_ABR<3tD(DG z307)B0ZXS_thmuq;F_h?HH)pgP()p(V$&abZ?nLxj{F&Pk%OSe8zaWUP$FEp6{JM| z*cl*>tGiwbN+o>|uH~p$8qSxBixn@(`x^afYmM?DctUg0kNbQfucLes0m}BbsU;cf$`*B%1f;WJpqERL zmZuZRH-(dN;tC|^Vo(X1+4dtE+1dh!7$^;_YUvwvWZ|(7G*GIRkCw#opjxbW9KeXF z{bGoRp&G8{f|a6@IAEFPdB|ivrItrHy(K33OJ)f`D0uUBUpRQH{ zk0>aZ`b*)s4f|Y3K5b}C_nLNI19$$`#-Wat) zDbGBS(|}27d|^2ZQPWs_57B{Uj@4!;OGGmk!+iPr#I%s7_2%qr?2+8$y*7xuDsQKC zPgEAF(k&q#RPzSYb=hMFd8A`c#){q9XfC=+-jq#&d`ozD=)>i(@?pNT90-rfrZh}PN&o5q?{h-FgUft>BY6*IS7AmvKv10(}xuC4`Q4&S49s zuh#n213+t^@o%Gy&+q80& z2FuvTeQJF`t;tBP8RMY-9l$99Ogcb6SRbGd%4ZiCr1wu4>kK{UyF|#vF10?W)&>b! 
z`_uyr@F2T)J+0Q#3h+zQ=7XevO8KXi|BCX@DF3ViTA6%XXGy9spY+eM$OE(&L0&Rq z;G;@EA#95X%WTb*Ll^M=O>555F-c)A=bEAgrTBWr0UqZ(k5jS7h2A56$#RN?c`>Xm z*L1d^A9=?Z%$X3um}wRDL>c3YZtKcR?Ec#Ci;9B zU2tPBR>V}1%d5vp*BjuaeWCD(DnbdlOF{{C<9XM5*nP9(PuAt``RB=cN{=8A_e-zT zgr>)Tc?)GMgBRROOl$sKaCtkA^C52cB7Re{Vw1?gN~~dOc0&#CJYx+kIj#lewN#b&Jj+>XgxnzTCWjgMrdAaDHUxHmqH!lp;37}#jv z+@0%eVphZ-@6Pl{{2GYWU}N#l==&&ebaHZbbpH07+g6uLSS?lnp1%9I7%icfTmW-5 z%(}CsAg=?ruoM&?y78N5V9W(Zy5(vZpl^+)CI$)dG@I4DtwwF_y=*%9MOHc_@A)ZI z>G1{jUg6~oFY+RsrHg#y@d5ETl?YEg4#9rLEFcC7f8lv3nf zhVdq=g&~5$259syG3+ugS9p1im#e(Ij*EOCUUGfslv?bW3yr8M_kgP&kM*uL?+}oR@)zZT>`eB88j>oK$B9 zzzIPV@w|2-bQch^SI|@7j$Om%3EU0CmiX$!9moz$2b#0W>1TAnn?@*E@+*b|p9%-|5(NoW zNM%J)XrBks1mK~lpn`{R9YPrJ5U#^;0T1EohYJK|vJXg;4zeoY0uu_NLPHzkDqLvB zHKX3LQs|tNgCSlXA(iOA68BLdly?)h=wq!VzSQbDZan*Ag?E3P<%HBLH_i&`za2-o z?Gd(ylM*T*E6I;bGYlxfZSKX2gT`aULF2LFpz%Cw4fW&BU6zc_!Ry131BRKAJ0t@M zBs1Q_BINeA2>QS;2w_$+7P-m_W02dyXhs$DA%2+$#fZci0h-_6Y#$oqU$yNERfj3m ze|?Jt{x#|OHt|lHurc);EY5#(yVP$f|7$|9))qM4;lB+cC7F&i$<(&JvFG781BS>g z)0*>hygts&jLw=JC1F#V|IJONt?0BUYXfzRhvgs?p(j%lE$a;<5-ySci~{szT@!7M zTDBaDc9)G-BEz%X)hg##av?URg38FHix&+Hidgsc$%*S9r?%xnf#Lk4MeeMPkogGJ z(4QWPhooH2*QD4h7gGX}vW8S3y$F2dLAwj3{GuMYk`jrHxSOjimve>XsJgJgU}||` z@CY{<&|+?6v%{PE+Qffg{!Chx^`h}sp&^^`9yPmkt3r6*afEs&7-b2GwhY@9 zC(G(Weqk=C3tW^R$t^=*(}e{q<%u2BngQ*Kii>f=LbPZSN&!&#SWhO@Uz2*ESheV| zLU}EZY33cbQl$cm@M&r|DG)3ZA zbbL9K|v9zK&_1sli8|kkkf>@ zj0^+95<)TRT2IRo)+-xFW@*LVc|r|fxgKaqyzhAux2~S8C4m=ym`@pdA=&06hb=gKH2jpdDOkxS;$A zp`{`5(RCDI00!{EF=2olB|0C=9yW15O+cP9JL;gDH3*C#2#9>xnnt3hP^Myqiy9W> zL(#e97SP5MBL3AsP1HYgSiURef962cVp@S!D3i}#CR9w8xlf@${EC(a)Y`rWUvBMA zi+rYUE%F(UMLy#>)0)R_=b9L^U1r-}E(b*f2HBa% zG~Im0AhTkxla$mg2Eh?!g*==e$Wj3W`a%8!%*EO*M)D$?q<@eE%-Og2vYgl0KJz-H zN~n;BWnlh1hCiHCFcgSGQ&$wYilZe|hX$dF@cSL6HQ>S>QiA##8RNZN5imDq12(o) z;J?xIwr6`&`9Q-f@y2lduRFkr;6n5j`LH?X02AdI`iaz&(qzK-$0sL0 zx;Zg-JIBBgVFQpP&7 z$%(0n`5Y_B-I)1s+#9{cw!IEI<&fTBcf@rdy!xm#6@+upZ%3|NbPJ1*fsxhF11*hQ z%wE1~;-QSKj=Xm3x~V`~b7SVt^n7j}YT(@MnaMGfgz7mu(|4vMcyw%T#C;f-x1 zP3wX1_P>-E8DlfK+cQ%$von()@eb#`i78Sv!P-7IG3agxLL(&=25^|Xi^mfC%q?8X zzKkEOtX|Sovh4HwmfI2mRE*pLED}7hS@%xmVWs-G;x6QiC2Y)!Fqv(hP}58D$fCm0 z>#K8f68&U7W6>4B>b;C=HZiPp3(}E^DK~1OEoB-Z;=Dy-?;i77&ZXpq;|7~KH+~0q zf?QHwobCk@uCy(ieLm1_GA@xI&Bz0J^Q>VO+&=$&2|wWWkn-jw`9?H{y!mZ>h-mwyo$c zhYOM|iWO#*;ljv+S#iadHLjpo(?2+4+>_+J96__)Mwi$S&OTu&jv#w0> zEUoPVqq&0j#y-9(7SJ)Zj>EIS+LxsfA~+9_bW!&y8?*-H89jsM)UV?7A`+j*aZaRZ zJe)^~kZe0!VEPU%*x!LI_Vwc}EkJKKSP^AqPw>T9f@1d7jc8w3GPoe)G7%N!l{hjVqn@g>N+-pnzX!J_e@F@P4NikTWf&!={w5 zaT?E0Yy%i2c|$fF_>7-3Q~(R8;cd@$i5fUGc^g{Sovjr49!IhqdK zBAgbWk4*>RB@vJ@N2~n(Rjs$O2n3xZw)JpizCD=i$2nipWpGdwZe>y1Eiy)QE>Vdq zHOQ{6NYbK!i9kyY*xi}wy?GQP+M^D{LeRm;S#O#Z(HJp|Z^X@-G^T=MbSze^kh4+^ z*kRN_z|mw_&xO}Qmlz}eE8b+~tqN=#Zc?(} z5;x7b?F5_D6-}xah?+kIV~V0miZqNV-?Ip@VOyMA<7MrA9kDHVIZUrGCMn{%7}IM= zpkm`%+BAqp`j%&{g%`2={VaIMd%*KC%_EtlSh&q6LUZHoe$Z)K{M4*Go3j0nOo*l| zEH2q>@s{C@W5KRAb_!=1uod&=D7aFI*(c}g{%tblAEFsMX}RRS!=TMRFeFb&_Q4(h zZQU{-L@2`#CwT1wRUU9oiMvPKr?J6qD6|J#>r`{QoKq*CHsL@GZp(F~ zSpt?xijyX%5QkIGCh9*__`PcvN&zHd_na8F)0!r3yil4&4I8J6I4h1lAXzhc#7rV` z5;4n&t(MPi*EEr_hUN1~apq)!da{l$6lG;G0@YDeV+4q*C``@Dkj^g1UdTj;aM=_~ zBek(H=zYX=X07CoZY&;9A@WY~a+;S{csauh)jRJjFGIYXoBg*mtynnQf@>4~?>z@L2B2Ob6LV{y;{1vEjmOV1f5Jt}9rGt#JbsS( z6D}S9slZr91m@2; zg3;J@>5#jD2hHTM^ZI@%xnE3_f0xp5unCzk9m+)Ck_mG?Gwo$2Op#0_Q!@^C@0X)J zm@OG-P1-#)KwA+Thz9>8|Q6nl)5CQT(df&)XG*3m!3hu8NuZ5|W^s}j>U#vuIr zgeeVXGbdMbnm$edv$#=lY}82I8&l(JLzqCu=#^%+rY0VXLVi6`#7-6@8mco8t+y!xFEiXZkuqaRW^e;(`tq!ae-y=3`eBJ zpb-ls1Cxnjv`dqO=Ll@TB?egcWxCB_6_cV|Ll&Dt+>%1yMD~_r+{i>Z-JVs3ppO6C 
z$4jg7Wt@d%*Dv}^M2Q`HL_t0*6@&0ySt>GBskga^xG2AA`ZRTR9255l?Q%m{n;OSO zL~3u-*&|~;Zfp8*(%;hbnVTIQ8=rG`vVA<{waH$f@$*wHm9i{0SINc#={2n!&dblY zR5q*}W7&KQ1&C2o3nq<9F1~Wj`kr9{{gO-#8bh+$nRg_EMMwVv!gg5WI$&$a>elNF z$a=TYzDSErfKw$0<3j?W6%ToFZk2%mb;flH&8M_5O+QvuZ zOQHm9t+Td7xf>j`H3kbJjBwdjXwn#6cv!e5|8CM2+Im*!T>J^TG!unBl@qApZzfI2 z&@&rNX`IY8_hb_D^Yu7yy!uL;w#b=jCPg*;vVH5ESvlr=uCZ=^FA15`tlK>2!3IH* zy-biW*4P!i%I&gcA%DNR*pMy7x7DiMw4r1{l63L%P2;XTl=$C`P=3=~pReM(8_pqH zecU8l+-?Rs4rHKJN{p_olUOBTkqIlf8S`lsKfrS3({(PP__eW7P*|!|OV!0ySH7Wf zaa3cO-^bX*8Gg*=$~gvu2%O7UD84XqMcH)m9XLcHBU4JrCJDdoMkv=DgkER>18>dE znD3HYAr$u=R}QKo84byWaM6{+qatvMDg{ouF{-)lyKcsJ#e03`mi;y-Q)9K~kx%SP zF}T{3NC&@}i5KB&xCso5JxVidTN2n-1idkew(JN36hTd@xCt-r>!YCqDEygC<_;zn zqxcz!K%P5}*E63YrJXF>#qx~D-(|B~8v%sVh9zxxyygEMlxsflZ?euUKIXw}!PDR- zxeW1_1Pl~F1sJ5+W%9x!1ntBNR1~RC_Tg*qrbSFW(fpIJ#GveyBLrtk`K32Q(~=_X z>`fd-Zg4~*~i zy&5mS#LKVp^6R{O7ndRYYq`9rkDyrR+va~u^$w*Q&Cka!;)dm?v%9Ce*ge%f*Zr`& K-2Hm@d;bGPlZ49v literal 0 HcmV?d00001 diff --git a/script/common.py b/de_novo_scilife/common.py similarity index 55% rename from script/common.py rename to de_novo_scilife/common.py index 8b38b69..7658ca5 100644 --- a/script/common.py +++ b/de_novo_scilife/common.py @@ -1,6 +1,20 @@ import sys, os, yaml, glob import subprocess import align +import datetime +import time +import types + + +# Header levels +H1, H2, H3, H4, H5, H6 = 1, 2, 3, 4, 5, 6 + +# List styles +UL, OL = 0, 1 + +# Alignment +CENTER, LEFT, RIGHT = 'CENTER', 'LEFT', 'RIGHT' + def prepare_folder_structure(sorted_libraries_by_insert): mainDir = os.getcwd() @@ -98,13 +112,68 @@ def is_exe(fpath): def _sort_libraries_by_insert(sample_config): sorted_libraries_by_insert = sorted(sample_config["libraries"].iteritems(), key=lambda (k,v): v["insert"]) - if os.path.exists(os.path.join(os.getcwd(), "DATA")): - sorted_libraries_by_insert = update_sample_config(sorted_libraries_by_insert) - else: - sorted_libraries_by_insert = prepare_folder_structure(sorted_libraries_by_insert) + #if os.path.exists(os.path.join(os.getcwd(), "DATA")): + # sorted_libraries_by_insert = update_sample_config(sorted_libraries_by_insert) + #else: + # sorted_libraries_by_insert = prepare_folder_structure(sorted_libraries_by_insert) return sorted_libraries_by_insert +def check_dryrun(sample_config): + if "dryrun" in sample_config: + return 1 + return 0 + +def print_command(command): + """ prinnts in ahuman readable way a command stored in a list""" + ts = time.time() + st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') + print st + if type(command) is list: + print ' '.join(command) + else: + print command + + +def _check_pipeline(sample_config, global_config): + """ check that user specified commands are supported by this pipeline""" + pipeline = sample_config["pipeline"] + user_tools = sample_config["tools"] #might be empty + global_tools = global_config["Pipelines"][pipeline] + """be sure that pipeline I am going to run tries to execute only supported programs""" + for tool in user_tools: + if tool not in global_tools: + print "tool {} not available in pipeline {}. 
Only these methods are available: {}".format(tool, pipeline, global_tools) + sys.exit("Error: invalid configuration file(s)") + + """check that programs to be executed are supported""" + tools_to_check = [] + if user_tools is not []: # check that the suer wants to run only some (or all) the programs + tools_to_check = user_tools + else: + tools_to_check = global_config["Pipelines"][pipeline] # otherwise check all the tools supported + + for tool in tools_to_check: + if tool == "align": + tool = "bwa" + tool_bin = global_config["Tools"][tool]["bin"] + """this step is a case to case step as several special case are present""" + special_tools = ["abyss", "masurca", "cabog", "allpaths", "picard", "trinity"] + if tool in special_tools: + if not os.path.isdir(tool_bin): + print "Error: tool {} requires bin directory in config file. Directory {} does not exists.\ + (N.B., not the binary as that one is guessed at running time for this tools). Please modify the global config.\ + If you are not planning to run this tool specify the list of to be run tools in the sample_config section.".format(tool, tool_bin) + else: + if not os.path.exists(tool_bin): + print "Error: tool {} requires full path to binaries in config file. Path {} does not exists.\ + Please modify the global config.\ + If you are not planning to run this tool specify the list of to be run tools in the sample_config section.".format(tool, tool_bin) + + + + + diff --git a/de_novo_scilife/common.pyc b/de_novo_scilife/common.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ce3c0c780a4374cd9ceb3e7d912371b13e226b3 GIT binary patch literal 7034 zcmd5>TW=fJ5uPP=A!SRlW!aGuC*54s(nTUCaW1H07)>n2QX|vCQjQ%WF0te;rInVu z)Sjgz##T|lL7O6d^B?F#AA&yg&-AC{xoE#{4n@mO5EO-7RMMWEbJ=s|%*;14=cxGo znqjG6)$J^`pT!9Ifr;_H&!=WoZu9maXi=Y zm@%~EB0G*e6ghH~F33*taU*zmSdgQ_ao31ED9A%cvNgfPJCLMSoy1NwKInEhYw2Xg zE&=TnO>jWu?lXCL~7%Jt!yS z!KgeKk%y!DZAm!Jel#vuIYK|biXm9~L#(~`t=zHf1rywggQaT~%B8A8TmE?SV{-#l z5^fM>t{-QiAN1YuFfzGWasxlv3NtrP-E=?9-a5#lJVayeFS&V`b)v*aJv1oC*DXMawC)E7T~1A=JJ{<-da(cKMS~ zBcgz-lZYCT!`vb&R#c1_m;zDuldJ5*->?s` zv3(^VOvio*4)!vOE8y+aV_coazN?;E;uVlXOa*CZ+$7D1pgIU$vz_+hKwF!kyB%iX ztN2m;cr+AGT^W7cQzoHa@fazz5s$L>2qljUc#|xqSkTtJGc0CMz>kPDlJKCB_?=Ke z=FwKY^DHi~pjCME!XDA!y~N^wWRT>f*M(yV8^-T+$W5|LybND?2UvSQ8QOL9-?)Ew!^A8jd7ll-k>t~3DjveuujwtDfzv+nbQzO7B<{#JL10G=s&^Yv|eGLB?QGz-NDyvQmKvXg`D>&-= zM-A0q0{t0Ep|-N9;U!8EL=2AFSZUlpTGZf__VRqtdUJ!VR^={6S=ds*`nl=G2wMES zon{?Bw=gEEE0nhlm2t3v_(xmN=y1YT$S!SV5Nc>s@ak$c8e}cKiqL*M6ZG z>Xa+$kJZ(xmsJOQ7E^i@T7@jDgGG)Jw(>OVKQCG4bm+rX8aDm8jW$E(1mSwp87WLz zNz$I^h-j$hI}{KVW`9+TCwJxW zw(uJbiu3r66@}ySy(K2T@V7fie6C9=^NUjuG=yNEZ0;mRqKisX_QA~+EE-T)cjT}G&X0v+qm_a z)go7JGtbnj!nA@WdBz!-Wd-^R8=Rc>E{?jEjgL21)#{>69XHnO>4i{r9T*MB_(yh$zcm#p>9$R&r30;XwhAWX5%0{>Qc=ykE<6QV) zJ8Es;17R5H=n$rhbIusv;{_LP;XJ5PK{ZdoQ1j{xUI$@AI)jQm%Q2!9;fSgliai8ax$s*(oh-hs zTH4ie-*%$!SsMy9b2aum&A`8waeA^<6y;8tv!@o_Ee>ZqpmmJxWh(eYHl+#QbK{hm zV}soM*a>Qnyik$i{A_fHY3@ZeOC><3(8KN+^9X3QMlWtXxb8E^`f$hgYJl_DebDoI ztHUZgp0U9aAFb`MwcD^9+~*QRmJMW27?keiF1lc)@J|ozRRdBCn^z+J1Obn!gtfIB zt3bW=VgMzxs9W4Uk`mi^V>N-F6tPiF zUEk$#&rbr^ARZy6ahx0TSkJh%@WopTowpVOcVTsD;pWl;vo=gx)eJ@bP#z)Yr1dc? 
z*9z^aouwUCOpY{NyK!)AvqjSp{gR8Q7t*X)9ngRm1x{}CW%B1{iDZ=G$#IHM8T=j% zHk&Q*OgoA&dA87^^)b6*x}=)>2r_`E>0GEBPz6wkfryhunTKbXM{k zK{?1*$0fVOSO=x^P=JpS_li&&#ERI6G!~k7_AUwj?rgy2Ebr~7#$h68+AjRlC0$o zD-0TWn#M-m+{R#xF~1qKyU6Ky!uI!((kcXC?Z;oEbJ4w(#9RS+o(r?yP6Nxp@Pj3c zGH+nIl`KoMC7@;Bk0Y|nV`wka8!A`Bqq(Hf6Wv>hhLz@luPvL(VG)xwVSD7+Z}yR3 z(7rmp>18cDuH`q=Eya!>#~Q!Y!SQw{3v@K%coOA(^BD|sz!103ZD!$K4@stRu?DZ~ zY`RHwTUM3FMHg35gC2bI)D6U<`_xaRy7VF*nX# z_~9f8A=ae<=aD1FL-TX-Vuxdx)Z1>>TTpO9y}7soEOp>t9MNE?12 z*9{K%4@CO^D+KG}_1tpeif95hYEwblZDe3sdW)pTPYtj+y{gsZ;BvFjHWB}6Mk&xdT#N7H)EEA#<7$57ER`tD5Ss} z&ZzjQn=Uf0*Hbq}+M$7I0cuDpvZlqq_5Pp`DzCUuR)8jH_Z@C@qh zqIj$|rJiv%R>BHb;aYlec#TY|&wd*^8T^jS6;BtZi{%UDnQ}>=+44krw0!E{`{V_{ literal 0 HcmV?d00001 diff --git a/script/deNovo_pipeline.py b/de_novo_scilife/deNovo_pipeline.py similarity index 74% rename from script/deNovo_pipeline.py rename to de_novo_scilife/deNovo_pipeline.py index 1932bc2..a1e75f8 100644 --- a/script/deNovo_pipeline.py +++ b/de_novo_scilife/deNovo_pipeline.py @@ -1,9 +1,11 @@ import sys, os, yaml, glob import argparse -import evaluete -import QCcontrol -import assemble -import align +from de_novo_scilife import evaluete +from de_novo_scilife import MP +from de_novo_scilife import assemble +from de_novo_scilife import QCcontrol +from de_novo_scilife import align +from de_novo_scilife import common def main(args): with open(args.global_config) as in_handle: @@ -13,18 +15,25 @@ def main(args): sample_config = yaml.load(sample_config_handle) check_consistency(global_config,sample_config) - - if sample_config["pipeline"] == "user_define": - sys.exit("user_define pipeline (i.e. a pipeline that execute one after the other the commands specified by the user is not yet supported)") - run_analys(global_config, sample_config) - + + if common.check_dryrun(sample_config): + print "Option dryrun idenitfied: commands will only be printed, not executed" + + + if sample_config["pipeline"] in global_config["Pipelines"]: + run_analys(global_config, sample_config) + else: + sys.exit("Error: pipeline {} is not one of the supported ones:{}".format(sample_config["pipeline"], global_config["Pipelines"])) + + return 0 def run_analys(global_config, sample_config): - analysis = sample_config["pipeline"] # analysis to be executed - command = analysis - command_fn = getattr(__import__(command), "run") + """ check that user specified commands are supported by this pipeline and that all commands I am going to run are available either via PATH or via global config""" + common._check_pipeline(sample_config, global_config) + pipeline = sample_config["pipeline"] # pipeline/analysis to be executed + command_fn = getattr(globals()[pipeline], "run") # this stopped to --> workgetattr(__import__(command), "run") command_fn(global_config, sample_config) diff --git a/script/evaluete.py b/de_novo_scilife/evaluete.py similarity index 96% rename from script/evaluete.py rename to de_novo_scilife/evaluete.py index 507e1cf..4c1922b 100644 --- a/script/evaluete.py +++ b/de_novo_scilife/evaluete.py @@ -57,6 +57,8 @@ def _check_libraries(sorted_libraries_by_insert): if current_insert != libraryInfo["insert"]: current_insert = libraryInfo["insert"] different_inserts += 1 + if libraryInfo["orientation"] == "outtie": + sys.exit("error: in valiadation only innie libraries can be used, please reverse complement your MP libs (N.B. if you employed MP module of this pipeline then the reverse complemented reads are already available). 
This is needed in order to run FRCurve, please complain with FRCurve author!!!!") if different_inserts > 2: sys.exit("error: in valiadation only two libraries are admitted (usually a PE and a MP, sometimes 2 PE)") return @@ -183,7 +185,7 @@ def _run_qaTools(global_config, sorted_alignments_by_insert, sample_config): os.chdir("QAstats") BAMfile = sorted_alignments_by_insert[0][1] - cl = ["{}/qaCompute".format(qaTools), "-m", "-q", "0", "-i", BAMfile, "{}.cov".format(os.path.basename(BAMfile))] + cl = ["{}".format(qaTools), "-m", "-q", "0", "-i", BAMfile, "{}.cov".format(os.path.basename(BAMfile))] print cl stdOut = open("QAtools.stdOut", "a") stdErr = open("QAtools.stdErr", "a") diff --git a/de_novo_scilife/evaluete.pyc b/de_novo_scilife/evaluete.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d86548289fad1eb35142843d82c70934b48f941 GIT binary patch literal 11251 zcmcgy&2JoOT7RqCU+vhj^Wh{t$xJHq*&|{nnb{$b3}HRC<76jxoN{(D@{ZM1cU8B` zuC8j=TkY71`+%|%AR!G1E-V+25El+ed*Bb)%N~$etyThY;lK$FoZtjP@cTV)mAmc9 zWMEliyUXwQ`@A2|@A*AXb@5-vNB<*yeYLK#p921W8z248czF1;RH)S2LE2JlR$g9E zYlS>rRBOdNJ)qVG^7Nou8_d%~YHcV_539A|JUyb;N_l!zt&J*wRBfJ6UO~OEl$8lF zrZ&gb+DYXV)%PLLgz^R?F{!*kNt{yNkR(nkZ&(s%ls6)Yv&t(;VoG_Vk~pWl6OuU3 z6|Ppsu-f3i@Uxs2k(_Nd>)eSUd3_rSm)xsw4^pi)xP2=9J0vau~t)a;&2UidLXMA%WI0# z2Q`CP&G=VphO(NIM{6>1hRuGDP=>7`mHdje`zh1d#q#|6P1PMx>4Zur)#fRco>paS z>gE}>KfrD;3aUG(_VI@$n7zOfEVcdb>PdtfFp9ZxlB<;y*vjQ}6i+t{no-M-Qk`OF z)eU)mAkPow`7L>VqQhJut#;^hB-`^FZYNADXW5pg zSC3n*I7-Xxq2na2hCwZHlfc*2+Fmt?w4bC{5o+rjetoM~opOk3trLV^HS(WU6Tjgn zepL5y=1RpHd!|&|O*dp3e>c!r#|X+=?w0Qbi57Y58%Rn@R-5vkRV9f5T~C{-TYl2? zt2MV(Z3LmO({T(7AWHYE3 zdJaJYhh?O4hD%UXBIb5k;qM7Ml+@OMO8z3RAC-Fa@2O${!432Q+cKpwY7uaeDfKmm zzC^e_0W^uLjyq`z4Z4U1ev-t=Ejx&8Y;E9r(#VdZa1Uis;M)iKX4l=wuK9LH``%R> zm&etuu%oss%QK(M*g?ZasqLdpyyttYHCNY;8+N)8XuBP> z{V<4p#qI0&J__iA|y=S{SZVf;88ux9HNanSI6q;mLI=bVA z*htr2S+JoSNGz>fwRPO`)1c*R`vX+GU*R;)1v~_jfJ8=doi|uSc=UpXU{{*qolZ}p zwBAX8XL*_P4qH}4suH&OAv9gBb_lz}WjasF`B%b3?$+O5$ zTB3Mt&ub})_t&_D2Z{$}qv5(ut%H`8p)Aw_m@~yvW4&QjMN4jrBB7yCLE!pkQEh^` zh(Uw)W+kv`(nIe_$xxORJB~iXdH!I4ECD+z+yIl&a+={Wm<*r%lnIJCpgv|D=)s`K zg|bz!0n|dnVOGEV8^Q)l0246LDETYZg%m?%3@?=GqU(t2j;LM0#SqY@uDWPl%ARCW zC61glE!dxZ!3gZi)}Ts0w*VyIRr|*DXBmScLgbffvJo^l zsI42onUAhtueGn*Vf+*&0{R+i8iodF#4HL*Y{oTWNI!N7#SnF)B?n|^5DINmiGn(k zgo7YT{iL1v>5P3hPVANo+XeKC?4}<}wtdIG@oBA1WYK97M@^`-?lhY)yuOEX z#nsHYMNV~yyibMwB+gq%OY%;IYAc%VyvG#DghQ?%dh6CS>4+nZXw@9j8nNv)KrOAi z07Tq|*3;q>0EwAma&_t^sV0(%VZ_!7Yk8UksV`^Zyv`wMD4l7J%Xtsvu9D@PM#&+j zB#c5v&x)%JPwWT!F-iFwAUAGx1EPS4Q7Y|kcZ6oqv{Jld1RjFOBIlgPgTO<`8MDsd zc{<|(Z{R-xXrRE7F|--SX98)0(hvaZq@*uND{&eqI5An^>1`yaVPO0#KT#wklilJ4&)?74q$F!acNeLq!vW7^JYIdiF1h$o{Qm{eiNjPd+2KIw`$?u z@ok-)s!(tdu!TuAnStVV73)Q$cpWH=Y{P%zMialV4QA1T&HzA^aZ7{;Ch`| z6q^GgC91K(SUP;)^Wv|;mqYeKo}HwTp%aR6%4#1=eNF98Vva%=a=^U?nJg&%j~1xG zCd9ue`vM!)b8BHq%*kZObcvM~4tcf0Yij&HgOT)H43cDwvfW8FVGC3%3Zf!pnavO7=V!mJdGn^H+x|2h6&Y4CfaPgJZUt%KK5|A= zl1ev};OmCJQ^wtE%>1ngbX^mB8vVb&miL`GJd`73PfS?IY@#*>Nk-TCVAC? 
zxdGV>V!DGZn-ao-dY}5|Y`W7AvMuNYCMR76`EJy*j_1^u(d)2vzpH;K6vw7L_LuaYKfiy-#2M3Lh7H6%=*i7q{+2wrT!-ZK(vb+ZmpY)4$CM!BB zE&*xvv9mCDFCVe5#0)18^}PGMJCp9Fg00$b3pUxJK8Bz5a8}b>yz9HrB;R&?9U~ZJ zi+5~KPusKL*;%F5M3}E_3Am!7$jt4iS)pKp7i~}e1+l?fkvNS*MC4>C?J$*gge^g#G)P0!L%TaZBU*&Rdf1SFmjNxp4k@z% zK_Sna9k;297>6rYikXJl1bt9Yj}g&uo5zSP&I$ziQ+zbTZmRGhsOYqH9>0^oi^;+{ z{4WDdgoT|)3*Jn@j6cUZjUs*>3nNtpF$I{E$*f3ja*Uww2}HVoY90@rK`xf!HWW%KVfFjasiI3UWfS08VG z*Lv~Z{6njITJ4|C zr5543*se1o;u~@-kf2v2Qd6j2(Pe4H^8~rXZcVA(zvGzm>pwZP=xhUEH>5{0QG`dh zKPB66PVM7(#?;oZO8%gb9f{2lh7ji`wBJI#x7(F+MNp4`4XX!amc2jbX-Jj(Qj;dOfN@3MdLzV1#A{D=;nyGgv50CZHIG`_;=iJPC|C!z5-Yg~HD> z;l*#55QY!053r<5>iaO3;!nZq_Z^W~zWj*9`sPPuY%EOctJice#hWjk0$-az1nB5- zedDZeee ztNu3LD*)JL=uWh#yJDjR2E~#J78+|LFt~&l`)!%6AaxiJyUQi*XaYixbUw z*W#Sm9vYh+ZJXXUdW(agffMQ-AiE$(UP#9fP`e`{O1l|2Mgc)M8?wI+Z@UrU2m?4r z(u}AlcBA3eeYit-CC?=_rR;8ELhE~)-xSEju;<|?B|)vjx4YRM83ADvdNmwg>{a4M zO+Of*Ja6|6bExqVu>>P22Tj2$dveJ{M1a()yuVxy{t4S*j09VX*t;f=tont&{ig&U zhi9bI zC>I#e8_z}ipinbR9ElJ?}2_~e`a@$g{iyHavn)ADCO#RXunL{9P3g`D( zP9}M9elm~2KKUNNk`GAh)iMNwFlt{3RfWt`f*@4RNan`-DDy2MymiEPr}Q!2#^1)I z|ALPOW&lOV$FAVR-%G;I#tUb_#rR9sHj3P#eDTVm+Ufq>z~%m2;qu|0hieVn2t zG>4DoHGua9yy9Cc;dyY-K_q&GV04I~O5u6%!CnQ(R$kF7lo)K@8s9KPBYD397C1#R zScFmI?8};Rd~b1X-6Wdzb@Av>@_05&HtVUoUST_P2YEjb{-$5h2qKS>{Xaq4v=Fk+ zs%CvI?nJ3vjxgb>41OoAig;4;AmcW= z;kJxY@+)e_>0%d$PJqL}mt6NYzCGh=(OEI%cu&e3jjy+eUg9fB@@tfOYfFSrC%zYD%~zE|2Nr6eF^{o literal 0 HcmV?d00001 diff --git a/de_novo_scilife/pdf/__init__.py b/de_novo_scilife/pdf/__init__.py new file mode 100644 index 0000000..c8747bf --- /dev/null +++ b/de_novo_scilife/pdf/__init__.py @@ -0,0 +1,93 @@ +""" + __init__.py " + + Author: H.C. v. Stockhausen < hc at vst.io > + Date: 2012-10-14 + +""" + +import cStringIO +import urllib +from reportlab.platypus.doctemplate import SimpleDocTemplate +from reportlab.platypus.flowables import Image +from reportlab.platypus import Paragraph, Spacer, KeepTogether +from reportlab.lib import colors +from reportlab.platypus.tables import Table, TableStyle +from reportlab.platypus import ListFlowable, ListItem +from reportlab.lib.styles import getSampleStyleSheet + +from theme import DefaultTheme +from util import calc_table_col_widths +from common import * + +class Pdf(object): + + story = [] + theme = DefaultTheme + + def __init__(self, title, author): + self.title = title + self.author = author + + def set_theme(self, theme): + self.theme = theme + + def add(self, flowable): + self.story.append(flowable) + + def add_header(self, text, level=H1): + p = Paragraph(text, self.theme.header_for_level(level)) + self.add(p) + + def add_spacer(self, height_inch=None): + height_inch = height_inch or self.theme.spacer_height + self.add(Spacer(1, height_inch)) # magic 1? 
no, first param not yet implemented by rLab guys + + def add_paragraph(self, text, style=None): + style = style or self.theme.paragraph + p = Paragraph(text, style) + self.add(p) + + def add_list(self, items, list_style=UL): + styles = getSampleStyleSheet() + style = styles["Normal"] + list_to_print = [] + for item in items: + list_to_print.append(Paragraph(item, style)) + t = ListFlowable(list_to_print , bulletType='i') + self.add(t) + + def add_table(self, rows, width=None, col_widths=None, align=CENTER, + extra_style=[]): + style = self.theme.table_style + extra_style + if width and col_widths is None: # one cannot spec table width in rLab only col widths + col_widths = calc_table_col_widths(rows, width) # this helper calcs it for us + table = Table(rows, col_widths, style=style, hAlign=align) + self.add(table) + + def add_image(self, src, width, height, align=CENTER): + img = Image(src, width, height) + img.hAlign = align + self.add(img) + + def add_qrcode(self, data, size=150, align=CENTER): + "FIXME: ReportLib also supports QR-Codes. Check it out." + + src = "http://chart.googleapis.com/chart?" + src += "chs=%sx%s&" % (size, size) + src += "cht=qr&" + src += "chl=" + urllib.quote(data) + self.add_image(src, size, size, align) + + def render(self, pdfTitle): +# buffer = cStringIO.StringIO() + doc_template_args = self.theme.doc_template_args() + doc = SimpleDocTemplate("{}".format(pdfTitle), title=self.title, author=self.author, + **doc_template_args) + doc.build(self.story) +# pdf = buffer.getvalue() +# buffer.close() +# return pdf + + + diff --git a/de_novo_scilife/pdf/__init__.pyc b/de_novo_scilife/pdf/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..960507494edb1022e31931574d0f38e0753967b1 GIT binary patch literal 4843 zcmcgv+j1Mn5$#=ENPyr))PWi`s{)T_I8jj}n)=czYOd4qZl z%A3?{y0JQC3zWC0*K+MS$`&bKqTUka%hX$Tz4Nle3iVbfzeK%DuD3ziD&?1{ciFX@ zlwG0xD)p|q_5x+sC|{%AnrpWx`-t|EMbg*lfc0FWw?wBzf9c(Tal7qh*!(xHngRcW zVLD807dNj6!qP!n`)7k68&E((=6}`H&o-HojmI6Ht?_w zuAW6@)Gwpaz}B&D6veuXPq zp~apds}sIV6FV?&5ghP^g>lc4gP8xx4;1}97xOQ!G?UMxKG#0!3K+(8WYl^!Fb_f< z&_S3ZJx@u6-ZDoUl(fJ@$O|IZL~fB@q@*tLlJs-WGWJjA=q)pN1qL2k&1ST3oB^GyQP}tNm6aXm_PC1OL z*hFUs3QSZhT(!Bl$~DzqYjyFKI}U6>7*jYa$WhcrC6^7B;aGgv4M^fU4{om z8D{!OXX+#FY|BIdX(3tt%1R8885W?AF4zf(Q|yG$sdtN86Lx+)!xy|e^9I8@TArbN zA{UX}!Xbu(1wa@7INafaZVEe`rQ!{6+(=aAQ`7#yiYpX+e~t(X+;qd7A>zRYa)Z;Sg@|5HQ!fO|EX<{HPAcR8VM@C@ODK@w4j z9T)5eU;{dw>DA#5g9r*&EIVh+bL)%~F*o>zoRZuJA;I;NDx&>Kjb8l%KH2)~O**O5 zn;OCb+{AKq%99rX{fW85gPdYGvz&U~$(-=3N^j<77Yp9+O3dgMWgcacVfV)w2l&0? zk(SGnwxN~+rxmhCnnW z#Zo!ZUN9ce7Ss1R-8l3M_c@L2)A$mR! 
z8BRaP*C89r9_Jo+ktO8|O#TFq+Bivi5fm6a%F_O@4N-EE#r<_J66aXKuNZ2D zSB#Q*!ggpasZ1PO7B7u3UY;-l&g?-W0};-V-8op6k%P_Y0Z`@0HVYS+&v9`9ik$U; z8wQLce}&Ol^*C@s5mx#_QnpM)tkmC?{{y3`3>+6S;bMJ8p3X&w@ zq@>`Fo5@#O7?pjaxYJRdpHq2cXDh|g`{OiA6uyhjy|1t(Kg{>=NuVBKJ+26EeZg)9 zD@%Uk>JnNTs6qa+_X*#SZu<&1CZ7kxDA!>q5ibn$A{q0yTqs5;gi;BrY84U|Ni5)0 zO7zW!`WZUzpbAiZ#+3|x|3uw}3>7xFyL;P8eTfO6p!yosW3FcQi*ylRKnhuFjf=U- zRo$<9b-z(^=c<&; zokjZ{e9Rw@#-_vbrz~*M`qNqN=>Uo-Tmg9p%x3YGR+4S5zjtv* zGJ|nh4#p_Ju8=n@(0ga2&g8ivuNH7#@RoQmwrM8$QXu3Qlovx0i~U0#TXE|LfKM3u gz;W^`m}BrTt@|yn<*o4VO)l5?cZD + Date: 2012-10-14 + +""" + +# Header levels +H1, H2, H3, H4, H5, H6 = 1, 2, 3, 4, 5, 6 + +# List styles +UL, OL = 0, 1 + +# Alignment +CENTER, LEFT, RIGHT = 'CENTER', 'LEFT', 'RIGHT' + diff --git a/de_novo_scilife/pdf/common.pyc b/de_novo_scilife/pdf/common.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7ecd9fc427d77387a486f84a26f821c894a88fb GIT binary patch literal 559 zcmaKp&riZI6vrQoUk=c#$9mg^jUjpp+%p7;( rITnq`mSv2jVX{tE(VuunSJv + Date: 2012-10-14 + +""" + + +from reportlab.lib.styles import getSampleStyleSheet +from reportlab.lib.units import inch +from reportlab.lib import colors + +from common import * + + +class DefaultTheme(object): + + _s = getSampleStyleSheet() + doc = { + 'leftMargin': None, + 'rightMargin': None, + 'topMargin': None, + 'bottomMargin': None + } + headers = { + H1: _s['Heading1'], + H2: _s['Heading2'], + H3: _s['Heading3'], + H4: _s['Heading4'], + H5: _s['Heading5'], + H6: _s['Heading6'], + } + paragraph = _s['Normal'] + spacer_height = 0.25 * inch + table_style = [ + ('ALIGN', (0,0), (-1,-1), 'LEFT'), + ('VALIGN', (0,0), (-1,-1), 'TOP'), + ('FONT', (0,0), (-1,0), 'Helvetica-Bold'), + ('LINEBELOW', (0,0), (-1,0), 1, colors.black), + ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#C0C0C0')), + ('ROWBACKGROUNDS', (0,1), (-1, -1), [colors.white, colors.HexColor('#E0E0E0')]) + ] + + @classmethod + def doc_template_args(cls): + return dict([(k, v) for k, v in cls.doc.items() if v is not None]) + + @classmethod + def header_for_level(cls, level): + return cls.headers[level] + + def __new__(cls, *args, **kwargs): + raise TypeError("Theme classes may not be instantiated.") + + + diff --git a/de_novo_scilife/pdf/theme.pyc b/de_novo_scilife/pdf/theme.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b90fa15856e42b5497d0654379598271b8660af GIT binary patch literal 2512 zcmcImdruoj5T8368_Y8W5@^!q@@V2l!e9baiXvJbPC%tLI@3h8(w@$@Yy05N=iV-+ zMC6~QAF$u0AE2F?#o(xawGy|w+4;@vYaTmynZG6r7v6(<0GCe&`|t2o4I}~pJ7Iya zA+jL0p<#nXI0I1z;w&_>24^7}fj9?^oWUay<>5%=K#oES$LHY>fR6w_HO4@ULcKUf zd%QzZaPZL`8Ou`dyufJdRCki}g|k=MDLMU;Q`bpw((zA~>^d)OI`QlUL@=K97#t?ZIoG5hTWiJP!3U$ zL`kX)ruonG1kzpE@=qguM5n#-*f=V-@m1g8K#3l+K-y$$BLiU;#24ho4eBh@*`~|D zMq>m*bWsLG78Yd=Hi#WTHgDJ*vZIF0BRgi;QDh5-9Yb~;*O}l&3b5Fi1ThYB3g#Nq zASOV33F1D8X=vd|W-W|X@Anp<0SOB+Pe1;Cx z1tgJd=^y;G9daYr|lD z7;I=tf@+e+e$++|pFh9VBiL;n?7yytB;e%IlsX5MS4TYI4Qn;sp`+R%>t5BWM`{WM zdot=v9R~i=b`l9q#vJTdE8CTW+S`y*l+54W+WG#qTl=xPtMUjR?W|DKw18WCd#$Y- zb!CNGK+|bcI^YhzY7>dZga^0)IJV%J6LR4iN^L4HHSgjtI4k6|1UAR~Bh|)(4T&be(Zv+(Qp6 zcgo9s`Tl*lyj$_AwKp~IaR0DE-dGmW>n8ogQ$ZMoE%`6p6RqVzvCy}0K|GF!C+S64 z0zAwcwS+=tBLO?0Zt${SD5c0FNG(_#7|GrzcXu#-Bwl-(uj6rzmST&N<1Pm`06b#kxSY81GM%t=+f|To%6GXmJQaQ1I?sOCFG^G=E zmG-+jL|=)LDQ=X9N9R3RNz)|dC>p~l@Z?U;>ytBLZ_)+ieb4L4GtXN`)>v*;*(h_{ zPznZ8Q8$&HO#UC*>!pa}1%_7sPg)Ai&|LoBETW8y2^!?b((|~md0w1|(+HtUJ7_w) zr`)^Pxjcgedu5jIt(xw+>DElQZn_QE!I4}*dVcD+Q@_`7IfW-tu6lkTQ?DbbaJsZ6 z=WsLfgOke&_uCGA+*o|p!E4NgR{Kpno1zaRW6qvf;)htc_i+{XArZ>1>omBTYwc3% 
z(bZNfNOJa=m9g^N3UGSGbPn|2O|@TJM5O)^rE Nxw*`Yh0QKx{sAYXD&_zH literal 0 HcmV?d00001 diff --git a/de_novo_scilife/pdf/util.py b/de_novo_scilife/pdf/util.py new file mode 100644 index 0000000..81e3873 --- /dev/null +++ b/de_novo_scilife/pdf/util.py @@ -0,0 +1,18 @@ +""" + util.py + + Author: H.C. v. Stockhausen < hc at vst.io > + Date: 2012-10-14 + +""" + +def calc_table_col_widths(rows, table_width): + max_chars_per_col = [0] * len(rows[0]) + for row in rows: + for idx, col in enumerate(row): + for line in str(col).split('\n'): + max_chars_per_col[idx] = max(len(line), + max_chars_per_col[idx]) + sum_chars = sum(max_chars_per_col) + return [(x * table_width / sum_chars) for x in max_chars_per_col] + diff --git a/de_novo_scilife/pdf/util.pyc b/de_novo_scilife/pdf/util.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53fc74f7af6ee586095aac4d64de003d55a41952 GIT binary patch literal 811 zcmcIiU2oGc6unNrMqvWPCR7Pf5HEdMvrar!)dX5AQGvRNN>l=d$sBJnq)i(;U73=n z@edF`gI~uV;MkqS1J6Xc$M@s-`uNI~-`mX}{OLr{&Bw65fawQ-i0D0#(RVnEB9A8R ziYbkJhnG3{F2&S)uim z45Ok#C=O0X^wFcTN@b1KJu)5@HCy;nu~%`7XH?e7R9)A)O%)(jsdbITdHv;trpqqu z2~ADSX2B{ObO9Y6&e)-HiwgWuQ#5GzlSj*+H**>*$R=%P+HBW>#!fGhb7Li$ochnT6|3C7-n|A2Nxu^irGKHVV%Y$uCu8Vsjrc*j&V6rujl4B^K` zjaT3;;OTe)j@?RJaEq1i`1|w8@h6vlZ`XgGKOYO)EQb9hOg{pIL?3{RzQbV@dNgj9 zZ1E$Y?qg*}#jW|bx(i-qCQ8z-|E6`^@|P`tY!Y!51(}vJzvoB74~)Omrlk`9$Q|eh zMs~q+u-`e@>+J7!4qdt~Jn-Lv)nLak{R;pcmy8Ti?)=6rqn*N|Rb&(Wv(HqNXf+|j zC@&F;{j(u`@~9|NQK40rj7NFJ7QUA3Rb0mlDyn44j*Hnrx(kav6qvxY${WX zz-6^bL67cvtc=S{gVnl{O`EnKGKtdUTARBz3NB*FuT^Lw^AIt*xrl;P^SMm9NMd&u z?QU1$n#<-bO{`|piO|_}eGFqGS~UUWK);69o+}ZwqfUD%Z*Ek(Kj5R2_b2>xcsh7H z91Yr`O>@nMB4fcfHV^3kDrhNm( iJ^;?cL~#D@KK#G>{=djA@b{)kn8otQ{#RQCRqqeOOtM!1 literal 0 HcmV?d00001 diff --git a/pictures/NGI.jpeg b/pictures/NGI.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e71176ef0860711fad0156c253dd808398e31690 GIT binary patch literal 9957 zcmcI~byOVBv+v**2<{Nv-7P^E*hP174-g1$!94+z;IO#c;uhRBIE3H?4;tJh0Rorr z@Atdso%_zaf4=TMGpDLQQ&n?LRZmyfJpOq63E)E%!HNJ95&$4Qb>MLc2nDdvF^Gu> ziHV7bNJvS@h{-9)$;l|ls7T02$tX!*P*ReRQPQw6kW(|$5R)dv6378@opdb+d$OK3z1W1p40M(O?X#ZIHe;P6hDjGTx#?z!4K7jNOg@%oVgNpJG z=!pbS3D5}X`O%4Fp{@+(@8k1`Nd;h9+VF&$NemJ}Az`<7JyT?iaysrGYJ1l=PMIu2 z@-5z+Ge3!=eA0u0^gon62}edkf0~ngvI!Xp2^Ad!6XnUGf2{dOHNlf6C>jy5tZO{U zlNR&$rwoEZc{M$g>x^=4!b}Pv%i709;MtRH$OI?^fHZJJ4g_N&w>r&XLcw66U%?c= zFTFoi$Y2Z>^u4qIUcIEHKfp&i+dX@(AI#|(AfQ4PBv5Ety@v$R%E;!~FmI1`_q8qF z(HS1mwk?~7b)84#i0TzdKQq3R>S)-~xlF%S$)cPx7b!%DKwVHG$_=v#w0N;L5li#1 zaS>QceR4V_vPHOqvi>R^nhbRXkU|Tw^^wuzh~w}nr!8sfp%qCXa9!UGmB6);EkX~a zqz#xnh&0w9(g+sY*ILIHi(~vcsc;lbaXfIJ|C0VB81K+|gmH@YH=w;~k>pCgJwkpx z2n{Xlp>ARB&ph5~&wt5uw^>ydIC?I1!1(Ps{p*MinBoM9F4IP7i@v7}Ep`G)CR*RZ zoH*2cU3;0d4d=>uHIPLeh$ibHux=72SW?x9bA?jDynKgWPeT|s;)7@ibRIq3moMFK zvUlH(2>7&=}ugrs*Wu6Qf#p|OcvD{Js;RuRi+~W)Mes0zxw;;uc3M` z8iNKu|G%wjm#c`>R9AKJ=qo=nG{ZK;BU0%=Qp8Y^G}JcHb3ZkQ+6Myk9t4+=I=d^$2jl zf}NtqE!Rt0o!4h?{0N)(yyU!@Rr*XrE%u_j7 z8lf>v_>!vj?%Ih~;;**ex<`2y5Vs_^(%Yr{AnSzIxxW4djo+im?RVIhDED&ln|#H* zV&(U{a_m!H7pLT%(z1o|BLJ)Z!`OFKamKLct9{pscryxdHTH5XZWL7M--%Vj*w$2+ zf`_@1)7HeQB2fk@F~g@LZ~-}DMW0~XMxc69h2WFbR1+086jn26&WHAOQII1yt_UV4 zi}f7o_(6!+YzwH$9hs9Bu?t&6GW=T=HNTw!7M{Qy%P9tuL2y=I34K&@!Yq zQUb58u2u~UUqD6!BD4GZeY?=4i8bDCtp6NNH~sVM+!|rd%@(>(wV~n#jB~a-FF%O&+fl>RQLT*gyzz3Psm_7<~0!kUNBJ^+|D1w zMZb~0DX##}Gub-<)s``*U<@^7<-K51N~&BmW}y+_@L!%v@oSr^y{toC3+leU1c)w2 z`?&7r_yb;yrpCUkYXI}GCk%@>0swDiep2-#ZxKo%^B>~7t*zLLNr&`7x~&qm06>8f47M&2`eAnbam(( zFV4hp{ikdz^b)2@YIF=d;{9Nux#FR=Yx@TxKxbF!8_`wDy&}*V%zyj*e7ej2rcBsZ 
z>3IKC@U+8EN@4G3!PJ}Wio0nVcjuKKofv7W9nKpL1$2{AJ1Yaeo*7Kb)n!WUiVq@( zb`94lk3hlGOLWyenC(Kc;`Ll0s!iunB4MtIB>UgPEF_Lhibme{s{MhhH!Z9?c=z)% zsAvrb27+g{ib%aFx$wR56%_Y9ES4~q=MOzAbl>PPucjWA+KR61&2fCTlG>?ad*@WL z9~Gh;EofKIn(k9L!-m@Oo?_iN$ZInA#sA_`f`WBXz-cfhQd(B#Ym8iU;u=4i>>ns$ z7N-or?&&QZ#!qB`?q4G>yDs{Qu%4wTmq;GqKMT66ywyaeNrZs3@7SqEx@l@==-E3M z8pIO|NGh^J!A0pX(s*HR9cKA$qI!Z(E*;QB=9_iyZ?B%G;U>?si5+ntqzjrH7Jp}c z`8s-a`R~kqRQ6EEmjBm|#6Mfb75GQhD;h(dpcBR&Y4*H*m&ukikN9vblTCGGu^i zX*QT-$cn<$X66ZP_^8kFLd5zF>Zj(oic=^=amnr&)|9z=_-Bd` z+vViE7rydnXigm^G&OGlCz592xb_tg^VBm$h+D9Sxef4K7RF^3e3@QZ*4h27&S3%H`t=dSjJ0k%#x4HtS;+CPE>m; z>0T}*oEoO7(bgNMxL$FnJABgfh8E6XXYPw{YH*~Qkb?|xN7Ds&TXja%dX3gtY#a}y z#zzo^9F9GpJWgUwy@rpf-V(6HsO zuo>;tHy?DFQ;5By8O$z^JA-|dz z1*gEEo8e?n6Y-RMgxm*hEfBpn335^u@y$TiAHF8s4&AvUy}%vKqHw*0qFA+Cx1Tok zrV9IWnr_X+9nr3WPDmVU7d4e%^d2pLIN8fcC%EXcEu9^kw!|P8rW` zlA@7!E(iE%9&NANH}y0V6TDwwIz%>;DQ0*Uf0e5;ajWe}O*_6|xmUQgjjjemzx|;I z_;~Uu)hkVOgLPW>Gb_5+eSP4g18*FlVrBGIhr_Yd#j_l489#^iDXZNqtK;Q;+0`ONt#{W#;!7> zZf%%uTJ-Xy@C>9f%Wn?hv0O2)zEOI_4K{>-eJ<@j32~H)?>>s;-@VSyf8X?EbtCR*)IpaX6-fK;GCPQXPB(p8TBMF z*_oPbt(Mzxprpn(5^qQn`GIY}?Q#$4)HfQPg8U5a7Rx=t!?aL5Nb-IaK_z#G2crsh z=x^Xb!R!9zQ#-O zi9+gr1o(qV!{*-rB~J+p5G+=a`&@J?gZXN|s&=D<%#bPFn_{2~v|{4t=g9M-C2fjC z-AR(uRJsPVdig!0Z8vr9DACVm{IKzkOL8^s!YhCXZ$YW?lU37#`F{YOBQNVExjRSk zZi@U@))c-rLXYwm9Ip-bTO}3~I7YNFH64&K^Dr`%#C*ubdoityD=le`{S|QQ2u7(urjzZaK%e zM(oG@(AS=3-=K9=Ed;aOL|WYfxftEpiL${8s(9QTmL-PWK>oX>he{bV6KS(Ri&7vo z3}1ZGSfV3T=_S3PqZuCIUk#U{DG!yXAi1b<(!xsQ1i8}4;J=pg3;HdyOZOjznw ztODqs_k0ub{I6;G!9lnd>%G^2eOK)KJTh%WmV9mGU#w&ZNt^zM^5PmsL|;qSc3AvL z$o$q&P0%3Qt3B>lHj0T^t{HxpMp~)`c`_AOgZe?6I`gYn&ZDk7dP_?=+=PF>%Ukwn z%+e0^q>wuBplirHz}_8oUMc2)eUp9@^99)cPN-kCDGUzX+358Zly-?KkZGz#z(k64QUH=q2*xi}l`Fu-Rh<^Ucw9WW?p*pINhlv{Q(p&8{9=9G)!=wT^p&dPKv zWHgc_^-`%SKVo@n@kX)aO4rF|G3)~eE{GGFXenGwr0QgB{j`%o2CU7T$&q&d<(QZ# zqwdKTZ`7pli>uapkM8PoN8&G=;|eEf5Dn8m&EJfyHX2FnU2u%etVQ$BZR!6Q+bVrK%ZEX&gA`2ZbV?zEo2boAHKn8<%vMjZD6+|lme;Gi zBnMoF0P0iaAP1o&5gF#!7M$-ZkW`AoaeNinkcn$F-RKf^g9+FmtjA^6a+CH z57TYjtEuwM=%DfE(Fwz?EUv&Nn2;riOd_K~x5N8NWT-Y+=hIvH#$( zRP(cVKlY8~XrwS13TtiTEAn$-^FxR94AL}(;``i$ zR6Kf4a0yHnA7ug+M|aTLT*w1B7H4zO)=&o}cDx>{EF!IBifV}a#PlmP*T9H}E-@tC zg~>$v z_D*Ah0-8&O?9B
{=!pqQ26k<+h8N9=>T5Eu?x*@qQ7sw>$T(tqI4G9U(6kIIc%K za!;z9N6SlJ?Rq{7dgD3TQSb@!Z^Wn9gV#z9ZHsufMfa^;2gm;#Ep&}mf4+dN6`Ed7 zEka8WmSb$+GQbIHp8r~_RSMDYDXYnWO+cd6HYC_m5r(WBz@}|30%?!{3a~R`B5^as zAt$DHKG;SY2Uu9SFqeE83R>@TabbC93vPC9vFwpZ=DG&%y@T$ytR$7n z(uRmaFx_Rp#LhZ1+ZiaWN1cbay?8QPH=U%WrnRKG zsR|*&1#Yn)PV%UoWD0G?VyoIz=tyrm@nj1nb z-$aNkalVqCwvwj>Jc`p0M5q(BvAlDYz~w-J+}GwvNUX^Y4Hiqd>G#jjX&kY2WzKKX zv3uTZ4|b>%=&|L%Vg?H{6u@N{dL{Zgpw>%<*>9$fs;yq48p7Tw6l>m6DJ!G0Bk4%) zTb5PivChdhZRTyqwKUW)bs8ql1xAkx=o+Ovkd&Qwi5g~S)FAMe@#ZwOXJ!He$~>5k;#Q9<_d(w#ZW$6r!OEZ7?7EqMYn)%-Z9ko0UDH zgrP9NR5U|Y8Ng6k%8seVQ!*7%wW!r7Sp^YLt5mnnh*bUi z)I97>d@WGW0+uEOMZRXoD>J_lu8x$G z&m%w(tM}J@cfVzH>>n0m)^F(z{{2s#v@zwG?SoED@N>MowKMW7`L*XtI^(96O-92$ zz|^lNQJObjCuiTKq@DZ&5&_pI3aD7#Sne1(hquFxvF5aIliLtX*hIkyZ=BC4(he*h z%>DHTvh|1(ru?#860u9rKCK``Fmm=DlEOLd7L}ZhW)&tXEimE1D?q3X!shS30A;41 zv1Naayr9eX5gD`v`{3E~z+1S*rub$_Cki7hM_@g47pNhV)Enk`{2i>U&GNR3%W2 zn`j9}b$BQXcXY$1y~hU!mHyy!c^zSWgzuZaLDNHya$yRXe0kbeG>AdgwgFPDb^MSH zKj%IiU?ptrJb-}ku+(rpElGHi1!I!>i#PPEL`wr2(@)tBBjp0$8X9>ZLdCQ|j4u~J zOMCqVRuEp_9i6hVNRI{kcUu}Y-1BXPusX1eTM{JRkPX&$qkTyp!bsX#XrkilwXdQM z%6hBglzxxn=jjLQos*(t%c*!uCoUCTy~Fa8YuDtyCeG1oi)o=G)3_kl=l{^ck~0Ka z_j?J9-%Dy38-E>)u5}8m(5haX*?+pf@qd+&_7fFZ!LY_{FAry$B$GPL$Kd)amH!vb~GlBqLR>M|s9rHR?eHmY??!_>yNj>m~a^j>}6 z_y0ImABDHkWZ#;Z3GhstwZ_g5Np8Ho@^8|+{#vteQh0SiQ`F`0qB`!s!JjPa)>IC5 zB-H}iW?{{O_f^7?5Jk<}Zy(G7QyBto=Qyv! zlzSHD;uFS=E|_Eh*jpgs=gE%k?ZI{bc;dbMDaa}IF6n}P3C|33OoXDgMMuBJOV?!x3PylW`|w8YF=LVc&=`F*olULW<=08OKhn)%_(S$y0=chjGMM8nl?w08`IZDeSFIcoLcQK zmMz_N*X;CFF?eTMm0RI1gMx9<4$CUMEypml$?zE^Cj6zbdj(Pt;Z76mrsFCJJp1)71g57JY$G$)&}HScu~^$&+ZjEqMe zX%db5riwGw`iLhHM{N+(mn4_@jXLo~Nzr8GC#-{^6%A|5`Nu^0Inp_|o4-?@t$o#6 zzoU=*9U0gH55#}4d$sVwUuiNZ;QXx#{f(Irzuoz-LZerhGI|N;s9iP^kgkN;D)L-@8u` zqHNv?ZCie0ERqbggL)m|tadqV@ww(!V8~~7CS=($LIJ4_wqct^&`^hNLHzKZ0fMwU z$zC6kgwrF0iW2R08cF`+_|GA09@=B2|FX)B#+=f&Qi4+HrV z5kbl3KzplC3xMH|l=t9M-+hI2SxxGO9mU@*Q}NF9yKV7$1&jN7V~2xNM-RjI$8yra zL05N-!-4!k-;alU9XX1ORhKMb$wV3Ay@!Ll{0@1D*;6N5L-iaKHW5$cWDpw%Fg{TRzd-L)l zDH0@0lkNzWa-d83Td&TD6im>?qL89^Ovv}m@EC{M^q&T*v6AhWk&EeycV9VhSd!xL zIOlF}m-)AL>q=8Fx;?+g2|Beu)Hp`o5JJOXagqX06%c#n3XsqbZ>i0&29#0Z+>$=` zDPUS`a%cQwyQ1!B? 
z@$gqZ)^zdn#|{cfEP=R{yEo@Y{BE}TW_aD-ndEwK<2vnQRxrKqb+PO@cMCDv&-wLK z%==FmTM7{x@CXWo1;dK9F?Se*TNT~H;u5_P*6Sb$kw)4IRhl-U7m>xX&Qx0vUnNxi z>LuODM`Kcu3Ilb}(!qhtU`ElPLbDgm7h_yQEgOSY7{rdGf`>_xl&_9*@A_{dmw#4t<+>9la2X zCSKvR$4u0Rm_^T+tg~5MtI3(sxmc8p6zir+)A9{o4HInDe7G#w8(&d@(vtep!Pq4H zY~nz?D0g1akcDWIwL;OfuZ5CI*^m@ZftFxX&GqFx6#A31a+IEk`{SH8+RrLAPN>Pf zg};9|l)EHu4bmvdm3N;r`bWuk`Z#hmxe3m3E7Iv8&3gU-U9G>%+q3tf-(CFk<-Z}H z3M;oh71-~5JrCmDra91PrG0vYHo7|F`S5*LqF3dsU0=ghW#9jYh?)T*f>3r2JaO$f z*EUrN#z+bEQM#W~s~*p=*O{4CH`X>3yhjeBrcMtV5);|jh@hQusK1eugFsfM2Cf6u z+y)uPc-^@SD?T^#B$%Uazj=R_ThoZB*r^0XJ}9Dxf!~(Cs#zN(^wnTpyiYlOw|gJ- z_9AH5!C~&I=dLRgvzbK4n}j9BUUhgNDWb#uwzJw~@!*2XLvv|qIxyY;(_3cEed5Lb zEjG<4S87GZ&oSoWauDc7Ofi~R^JoaU^{6dZGGV$943W4B-70Ge+pw=cirEf;*HtSR=&|!*6jK{97MaQBReD#H`xl`$PfVVqA|gvBz|U9s3&J`&cKVv|7LqdH@RA{hu4JpIR#JhK9e zSlQ+&ZMUmQHZQ{1I@ zVg0_bR|X3J*nlBCoBSnbT_Z4aKI6-`BE_)%1U-DV1ReA2Zku=FPj$xR*O;D;$13RD0#t5Z!>?w?p#^_WFMsoXu;^tm$6|oeJ*`o!NoOr(osg4B*ct?E z+RqJwP;(}Fcse!?T6L`94hmb$8!f5?4f@#CBV*Fxzr$c8Q99kIjk`*c+$yiBp{}W=h0@l-p||O1qj4B427}W#!szQ`46s5JaUyM2Kudl(n{YR7f^aAS#Ut*~%7H z5Y?IrS<5&LqCr$5fh27QP>O&ha2^gC0WcH@M}g8VKo0<5aPXg3e+EKMUI7mK2imIx zFc1!dD}eGa<$pN{g98YZoQAxiwZc{#y5~`@rV%AIuj9?ZyNcVi@TPXVcv1XL*{P09 z^Z&8{2uHwV1dEcn!9WE3ry&5sP;i8Xq32d>x^x0ify+1wjskW9<7C_1hh>eK2?PA* z7f~shB^=gl{!^A|(WAHI>ZVN6F*b#E!S2BQ=A1mItWA7>eMEAm(0$=>=%Qz_=$)-c z`zhs7tb)CPq~b+bQE{n2wdJy#Au+kMHvBSfon5rOsn z1?VEDvlgZP@xsKB`oX*pAFjxeQtuUHA(J8OuhLNpF<<>5jnwY ztbDIwn*~GaWY>jvB36q=c7`I2)?;J;Jz-4bpK%1$E?L=_H`vS-FMN~&E5Z#?(_Czp z=+u?&qM_rl#g^^rHFTQOH}Q4H!-n7UmIy;bojsf8hQUYOJ6Wdna}tM@n$6A@H>;lm|21QDdI>MqgRRG+{J)Vt<8GoB0?% z^T7I6%<@XS==U9k%5&B)ox(!0QF?hp^f@_iauef3Y|ETO6fRWLvWwgo0H%25Z0UA( zv#77BFX%h(mHOs&iEy7dx2*g$sp*mzyn4L#HxnsfSJ(5=Q~&D8*;m7z ztU}h>#W}6;eA@@lkPr@WvKbxw#$^2Sd{^y5Eg9PMC-5K$xB^4>qyjq!5 z%)&G`E^;$EX5wxfQHu1%@l-gHigLE&mFnpl>(T)K41 znLAu5*$YyE5EV7AbzO0ZuTyybprERBF=FpzLGTO%Y0>jNIp=sh?>#LyTevQ2=8Pi1 zG51Ye95&G>dLjn#IxNQOleHV>P`6(ZF>#&uG4OfV)Iyw(5c#sM{kp`yTx*hJw-#Aw z_z=b4f98%V<3vt;ahS;QhftDR`)PcEg(6F zafQ^|o8HOZ-Hg}i)tUKqUdZ(DZ9&FC!0syO)LApnsk3Gll~*UZz2X6?u4|3*kATb+ zW>7#JOQ&Y^?v+csZ%nEPf|}Xqp0ghydy}H$kK78xFm9qtOrOrSb-Q$bie7unoo>_K zt7_a;bJ&e#Jc8mc#tXBQaNP=@sY literal 0 HcmV?d00001 diff --git a/script/MP.py b/script/MP.py deleted file mode 100644 index 07f893f..0000000 --- a/script/MP.py +++ /dev/null @@ -1,102 +0,0 @@ -import sys, os, yaml, glob -import subprocess -import pandas as pd -import align -import common - - - -def run(global_config, sample_config): - sorted_libraries_by_insert = common._sort_libraries_by_insert(sample_config) - print sorted_libraries_by_insert - run_default(global_config, sample_config,sorted_libraries_by_insert) - - - -def run_default(global_config, sample_config,sorted_libraries_by_insert): - print "running the entire pipeline" - print " remove adator" - sample_config = _run_trimmomatic(global_config, sample_config,sorted_libraries_by_insert, "01_trimmomatic") - - sorted_libraries_by_insert = sorted(sample_config["libraries"].iteritems(), key=lambda (k,v): v["insert"]) # recompute sorted libraries by insert - - print " align sequences" - if not os.path.exists("02_alignments"): - os.makedirs("02_alignments") - os.chdir("02_alignments") - print sorted_libraries_by_insert - sorted_libraries_by_insert = align._align_reads(global_config, sample_config, sorted_libraries_by_insert) # align reads - print sorted_libraries_by_insert - sorted_alignments_by_insert = align._merge_bam_files(global_config, sample_config, sorted_libraries_by_insert) # merge alignments - print sorted_libraries_by_insert - 
sorted_alignments_by_insert = align.picard_CGbias(global_config, sample_config,sorted_alignments_by_insert) - print sorted_libraries_by_insert - sorted_alignments_by_insert = align.picard_collectInsertSizeMetrics(global_config, sample_config,sorted_alignments_by_insert) - print sorted_libraries_by_insert - sorted_alignments_by_insert = align.picard_markDuplicates(global_config, sample_config,sorted_alignments_by_insert) - print sorted_libraries_by_insert - - os.chdir("..") - return 0 - - -def _run_trimmomatic(global_config, sample_config, sorted_libraries_by_insert, step): - print " running trimmomatic ..." - mainDir = os.getcwd() - program = global_config["Tools"]["trimmomatic"]["bin"] - program_folder = os.path.dirname(program) - adapterFile = os.path.join(program_folder, "adapters", "NexteraMP.fa") - if not os.path.exists(adapterFile): - print "file {} does not exists: it must be present: you must provide it!!!".format(adapterFile) - return sample_config - - runningDir = os.path.join(mainDir, step) - if not os.path.exists(runningDir): - os.makedirs(runningDir) - os.chdir(runningDir) - #now I am in running dir, I need to process one by one the libraries - - for library, libraryInfo in sorted_libraries_by_insert: - read1=libraryInfo["pair1"] - read2=libraryInfo["pair2"] - orientation = libraryInfo["orientation"] - insert = libraryInfo["insert"] - workingOnLibraryDir = os.path.join(runningDir, "{}_MP_{}".format(library,insert)) - if not os.path.exists(workingOnLibraryDir): - os.makedirs(workingOnLibraryDir) - os.chdir(workingOnLibraryDir) - output_read1_pair = os.path.join(workingOnLibraryDir, "{}_MP_1.fastq.gz".format(library)) - output_read1_sing = os.path.join(workingOnLibraryDir, "{}_MP_u_1.fastq.gz".format(library)) - output_read2_pair = os.path.join(workingOnLibraryDir, "{}_MP_2.fastq.gz".format(library)) - output_read2_sing = os.path.join(workingOnLibraryDir, "{}_MP_u_2.fastq.gz".format(library)) - - if os.path.exists(output_read1_pair): - print "library {} already computed: skyp this".format(library) - libraryInfo["pair1"] = output_read1_pair - libraryInfo["pair2"] = output_read2_pair - os.chdir("..") - continue - - threads = 8 - if "threads" in sample_config: - threads = sample_config["threads"] - - command = ["java", "-jar", program, "PE", "-threads", "{}".format(threads), "-phred33", read1, read2, output_read1_pair ,output_read1_sing , output_read2_pair, output_read2_sing ,"ILLUMINACLIP:{}:2:30:10".format(adapterFile), "MINLEN:30" ] - #print ' '.join(map(str,command)) - stdOut = open("trimmomatic.stdOut", "w") - stdErr = open("trimmomatic.stdErr", "w") - returnValue = subprocess.call(command, stdout=stdOut, stderr=stdErr) - returnValue = 0 - if returnValue != 0: - print "error while running command: {}".format(command) - else: - libraryInfo["pair1"] = output_read1_pair - libraryInfo["pair2"] = output_read2_pair - - os.chdir(runningDir) - - os.chdir(mainDir) - print "trimmomatic done" - return sample_config - - diff --git a/script/QCcontrol.py b/script/QCcontrol.py deleted file mode 100644 index d6236a6..0000000 --- a/script/QCcontrol.py +++ /dev/null @@ -1,128 +0,0 @@ -import sys, os, yaml, glob -import subprocess -import pandas as pd -from matplotlib import pyplot as plt -import common - - -def run(global_config, sample_config): - sorted_libraries_by_insert = common._sort_libraries_by_insert(sample_config) - if "tools" in sample_config: - #need to follow the commands listed here - for command in sample_config["tools"]: - command_fn = getattr( sys.modules[__name__] , 
"_run_{}".format(command)) - sample_config = command_fn(global_config, sample_config, sorted_libraries_by_insert) - else: - #run default pipeline for QC - sample_config = _run_fastqc(global_config, sample_config, sorted_libraries_by_insert) - sample_config = _run_abyss(global_config, sample_config, sorted_libraries_by_insert) - - - - -def _run_fastqc(global_config, sample_config, sorted_libraries_by_insert): - print "running fastqc ..." - mainDir = os.getcwd() - FastqcFolder = os.path.join(os.getcwd(), "fastqc") - if not os.path.exists(FastqcFolder): - os.makedirs(FastqcFolder) - else: - print "done (fastqc folder already present, assumed already run)" - return sample_config - fastq_stdOut = open("fastqc.stdOut", "a") - fastq_stdErr = open("fastqc.stdErr", "a") - program=global_config["Tools"]["fastqc"]["bin"] - program_options=global_config["Tools"]["fastqc"]["options"] - for library, libraryInfo in sorted_libraries_by_insert: - command = [program] - for option in program_options: - command.append(option) - read1=libraryInfo["pair1"] - read2=libraryInfo["pair2"] - command.append(read1) - if read2 is not None: - command.append(read2) - print command - subprocess.call(command, stdout=fastq_stdOut, stderr=fastq_stdErr) - print "fastqc succesfully exectued" - return sample_config - - -def _run_abyss(global_config, sample_config, sorted_libraries_by_insert): - print "I am running abyss to check kmer content" - mainDir = os.getcwd() - ABySS_Kmer_Folder = os.path.join(os.getcwd(), "abyss_kmer") - if not os.path.exists(ABySS_Kmer_Folder): - os.makedirs(ABySS_Kmer_Folder) - else: - print "done (ABySS_Kmer_Folder folder already present, assumed already run)" - os.chdir(ABySS_Kmer_Folder) - _plotKmerPlot(1 , 50) - os.chdir("..") - return - - os.chdir(ABySS_Kmer_Folder) - ABySS_Kmer_stdOut = open("ABySS_Kmer_Folder.stdOut", "a") - ABySS_Kmer_stdErr = open("ABySS_Kmer_Folder.stdErr", "a") - - program = global_config["Tools"]["abyss"]["bin"] - program = os.path.join(os.path.dirname(program), "ABYSS-P") - program_options=global_config["Tools"]["abyss"]["options"] - if "abyss" in sample_config: - program_options=sample_config["abyss"] - - threads = 8 # default for UPPMAX - if "threads" in sample_config : - threads = sample_config["threads"] - - command = "mpirun -np {} {} ".format(threads, program) - kmer = sample_config["kmer"] - command += "-k {} ".format(kmer) - command += "--coverage-hist=histogram.hist -o preUnitgs.fa" - for library, libraryInfo in sorted_libraries_by_insert: - read1=libraryInfo["pair1"] - read2=libraryInfo["pair2"] - orientation = libraryInfo["orientation"] - if orientation=="innie": - command += " {} ".format(read1) - if read2 is not None: - command += " {} ".format(read2) - if orientation == "none": - command += " {} ".format(read1) - print command - subprocess.call(command, shell=True, stdout=ABySS_Kmer_stdOut, stderr=ABySS_Kmer_stdErr) - subprocess.call(("rm", "preUnitgs.fa")) - _plotKmerPlot(15,200) - - os.chdir("..") - return - -def _plotKmerPlot(min_limit, max_limit): - Kmer_histogram = pd.io.parsers.read_csv("histogram.hist", sep='\t', header=None) - Kmer_coverage = Kmer_histogram[Kmer_histogram.columns[0]].tolist() - Kmer_count = Kmer_histogram[Kmer_histogram.columns[1]].tolist() - Kmer_freq = [Kmer_coverage[i]*Kmer_count[i] for i in range(len(Kmer_coverage))] - kmer_freq_peak = Kmer_freq.index(max(Kmer_freq[min_limit:max_limit])) #coverage peak, disregarding initial peak - kmer_freq_peak_value=max(Kmer_freq[min_limit:max_limit]) - - xmax = max_limit - ymax = 
kmer_freq_peak_value + (kmer_freq_peak_value*0.30) - - plt.plot(Kmer_coverage, Kmer_freq) - plt.title('K-mer length = %s' % 54) - plt.xlim((0,xmax)) - plt.ylim((0,ymax)) - plt.vlines(kmer_freq_peak, 0, kmer_freq_peak_value, colors='r', linestyles='--') - plt.text(kmer_freq_peak, kmer_freq_peak_value+2000, str(kmer_freq_peak)) - plotname = "kmer_coverage.png" - plt.savefig(plotname) - plt.clf() - return 0 - - -def _run_jellyfish(global_config, sample_config, sorted_libraries_by_insert): - print "Jellyfish still to be implemented" - - - - diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f54dca0 --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python + +from setuptools import setup + +setup(name='de_novo_scilife', + version='0.5', + description= 'A same automated analysis pipeline for de novo assembly analysis', + author='Francesco Vezzi', + author_email='francesco.vezzi@scilifelab.se', + url='https://github.com/vezzi/de_novo_scilife', + scripts=['de_novo_scilife/deNovo_pipeline.py'], + packages=['de_novo_scilife'], + namespace_packages=["de_novo_scilife"], +) diff --git a/utils/OpGen_util.py b/utils/OpGen_util.py new file mode 100644 index 0000000..2bb0c15 --- /dev/null +++ b/utils/OpGen_util.py @@ -0,0 +1,395 @@ +import sys, os, yaml, glob +import subprocess +import argparse +from collections import Counter +import pandas as pd +import csv + + +def main(args): + workingDir = os.getcwd() + assembly = args.assembly + minCtgLength = args.min_contig + + assembly = _build_new_reference(assembly, minCtgLength) + if args.only_reference > 0: + return + ## now generate stats + (contigsLengthDict, contigsSequencesDict) = _compute_assembly_stats(assembly, args.genome_size) + if args.only_stats > 0: + return + + #problems = find_problems_in_maps(args.opgen_report, contigsLengthDict, contigsSequencesDict) + + produce_consensus(args.opgen_report, contigsLengthDict, contigsSequencesDict, args.output) + + return + + + +def produce_consensus(opgen_report, contigsLengthDict, contigsSequencesDict, output): + print opgen_report + # starting to parse opgen report, since the csv file is unstructured I need to parse it + placement = "{}.placement".format(opgen_report) + stats = "{}.stats".format(opgen_report) + gaps_overlaps = "{}.gaps".format(opgen_report) + unplaced_contigs = "{}.unplaced".format(opgen_report) + with open(opgen_report, 'rb') as unstructured_file: + placement_file = open(placement, 'w') + stats_file = open(stats, 'w') + gaps_overlaps_file = open(gaps_overlaps, 'w') + unplaced_contigs_file = open(unplaced_contigs, 'w') + current_writing_file = placement_file + for row in unstructured_file: + if "N50" in row: + current_writing_file = stats_file + if "Gaps" in row: + current_writing_file = gaps_overlaps_file + if "Unplaced" in row: + current_writing_file = unplaced_contigs_file + current_writing_file.write(row) + current_writing_file.close() + placement_file.close() + stats_file.close() + gaps_overlaps_file.close() + + # I need a list with one element for each base in my map. 
The problem is that the OpGen report does not tell the length of the maps, therefore I need to find the maximum limit between the placements part and the gapped part + Maps = {} + Maps_length = {} + ContigsToMaps = {} + MapsToContigs = {} + with open(placement, 'rb') as csvfile: + opgenCSV = csv.reader(csvfile, delimiter='\t') + #Chromosome, Start, End, Contig, Start, End, Orientation + header = opgenCSV.next() + for row in opgenCSV: + if len(row) > 0: + #print ', '.join(row) + (Map, Map_Start, Map_End, Contig, Ctg_Start, Ctg_End, Orientation) = (row[0], int(row[1]), int(row[2]), row[3], int(row[4]), int(row[5]), row[6]) + Contig = Contig.split(" ")[0] + if not ContigsToMaps.has_key(Contig): + ContigsToMaps[Contig] = [[Map, Map_Start, Map_End, Ctg_Start, Ctg_End, Orientation]] + else: + ContigsToMaps[Contig].append([Map, Map_Start, Map_End, Ctg_Start, Ctg_End, Orientation]) + if not Maps_length.has_key(Map): + Maps_length[Map] = Map_End + MapsToContigs[Map] = [[Map_Start, Map_End, Contig, Ctg_Start, Ctg_End, Orientation]] + else: + if Map_End > Maps_length[Map]: + Maps_length[Map] = Map_End + MapsToContigs[Map].append([Map_Start, Map_End, Contig, Ctg_Start, Ctg_End, Orientation]) + else: + print "{} {}".format(Map_End, Maps_length[Map]) + + with open(gaps_overlaps, 'rb') as csvfile: + opgenCSV = csv.reader(csvfile, delimiter='\t') + #Map Type Map_Start Map_End Length + header = opgenCSV.next() + header = opgenCSV.next() + for row in opgenCSV: + if len(row) > 0: + (Map, Type, Map_Start, Map_End, Length) = (row[0], row[1], int(row[2]), int(row[3]), int(row[4])) + if not Maps_length.has_key(Map): + Maps_length[Map] = Map_End + else: + if Map_End > Maps_length[Map]: + Maps_length[Map] = Map_End + + # now Maps_length contains the length of the Maps... thank you so much OpGen!!!!!
+ multipleHittedPositions = 0 + rescuedBases = 0 + conflictingBases = 0 + for Map in Maps_length: + Maps[Map] = ["n"] * Maps_length[Map] + print "now working with Map {}".format(Map) + if MapsToContigs[Map] is not None: # if this map has some contigs aligning + for hit in MapsToContigs[Map]: + start_on_Map = hit[0] + end_on_Map = hit[1] + Ctg = hit[2] + start_on_Ctg = hit[3] + end_on_Ctg = hit[4] + orientation = hit[5] + #extract the sequence + sequence = contigsSequencesDict[Ctg][start_on_Ctg:end_on_Ctg] + if orientation == '-1': + sequence = revcom(sequence) + letters = list(sequence) + index = start_on_Map + + for base in letters: + if len(Maps[Map]) <= index: + Maps[Map].append("n") + if Maps[Map][index] != "n": + multipleHittedPositions += 1 + if Maps[Map][index] == "N" and base != "N": + Maps[Map][index] = base + rescuedBases += 1 + elif Maps[Map][index] != "N" and base != "N": + conflictingBases += 1 + else: + Maps[Map][index] = base + index += 1 + #print hit + print "positions in the map covered more than once: {}".format(multipleHittedPositions) + print "rescued bases: {}".format(rescuedBases) + print "conflicting bases: {}".format(conflictingBases) + print_contigs(Maps, output) + + +def revcom(s): + return complement(s[::-1]) + +def complement(s): + letters = list(s) + complemented = [] + for base in letters: + if base == "A" or base == "a": + complemented.append("T") + elif base == 'C' or base == 'c': + complemented.append("G") + elif base == 'G' or base == 'g': + complemented.append("C") + elif base == 'T' or base == 't': + complemented.append("A") + elif base == 'N' or base == 'n': + complemented.append("N") + else: + complemented.append("N") + + return ''.join(complemented) + + +def print_contigs(Maps, output): + with open("{}.fasta".format(output), "w") as final_assembly: + for Map, sequence in Maps.iteritems(): + final_assembly.write(">{}\n".format(Map)) + final_assembly.write("{}\n".format("".join(sequence))) + + + +def find_problems_in_maps(opgen_report, contigsLengthDict, contigsSequencesDict): + ###TODO: this function is still to be defined. + print opgen_report + # starting to parse opgen report, since the csv file is unstructured I need to parse it + placement = "{}.placement".format(opgen_report) + stats = "{}.stats".format(opgen_report) + gaps_overlaps = "{}.gaps".format(opgen_report) + unplaced_contigs = "{}.unplaced".format(opgen_report) + with open(opgen_report, 'rb') as unstructured_file: + placement_file = open(placement, 'w') + stats_file = open(stats, 'w') + gaps_overlaps_file = open(gaps_overlaps, 'w') + unplaced_contigs_file = open(unplaced_contigs, 'w') + current_writing_file = placement_file + for row in unstructured_file: + if "N50" in row: + current_writing_file = stats_file + if "Gaps" in row: + current_writing_file = gaps_overlaps_file + if "Unplaced" in row: + current_writing_file = unplaced_contigs_file + current_writing_file.write(row) + current_writing_file.close() + placement_file.close() + stats_file.close() + gaps_overlaps_file.close() + + # I need a list with one element for each base in my map.
The problem is that the OpGen report does not tell the length of the maps, therefore I need to find the maximum limit between the placements part and the gapped part + Maps = {} + Maps_length = {} + ContigsToMaps = {} + with open(placement, 'rb') as csvfile: + opgenCSV = csv.reader(csvfile, delimiter='\t') + #Chromosome, Start, End, Contig, Start, End, Orientation + header = opgenCSV.next() + for row in opgenCSV: + if len(row) > 0: + #print ', '.join(row) + (Map, Map_Start, Map_End, Contig, Ctg_Start, Ctg_End, Orientation) = (row[0], int(row[1]), int(row[2]), row[3], int(row[4]), int(row[5]), row[6]) + Contig = Contig.split(" ")[0] + if not ContigsToMaps.has_key(Contig): + ContigsToMaps[Contig] = [[Map, Map_Start, Map_End, Ctg_Start, Ctg_End, Orientation]] + else: + ContigsToMaps[Contig].append([Map, Map_Start, Map_End, Ctg_Start, Ctg_End, Orientation]) + if not Maps_length.has_key(Map): + Maps_length[Map] = Map_End + else: + if Map_End > Maps_length[Map]: + Maps_length[Map] = Map_End + else: + print "{} {}".format(Map_End, Maps_length[Map]) + + with open(gaps_overlaps, 'rb') as csvfile: + opgenCSV = csv.reader(csvfile, delimiter='\t') + #Map Type Map_Start Map_End Length + header = opgenCSV.next() + header = opgenCSV.next() + for row in opgenCSV: + if len(row) > 0: + (Map, Type, Map_Start, Map_End, Length) = (row[0], row[1], int(row[2]), int(row[3]), int(row[4])) + if not Maps_length.has_key(Map): + Maps_length[Map] = Map_End + else: + if Map_End > Maps_length[Map]: + Maps_length[Map] = Map_End + # now Maps_length contains the length of the Maps... thank you so much OpGen!!!!! + + # print contigs that align in more than one map + ##### now decide if these contigs are suspicious (need to be broken or they are ok) + ##### if a contig maps in two different places in the same way, then this is a real repeat.
If there are two clear alignments then I must suggest to breack the contig + contigsInMultipleMaps = 0 + contigsWithErrors = 0 + for Contig in ContigsToMaps: + length = len(ContigsToMaps[Contig]) + if length > 1: + contigsInMultipleMaps += 1 + contigsCoordinates = [] + print Contig + " --> " + for entry in ContigsToMaps[Contig]: + contigsCoordinates.append([entry[3],entry[4], (entry[3] - entry[3])/contigsLengthDict[Contig] ]) # save only start, end, and length of alignment + print entry + ### check if this contig is an erroneus contig or a real repeat + print "number of contigs aligning in multiple maps is {}".format(contigsInMultipleMaps) + + + + + + + for Map in Maps_length: + Maps[Map] = [0] * Maps_length[Map] + + + + + + +def _compute_assembly_stats(assembly, genomeSize): + stats_file_name = ".".join([assembly, "statistics", "txt"]) + contigsLengthDict = {} + contigsSequencesDict= {} + contigsLength = [] + totalLength = 0 + numContigs = 0 + maxContigLength = 0 + N50 = 0 + N80 = 0 + numNs = 0 + + with open(assembly, "r") as ref_fd: + fasta_header = ref_fd.readline() + header = (fasta_header.split(">")[1]).rstrip() + sequence = "" + for line in ref_fd: + if line.startswith(">"): + contigsLength.append(len(sequence)) + contigsLengthDict[header] = len(sequence) + contigsSequencesDict[header] = sequence + totalLength += len(sequence) + numContigs += 1 + counter = Counter(sequence) + numNs += (counter['n'] + counter['N']) + sequence = "" + header = (line.split(">")[1]).rstrip() + else: + sequence+=line.rstrip() + contigsLength.append(len(sequence)) + contigsLengthDict[header] = len(sequence) + contigsSequencesDict[header] = sequence + totalLength += len(sequence) + numContigs += 1 + counter = Counter(sequence) + numNs += (counter['n'] + counter['N']) + + if os.path.exists(stats_file_name): + print "assembly stast file {} already created".format(stats_file_name) + return (contigsLengthDict, contigsSequencesDict) + + + percentageNs = float(numNs)/totalLength + contigsLength.sort() + contigsLength.reverse() + + + teoN50 = genomeSize * 0.5 + teoN80 = genomeSize * 0.8 + testSum = 0 + N50 = 0 + N80 = 0 + maxContigLength = contigsLength[0] + for con in contigsLength: + testSum += con + if teoN50 < testSum: + if N50 == 0: + N50 = con + if teoN80 < testSum: + N80 = con + break + + with open(stats_file_name, "w") as stats_file_name_fd: + stats_file_name_fd.write("#scaffolds : {}\n".format(numContigs)) + stats_file_name_fd.write("totalLength : {}\n".format(totalLength)) + stats_file_name_fd.write("maxContigLength : {}\n".format(maxContigLength)) + stats_file_name_fd.write("N50 : {}\n".format(N50)) + stats_file_name_fd.write("N80 : {}\n".format(N80)) + stats_file_name_fd.write("percentageNs : {}\n".format(percentageNs)) + + return (contigsLengthDict, contigsSequencesDict) + + + +def _build_new_reference(assembly, minCtgLength): + new_assembly_name = os.path.basename(assembly) + (basename, type, extention) = new_assembly_name.split(os.extsep) + new_assembly_name = basename + ".{}.{}.{}".format(type, minCtgLength, extention) + new_assembly_name = os.path.abspath(os.path.basename(new_assembly_name)) + + if os.path.exists(new_assembly_name): + print "assembly {} already created".format(new_assembly_name) + return new_assembly_name + + contigID = 1 + with open(new_assembly_name, "w") as new_ref_fd: + with open(assembly, "r") as ref_fd: + fasta_header = ref_fd.readline() + sequence = "" + for line in ref_fd: + line = line + if line.startswith(">"): + if len(sequence) >= minCtgLength: + 
new_ref_fd.write(">contig{}\n".format(contigID)) + new_ref_fd.write(sequence) + sequence = "" + contigID += 1 + else: + sequence+=line + if len(sequence) >= minCtgLength: + new_ref_fd.write(">contig{}".format(contigID)) + new_ref_fd.write(sequence) + return new_assembly_name + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--assembly', help="assembly sequence", type=str) + parser.add_argument('--genome-size', help="expected genome size", default=0, type=int) + parser.add_argument('--min-contig', help="minimum contig length", default=2000, type=int) + parser.add_argument('--only-reference', help="generates only the new refernce (i.e., only scaffolds longer than min-contig)", default=0, type=int) + parser.add_argument('--only-stats', help="generates only assembly stats", default=0, type=int) + + parser.add_argument('--opgen-report', help="op gen placment report", type=str) + parser.add_argument('--find-problems-in-maps', help="generates only the new refernce (i.e., only scaffolds longer than min-contig)", default=0, type=int) + parser.add_argument('--produce-consensus', help="generates only the new refernce (i.e., only scaffolds longer than min-contig)", default=0, type=int) + + parser.add_argument('--output', help="aoutput header name", default="opgen_scaffolded_assembly", type=str) + args = parser.parse_args() + if args.genome_size == 0: + print "genome size must be specified" + sys.exit("error") + + main(args) + + + diff --git a/utils/run_assembly_evaluation.py b/utils/produce_assembly_report.py similarity index 73% rename from utils/run_assembly_evaluation.py rename to utils/produce_assembly_report.py index 5a96255..9f5d6f5 100644 --- a/utils/run_assembly_evaluation.py +++ b/utils/produce_assembly_report.py @@ -11,54 +11,18 @@ def main(args): workingDir = os.getcwd() assemblers = sum(args.assemblers, []) - if not os.path.exists(args.sample_config): - sys.exit("Error: file {} does not exists".format(args.sample_config)) - - with open(os.path.abspath(args.sample_config)) as sample_config_handle: - sample_config = yaml.load(sample_config_handle) - global_config = os.path.abspath(args.global_config) + if not os.path.exists(args.validation_dir): + sys.exit("Error: directory {} does not exists".format(args.validation_dir)) if not os.path.exists(args.assemblies_dir): - sys.exit("Error: directory {} does not exists".format(assemblies_dir)) - assemblies_dir = os.path.abspath(args.assemblies_dir) - - processed = 0 - - for assembler in assemblers: - assemblerValidationDir = os.path.join(workingDir, assembler) - if not os.path.exists(assemblerValidationDir): - os.makedirs(assemblerValidationDir) - os.chdir(assemblerValidationDir) - #I am in the assembler specific validation directory - if "output" not in sample_config: - sys.exit("Error: sample cofig must contain output field") - outputName = sample_config["output"] - assemblerReference = os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName)) - print assemblerReference - if not os.path.exists(assemblerReference): - print "for assembler {} no assembly is available, skypping this validation".format(assembler) - os.chdir("..") - continue - processed += 1 - #otherwise I have everyhitng I need - sample_config["reference"] = assemblerReference - - stream = file('{}_{}.yaml'.format(outputName, assembler), 'w') - yaml.dump(sample_config, stream) # Write a YAML representation of data to 'document.yaml'. 
- stream.close() - - command = "python ~/DE_NOVO_PIPELINE/de_novo_scilife/script/deNovo_pipeline.py --global-config {} --sample-config {}_{}.yaml".format(global_config, outputName, assembler) - subprocess.call(command, shell=True) - os.chdir("..") - if processed == 0: - for assembler in assemblers: - command = ["rm", "-r", assembler] - subprocess.call(command) - sys.exit("Error: no assembly available, probably wrong path specification") - - + sys.exit("Error: directory {} does not exists".format(args.assemblies_dir)) + + validation_dir = os.path.abspath(args.validation_dir) # save valiadation directory + assemblies_dir = os.path.abspath(args.assemblies_dir) # save assemblies directory + outputName = args.output + minContigLength = args.minContigLength + genomeSize = args.genomeSize + processed = 0 # count how many assembler I am going to process - ##Now everything is done - validationDirectory = os.getcwd() if not os.path.exists("LaTeX"): os.makedirs("LaTeX") os.chdir("LaTeX") @@ -67,29 +31,29 @@ def main(args): assemblyStats = [] for assembler in assemblers: #compute assembly statistics - if os.path.exists(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName))): - assemblySeq = os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName)) - assemblyStats.append(computeAssemblyStats(assembler, assemblySeq, sample_config["minCtgLength"], sample_config["genomeSize"])) + assemblySeq = os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName)) + if os.path.exists(assemblySeq): + assemblyStats.append(computeAssemblyStats(assembler, assemblySeq, minContigLength, genomeSize)) latex_document = _insert_stat_table(latex_document, assemblyStats) #now copy QAstast - if not os.path.exists("pictures"): - os.makedirs("pictures") + if not os.path.exists("QA_pictures"): + os.makedirs("QA_pictures") for assembler in assemblers: - if not os.path.exists(os.path.join("pictures", assembler)): - os.makedirs(os.path.join("pictures", assembler)) + if not os.path.exists(os.path.join("QA_pictures", assembler)): + os.makedirs(os.path.join("QA_pictures", assembler)) #directory structure created for assembler in assemblers: - if os.path.exists(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName))): + if os.path.exists(os.path.join(validation_dir, assembler, "reference", "{}.scf.fasta".format(outputName))): latex_document = _new_section(latex_document, assembler) - Original_CoverageDistribution200 = os.path.join(validationDirectory, assembler, "QAstats", "Coverage_distribution_noOutliers.png") - Original_GC_vs_Coverage = os.path.join(validationDirectory, assembler, "QAstats", "GC_vs_Coverage_noOutliers.png") - Original_GC_vs_CtgLength = os.path.join(validationDirectory, assembler, "QAstats", "GC_vs_CtgLength.png") - Original_MedianCov_vs_CtgLength = os.path.join(validationDirectory, assembler, "QAstats", "MedianCov_vs_CtgLength_noOutliers.png") - Copied_CoverageDistribution200 = os.path.join("pictures", assembler, "Coverage_distribution_noOutliers.png") - Copied_GC_vs_Coverage = os.path.join("pictures", assembler, "GC_vs_Coverage_noOutliers.png") - Copied_GC_vs_CtgLength = os.path.join("pictures", assembler, "GC_vs_CtgLength.png") - Copied_MedianCov_vs_CtgLength = os.path.join("pictures", assembler, "MedianCov_vs_CtgLength_noOutliers.png") + Original_CoverageDistribution200 = os.path.join(validation_dir, assembler, "QAstats", "Coverage_distribution_noOutliers.png") + Original_GC_vs_Coverage = os.path.join(validation_dir, assembler, "QAstats", 
"GC_vs_Coverage_noOutliers.png") + Original_GC_vs_CtgLength = os.path.join(validation_dir, assembler, "QAstats", "GC_vs_CtgLength.png") + Original_MedianCov_vs_CtgLength = os.path.join(validation_dir, assembler, "QAstats", "MedianCov_vs_CtgLength_noOutliers.png") + Copied_CoverageDistribution200 = os.path.join("QA_pictures", assembler, "Coverage_distribution_noOutliers.png") + Copied_GC_vs_Coverage = os.path.join("QA_pictures", assembler, "GC_vs_Coverage_noOutliers.png") + Copied_GC_vs_CtgLength = os.path.join("QA_pictures", assembler, "GC_vs_CtgLength.png") + Copied_MedianCov_vs_CtgLength = os.path.join("QA_pictures", assembler, "MedianCov_vs_CtgLength_noOutliers.png") sh.copy(Original_CoverageDistribution200, Copied_CoverageDistribution200) sh.copy(Original_GC_vs_Coverage , Copied_GC_vs_Coverage ) sh.copy(Original_GC_vs_CtgLength , Copied_GC_vs_CtgLength ) @@ -102,52 +66,59 @@ def main(args): latex_document = _insert_QA_figure(latex_document, pictures, "QA pictures") # now FRCurve latex_document = _new_section(latex_document, "FRCurve") - FRCurves = [] + + if not os.path.exists("FRCurves"): + os.makedirs("FRCurves") + os.chdir("FRCurves") + names = ["_FRC" , "COMPR_MP_FRC" , "COMPR_PE_FRC" , "HIGH_COV_PE_FRC" , "HIGH_NORM_COV_PE_FRC" ,"HIGH_OUTIE_MP_FRC" , "HIGH_OUTIE_PE_FRC" , "HIGH_SINGLE_MP_FRC" , "HIGH_SINGLE_PE_FRC" , "HIGH_SPAN_MP_FRC" , "HIGH_SPAN_PE_FRC" ,"LOW_COV_PE_FRC" , "LOW_NORM_COV_PE_FRC" , "STRECH_MP_FRC" , "STRECH_PE_FRC"] + for name in names: + FRCurves = [] + for assembler in assemblers: + FRCurve_Orig_name = os.path.join(validation_dir, assembler, "FRCurve", "{}{}.txt".format(outputName, name)) + FRCurves.append([assembler, FRCurve_Orig_name]) + FRCname = _plotFRCurve("{}_{}".format(outputName, name),FRCurves) + FRCurves = [] + #plot last FRCurve for assembler in assemblers: - if os.path.exists(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName))): - FRCurves.append([assembler, os.path.join(validationDirectory, assembler, "FRCurve", "{}_FRC.txt".format(outputName))]) + FRCurves.append([assembler, os.path.join(validation_dir, assembler, "FRCurve", "{}_FRC.txt".format(outputName))]) FRCname = _plotFRCurve(outputName,FRCurves) - latex_document = _new_figure(latex_document, FRCname, "Feature Response Curve compute on all Features") + print FRCname + os.chdir("..") + latex_document = _new_figure(latex_document, "FRCurves/{}".format(FRCname), "Feature Response Curve compute on all Features") latex_document = _latexFooter(latex_document) latex_document = _latexFooter(latex_document) - if args.generatePDF == 1: - with open("{0}.tex".format(outputName),'w') as f: - f.write(latex_document) - command = ["pdflatex", "{0}.tex".format(outputName)] - latex_stdOut = open("latex.stdOut", "a") - latex_stdErr = open("latex.stdErr", "a") - subprocess.call(command, stdout=latex_stdOut , stderr=latex_stdErr) + with open("{0}.tex".format(outputName),'w') as f: + f.write(latex_document) os.chdir("..") # now prepare delivery folder if not os.path.exists("{}_delivery_report".format(outputName)): os.makedirs("{}_delivery_report".format(outputName)) os.chdir("{}_delivery_report".format(outputName)) - #copy pdf report - if args.generatePDF == 1: - sh.copy(os.path.join(validationDirectory, "LaTeX", "{}.pdf".format(outputName)), "{}.pdf".format(outputName)) + #now copy QA tables for assembler in assemblers: - if os.path.exists(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName) )): + if os.path.exists(os.path.join(assemblies_dir, assembler, 
"{}.scf.fasta".format(outputName) )): if not os.path.exists(assembler): os.makedirs(assembler) if not os.path.exists(os.path.join(assembler, "assembly")): os.makedirs(os.path.join(assembler, "assembly")) - if os.path.exists(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName))): - sh.copy(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName)), os.path.join(assembler, "assembly", "{}.scf.fasta".format(outputName))) - if os.path.exists(os.path.join(assemblies_dir, assembler, "{}.ctg.fasta".format(outputName))): - sh.copy(os.path.join(assemblies_dir, assembler, "{}.ctg.fasta".format(outputName)), os.path.join(assembler, "assembly", "{}.ctg.fasta".format(outputName))) + + sh.copy(os.path.join(assemblies_dir, assembler, "{}.scf.fasta".format(outputName)), os.path.join(assembler, "assembly", "{}.scf.fasta".format(outputName))) + sh.copy(os.path.join(assemblies_dir, assembler, "{}.ctg.fasta".format(outputName)), os.path.join(assembler, "assembly", "{}.ctg.fasta".format(outputName))) if not os.path.exists(os.path.join(assembler, "QA_table")): os.makedirs(os.path.join(assembler, "QA_table")) - sh.copy(os.path.join(validationDirectory, assembler, "QAstats", "Contigs_Cov_SeqLen_GC.csv"), os.path.join(assembler, "QA_table", "Contigs_Cov_SeqLen_GC.csv")) + sh.copy(os.path.join(validation_dir, assembler, "QAstats", "Contigs_Cov_SeqLen_GC.csv"), os.path.join(assembler, "QA_table", "Contigs_Cov_SeqLen_GC.csv")) if not os.path.exists(os.path.join(assembler, "FRCurves")): os.makedirs(os.path.join(assembler, "FRCurves")) names = ["_FRC" , "COMPR_MP_FRC" , "COMPR_PE_FRC" , "HIGH_COV_PE_FRC" , "HIGH_NORM_COV_PE_FRC" ,"HIGH_OUTIE_MP_FRC" , "HIGH_OUTIE_PE_FRC" , "HIGH_SINGLE_MP_FRC" , "HIGH_SINGLE_PE_FRC" , "HIGH_SPAN_MP_FRC" , "HIGH_SPAN_PE_FRC" ,"LOW_COV_PE_FRC" , "LOW_NORM_COV_PE_FRC" , "STRECH_MP_FRC" , "STRECH_PE_FRC"] for name in names: - FRCurve_Orig_name = os.path.join(validationDirectory, assembler, "FRCurve", "{}{}.txt".format(outputName, name)) + FRCurve_Orig_name = os.path.join(validation_dir, assembler, "FRCurve", "{}{}.txt".format(outputName, name)) FRCurve_Dest_name = os.path.join(assembler, "FRCurves", "{}{}.txt".format(outputName, name)) sh.copy(FRCurve_Orig_name,FRCurve_Dest_name) + # now copy FRCurve pictures + sh.copytree("../LaTeX/FRCurves", "FRCurves") os.chdir("..") return @@ -372,11 +343,18 @@ def _latexHeader(sampleName, assemblers): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--assemblies-dir', help="Directory where assemblies are stored (one per folder, e.g., assembler/NAME.scf.fasta)", type=str) - parser.add_argument('--assemblers', action='append', nargs='+', help="List of assemblers to be evalueted") - parser.add_argument('--sample-config', help="Sample config. 
diff --git a/utils/reverse_complement.py b/utils/reverse_complement.py
new file mode 100644
index 0000000..ebd95cb
--- /dev/null
+++ b/utils/reverse_complement.py
@@ -0,0 +1,51 @@
+import sys, os, yaml, glob
+import subprocess
+import argparse
+import string
+import gzip
+from subprocess import Popen, PIPE
+
+
+
+
+def main(args):
+    workingDir = os.getcwd()
+
+    proc = Popen(["zcat", "{}".format(args.fastq)], stdout=PIPE, bufsize=1)
+    with gzip.open('{}.gz'.format(args.output), 'wb') as output_f:
+        for header in iter(proc.stdout.readline, b''):
+            header = header.rstrip()
+            sequence = proc.stdout.readline().rstrip()
+            comment = proc.stdout.readline().rstrip()
+            quality = proc.stdout.readline().rstrip()
+
+            sequence_rc = rc(sequence)
+            quality_rev = reverse(quality)
+
+            output_f.write("{}\n".format(header))
+            output_f.write("{}\n".format(sequence_rc))
+            output_f.write("{}\n".format(comment))
+            output_f.write("{}\n".format(quality_rev))
+
+    proc.communicate()
+
+    return
+
+def reverse(quality):
+    seq_rev = quality[::-1]
+    return seq_rev
+
+
+def rc(dna):
+    complements = string.maketrans('acgtrymkbdhvnACGTRYMKBDHVN', 'tgcayrkmvhdbnTGCAYRKMVHDBN')
+    rcseq = dna.translate(complements)[::-1]
+    return rcseq
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Reverse complement the fastq file given as input')
+    parser.add_argument('--fastq', help="fastq file to reverse complement", type=str)
+    parser.add_argument('--output', help="name of the output file (a .gz extension is appended)", type=str)
+    args = parser.parse_args()
+
+    main(args)
\ No newline at end of file
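As a quick sanity check, the translate-based reverse complement used above behaves as in this minimal, self-contained sketch (Python 2 string.maketrans; on Python 3 the equivalent table would be built with str.maketrans):

    import string

    # Build the complement table once (IUPAC ambiguity codes included), then
    # complement and reverse the sequence in a single expression.
    complements = string.maketrans('acgtrymkbdhvnACGTRYMKBDHVN', 'tgcayrkmvhdbnTGCAYRKMVHDBN')

    def rc(dna):
        return dna.translate(complements)[::-1]

    print(rc("ACGTN"))  # expected output: NACGT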
diff --git a/utils/run_MP_analysis.py b/utils/run_MP_analysis.py
new file mode 100644
index 0000000..dcd1302
--- /dev/null
+++ b/utils/run_MP_analysis.py
@@ -0,0 +1,117 @@
+import sys, os, yaml, glob
+import subprocess
+import argparse
+import re
+
+
+def main(args):
+    projectFolder = os.getcwd()
+    samples_data_dir = args.sample_data_dir
+    projectName = os.path.basename(os.path.normpath(samples_data_dir))
+    for sample_dir_name in [dir for dir in os.listdir(samples_data_dir) if os.path.isdir(os.path.join(samples_data_dir, dir))]:
+        sample_folder = os.path.join(os.getcwd(), sample_dir_name)
+        if not os.path.exists(sample_folder):
+            os.makedirs(sample_folder)
+        os.chdir(sample_folder)
+        ## now that I am in the sample folder I can run the QC and MP analyses at the same time
+
+        pipeline = "MP"
+        tools = ["trimmomatic", "fastqc", "abyss", "align"]
+
+
+        sample_YAML_name = os.path.join(sample_folder, "{}_{}.yaml".format(sample_dir_name, pipeline))
+        sample_YAML = open(sample_YAML_name, 'w')
+
+        sample_YAML.write("pipeline:\n")
+        sample_YAML.write(" {}\n".format(pipeline))
+        sample_YAML.write("tools:\n")
+        sample_YAML.write(" {}\n".format(tools))
+        sample_YAML.write("output: {}\n".format(sample_dir_name)) ##TODO: output must become sampleName
+        sample_YAML.write("projectName: {}\n".format(projectName))
+        sample_YAML.write("kmer: 35\n")
+        sample_YAML.write("threads: 16\n")
+        sample_YAML.write("genomeSize: \n")
+        sample_YAML.write("adapters: {}\n".format(args.adapter))
+
+        if args.reference != "":
+            sample_YAML.write("reference: {}\n".format(args.reference))
+        sample_YAML.write("libraries:\n")
+
+        sample_data_dir = os.path.join(samples_data_dir,sample_dir_name)
+        flowcells_dirs = [os.path.join(sample_data_dir,flowcell) for flowcell in os.listdir(sample_data_dir) if os.path.isdir(os.path.join(sample_data_dir,flowcell))] # full path to flowcell
+
+        sample_files = []
+        for flowcell in flowcells_dirs:
+            sample_files.extend([os.path.join(flowcell, f) for f in os.listdir(flowcell) if (os.path.isfile(os.path.join(flowcell,f)) and re.search('.gz$',f))])
+        # now sample_files contains all the files sequenced for this sample
+
+        pair1_file = ""
+        pair2_file = ""
+        single = ""
+        library = 1
+        for file in [f for f in sample_files if "_1.fastq.gz" in f]:
+            sample_YAML.write("     lib{}:\n".format(library))
+            if "_1.fastq.gz" in file:
+                pair1_file = file
+                pair2_file = re.sub("_1.fastq.gz", "_2.fastq.gz", file)
+            elif "_2.fastq.gz" in file:
+                pair2_file = file
+                pair1_file = re.sub("_2.fastq.gz", "_1.fastq.gz", file)
+
+            sample_YAML.write("      pair1: {}\n".format(pair1_file))
+            sample_YAML.write("      pair2: {}\n".format(pair2_file))
+            sample_YAML.write("      orientation: {}\n".format(args.orientation))
+            sample_YAML.write("      insert: {}\n".format(args.insert))
+            sample_YAML.write("      std: {}\n".format(args.std))
+            sample_files.remove(pair1_file)
+            sample_files.remove(pair2_file)
+            library += 1
+
+        sample_YAML.close()
+        submit_job(sample_YAML_name, args.global_config, sample_dir_name , pipeline, args.env) # now I can submit the job to slurm
+        os.chdir(projectFolder)
+
+def submit_job(sample_config, global_config, output, pipeline, env):
+
+    workingDir = os.getcwd()
+    slurm_file = os.path.join(workingDir, "{}_{}.slurm".format(output,pipeline))
+    slurm_handle = open(slurm_file, "w")
+
+    slurm_handle.write("#! /bin/bash -l\n")
+    slurm_handle.write("set -e\n")
+    slurm_handle.write("#SBATCH -A b2013064\n")
+    slurm_handle.write("#SBATCH -o {}_{}.out\n".format(output,pipeline))
+    slurm_handle.write("#SBATCH -e {}_{}.err\n".format(output,pipeline))
+    slurm_handle.write("#SBATCH -J {}_{}.job\n".format(output,pipeline))
+    slurm_handle.write("#SBATCH -p node -n 16\n")
+    slurm_handle.write("#SBATCH -t 1-00:00:00\n")
+    slurm_handle.write("#SBATCH --mail-user francesco.vezzi@scilifelab.se\n")
+    slurm_handle.write("#SBATCH --mail-type=ALL\n")
+
+    slurm_handle.write("\n\n")
+    slurm_handle.write("source activate {}\n".format(env))
+    slurm_handle.write("module load abyss/1.3.5\n")
+    slurm_handle.write("load_modules\n")
+    slurm_handle.write("deNovo_pipeline.py --global-config {} --sample-config {}\n\n".format(global_config,sample_config))
+    slurm_handle.close()
+
+    command=("sbatch", slurm_file)
+    print(command)
+    #subprocess.call(command)
+
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--reference' , type=str, default="", help="path to the reference file")
+    parser.add_argument('--adapter' , type=str, required=True, help="path to the file containing the adapter sequences to be removed")
+    parser.add_argument('--global-config' , type=str, required=True, help="global configuration file")
+    parser.add_argument('--sample-data-dir', type=str, required=True, help="Path to directory (usually INBOX) containing the project (one dir per sample, scilife structure project/sample/flowcell/)")
+    parser.add_argument('--orientation' , type=str, required=True, help="orientation of the libraries")
+    parser.add_argument('--insert' , type=str, required=True, help="expected insert size of the libraries")
+    parser.add_argument('--std' , type=str, required=True, help="expected standard deviation of the insert size of the libraries")
+    parser.add_argument('--env' , type=str, default="DeNovoPipeline", help="name of the virtual environment (default is DeNovoPipeline)")
+    args = parser.parse_args()
+
+    main(args)
\ No newline at end of file
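For clarity, the mate-pairing convention the loop above relies on (read files ending in _1.fastq.gz / _2.fastq.gz within each flowcell directory) can be summarised with this small, hypothetical helper; the file name in the example is made up:

    import re

    def mate_of(fastq_path):
        # Derive the path of the mate file under the _1/_2 naming convention.
        if "_1.fastq.gz" in fastq_path:
            return re.sub("_1.fastq.gz", "_2.fastq.gz", fastq_path)
        if "_2.fastq.gz" in fastq_path:
            return re.sub("_2.fastq.gz", "_1.fastq.gz", fastq_path)
        return None

    print(mate_of("P123_101_140313_AC1234ACXX_L001_1.fastq.gz"))
    # -> P123_101_140313_AC1234ACXX_L001_2.fastq.gz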