Skip to content

Commit

Permalink
Merge PR #106 (@ihodes/@smondet, mouse genomes)
Browse files Browse the repository at this point in the history
This closes #103 also.
  • Loading branch information
smondet committed Dec 29, 2015
2 parents 67cb931 + c770284 commit de0cfd1
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 21 deletions.
2 changes: 2 additions & 0 deletions src/lib/build_machine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ let create
~host ~run_program ~destination_path:(meta_playground // "hg19-reference-genome")
| `B37decoy -> Data_providers.pull_b37decoy
~host ~run_program ~destination_path:(meta_playground // "hs37d5-reference-genome")
| `mm10 -> Data_providers.pull_mm10
~host ~run_program ~destination_path:(meta_playground // "mm10-reference-genome")
)
~host
~toolkit:(
Expand Down
9 changes: 9 additions & 0 deletions src/lib/download_reference_genomes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,12 @@ let pull_hg18 ~host ~(run_program : Machine.run_function) ~destination_path =
wget_gunzip ~host ~run_program dbsnp_hg18_url
~destination:(destination_path // "dbsnp.vcf") in
Reference_genome.create "hg18" fasta ~dbsnp

(** Location of the gzip-compressed GRCm38.p4 genomic FASTA
    (assembly accession GCA_000001635.6) on the NCBI FTP server. *)
let mm10_url : string =
  "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA_000001635.6_GRCm38.p4/GCA_000001635.6_GRCm38.p4_genomic.fna.gz"

(** [pull_mm10 ~host ~run_program ~destination_path] builds the reference
    genome named ["mm10"].

    Its FASTA is produced by handing [mm10_url] to [wget_gunzip] with
    [destination_path // "mm10.fasta"] as the destination. Unlike
    [pull_hg18], no [~dbsnp] is passed to [Reference_genome.create].

    NOTE(review): [wget_gunzip] presumably downloads then decompresses the
    file on [host] using [run_program] — confirm against its definition. *)
let pull_mm10 ~host ~(run_program : Machine.run_function) ~destination_path =
  let fasta =
    (* Fetch the mouse assembly: this call previously passed [hg18_url]
       (copy-paste from [pull_hg18]), so "mm10.fasta" was filled from the
       hg18 location instead of [mm10_url]. *)
    wget_gunzip ~host ~run_program mm10_url
      ~destination:(destination_path // "mm10.fasta") in
  Reference_genome.create "mm10" fasta
5 changes: 5 additions & 0 deletions src/lib/download_reference_genomes.mli
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,8 @@ val pull_hg19 :
host:Common.KEDSL.Host.t ->
run_program:Run_environment.Machine.run_function ->
destination_path:string -> Reference_genome.t

(** [pull_mm10 ~host ~run_program ~destination_path] returns the
    {!Reference_genome.t} for the ["mm10"] build, with its FASTA file
    placed under [destination_path]. Takes the same labelled arguments as
    {!pull_hg19}.

    NOTE(review): presumably the genome is downloaded on [host] through
    [run_program] — confirm against the implementation. *)
val pull_mm10 :
host:Common.KEDSL.Host.t ->
run_program:Run_environment.Machine.run_function ->
destination_path:string -> Reference_genome.t
6 changes: 3 additions & 3 deletions src/lib/reference_genome.ml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
open Common

type specification =
[`B37 | `B38 | `hg19 | `hg18 | `B37decoy ]
[`B37 | `B38 | `hg19 | `hg18 | `B37decoy | `mm10 ]

(** A reference genome has a name (for display/matching) and a
cluster-dependent path.
Expand Down Expand Up @@ -36,12 +36,12 @@ let on_host ~host ?cosmic ?dbsnp ?gtf ?cdna name path =

let name t = t.name
let path t = t.location#product#path
let cosmic_path_exn t =
let cosmic_path_exn t =
let msg = sprintf "cosmic_path_exn of %s" t.name in
let cosmic = Option.value_exn ~msg t.cosmic in
cosmic#product#path

let dbsnp_path_exn t =
let dbsnp_path_exn t =
let msg = sprintf "dbsnp_path_exn of %s" t.name in
let trgt = Option.value_exn ~msg t.dbsnp in
trgt#product#path
Expand Down
8 changes: 4 additions & 4 deletions src/lib/reference_genome.mli
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
open Common

type specification =
[`B37 | `B38 | `hg19 | `hg18 | `B37decoy ]
[`B37 | `B38 | `hg19 | `hg18 | `B37decoy | `mm10 ]

type t = private {
name : string;
Expand All @@ -47,8 +47,8 @@ val create :

val on_host :
host:KEDSL.Host.t ->
?cosmic:string ->
?dbsnp:string ->
?cosmic:string ->
?dbsnp:string ->
?gtf:string->
?cdna:string->
string -> string -> t
Expand All @@ -63,7 +63,7 @@ val cosmic_path_exn : t -> string
val dbsnp_path_exn : t -> string
val gtf_path_exn : t -> string
val cdna_path_exn : t -> string

(** {5 Targets} *)

val fasta: t -> KEDSL.file_workflow
Expand Down
44 changes: 33 additions & 11 deletions src/lib/region.ml
Original file line number Diff line number Diff line change
Expand Up @@ -40,32 +40,32 @@ let to_samtools_option r =
| Some s -> sprintf "-r %s" s
| None -> ""

let to_gatk_option r =
let to_gatk_option r =
match to_samtools_specification r with
| Some s -> sprintf "--intervals %s" s
| None -> ""

let parse_samtools s =
match String.split ~on:(`Character ':') s with
| [] -> assert false
| [one] -> `Chromosome one
| [one; two] ->
| [one] -> `Chromosome one
| [one; two] ->
begin match String.split ~on:(`Character '-') two with
| [left; right] ->
| [left; right] ->
begin match Int.of_string left, Int.of_string right with
| Some b, Some e -> `Chromosome_interval (one, b, e)
| _ -> failwithf "Cannot parse %S into 2 loci" two
end
| _ -> failwithf "Not one '-' in %S" two
end
end
| _ -> failwithf "Not one or zero ':' in %S" s


let cmdliner_term () =
let open Cmdliner in
Term.(
pure (function
| None -> `Full
| None -> `Full
| Some s -> parse_samtools s)
$ Arg.(
value & opt (some string) None
Expand Down Expand Up @@ -128,13 +128,35 @@ let all_chromosomes_hg19 = [
`Chromosome "chrX";
`Chromosome "chrY";
`Chromosome "chrM";
]

(** Whole-chromosome regions for the mm10 build: autosomes ["1"] to ["19"]
    followed by ["X"] and ["Y"], named without any ["chr"] prefix.

    NOTE(review): no mitochondrial contig is listed here, whereas
    [all_chromosomes_hg19] ends with ["chrM"] — confirm this is intended,
    and that these names match the sequence names of the mm10 FASTA. *)
let all_chromosomes_mm10 = [
  (* Autosomes. *)
  `Chromosome "1"; `Chromosome "2"; `Chromosome "3"; `Chromosome "4";
  `Chromosome "5"; `Chromosome "6"; `Chromosome "7"; `Chromosome "8";
  `Chromosome "9"; `Chromosome "10"; `Chromosome "11"; `Chromosome "12";
  `Chromosome "13"; `Chromosome "14"; `Chromosome "15"; `Chromosome "16";
  `Chromosome "17"; `Chromosome "18"; `Chromosome "19";
  (* Sex chromosomes. *)
  `Chromosome "X"; `Chromosome "Y";
]

let major_contigs ~reference_build =
match reference_build with
let major_contigs ~reference_build =
match reference_build with
| (`B37 | `B37decoy) -> all_chromosomes_b37
| `B38 -> all_chromosomes_b37
| (`hg19 | `hg18) -> all_chromosomes_hg19



| `mm10 -> all_chromosomes_mm10
3 changes: 0 additions & 3 deletions src/lib/run_environment.ml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,3 @@ module Machine = struct
let work_dir t = t.work_dir

end



0 comments on commit de0cfd1

Please sign in to comment.