|
| 1 | +#!/usr/bin/env Rscript |
| 2 | + |
| 3 | +# Author_and_contribution: Niklas Mueller-Boetticher; created template |
| 4 | +# Author_and_contribution: Florian Heyl (@heylf); created code |
| 5 | + |
| 6 | +# H_E.json and H_E.tiff not public. Request for access is still unanswered. |
| 7 | + |
| 8 | +suppressPackageStartupMessages(library(optparse)) |
| 9 | +suppressPackageStartupMessages(library(SpatialExperiment)) |
| 10 | +suppressPackageStartupMessages(library(SingleCellExperiment)) |
| 11 | +suppressPackageStartupMessages(library(WeberDivechaLCdata)) |
| 12 | +suppressPackageStartupMessages(library(Matrix)) |
| 13 | + |
| 14 | +option_list <- list( |
| 15 | + make_option( |
| 16 | + c("-o", "--out_dir"), |
| 17 | + type = "character", default = NULL, |
| 18 | + help = "Output directory to write files to." |
| 19 | + ) |
| 20 | +) |
| 21 | + |
| 22 | +description <- "Load data (Visium) for Locus_coeruleus from Lukas M. Weber at al. (2022); |
| 23 | +The gene expression landscape of the human locus coeruleus revealed |
| 24 | +by single-nucleus and spatially-resolved transcriptomics" |
| 25 | + |
| 26 | +opt_parser <- OptionParser( |
| 27 | + usage = description, |
| 28 | + option_list = option_list |
| 29 | +) |
| 30 | +opt <- parse_args(opt_parser) |
| 31 | + |
| 32 | +out_dir <- opt$out_dir |
| 33 | + |
| 34 | +args = commandArgs(trailingOnly=TRUE) |
| 35 | + |
| 36 | +# Load data |
| 37 | +spe <- WeberDivechaLCdata_Visium() |
| 38 | + |
| 39 | +coords <- as.data.frame(spe@int_colData$spatialCoords) |
| 40 | +colnames(coords) <- c('x', 'y') |
| 41 | +coords_rownames <- rownames(spe@int_colData$spatialCoords) |
| 42 | + |
| 43 | +counts <- spe@assays@data$counts |
| 44 | + |
| 45 | +counts_func <- function(x){ |
| 46 | + fields <- unlist(strsplit(x, "_")) |
| 47 | + if ( length(fields) == 4 ){ |
| 48 | + return(paste(fields[1:3], collapse = "_")) |
| 49 | + } else { |
| 50 | + return(paste(fields[1:4], collapse = "_")) |
| 51 | + } |
| 52 | +} |
| 53 | +counts_lc <- unlist(lapply(colnames(counts), counts_func)) |
| 54 | + |
| 55 | +LC_samples <- unique(spe@colData$sample_id) |
| 56 | + |
| 57 | +for ( dir in LC_samples ){ |
| 58 | + dir <- paste0(out_dir, "/", dir) |
| 59 | + if ( dir.exists(dir) == FALSE ){ |
| 60 | + dir.create(dir, recursive=TRUE) |
| 61 | + } |
| 62 | +} |
| 63 | + |
| 64 | +print("Create output ...") |
| 65 | + |
| 66 | +patient_list <- c() |
| 67 | +sample_list <- c() |
| 68 | +directory_list <- c() |
| 69 | + |
| 70 | +# Write coordinates.tsv, observations.tsv, features.tsv, counts.mtx and labels.tsv |
| 71 | +for (lc in LC_samples){ |
| 72 | + |
| 73 | + print(lc) |
| 74 | + dir <- paste0(out_dir, "/", lc) |
| 75 | + |
| 76 | + # Write coordinates.tsv |
| 77 | + coords_subset <- coords[which(spe@colData$sample_id == lc),] |
| 78 | + rownames(coords_subset) <- coords_rownames[which(spe@colData$sample_id == lc)] |
| 79 | + write.table(coords_subset, file = paste0(dir, "/coordinates.tsv"), col.names = NA, |
| 80 | + sep = "\t", quote = FALSE, row.names = TRUE) |
| 81 | + |
| 82 | + # Count matrix has rows = genes/features and cols = cells/observations |
| 83 | + counts_subset <- counts[,which(counts_lc == lc)] |
| 84 | + |
| 85 | + # Transpose to have rows = cells/observations |
| 86 | + counts_subset <- t(counts_subset) |
| 87 | + writeMM(counts_subset, file = paste0(dir, "/counts.mtx")) |
| 88 | + |
| 89 | + observations_subset <- spe@colData[which(counts_lc == lc),] |
| 90 | + rownames(observations_subset) <- lapply(rownames(observations_subset), function(x){tail(unlist(strsplit(x,"_")),1)}) |
| 91 | + write.table(observations_subset, file = paste0(dir, "/observations.tsv"), col.names = NA, sep = "\t", quote = FALSE) |
| 92 | + |
| 93 | + labels <- spe@colData$annot_region[which(spe@colData$sample_id == lc)] |
| 94 | + labels[which(labels == TRUE)] <- "LC" |
| 95 | + labels[which(labels == FALSE)] <- "non_LC" |
| 96 | + |
| 97 | + labels_df <- data.frame(label = labels) |
| 98 | + rownames(labels_df) <- rownames(observations_subset) |
| 99 | + write.table(labels_df, file = paste0(dir, "/labels.tsv"), col.names = NA, sep = "\t", quote = FALSE) |
| 100 | + |
| 101 | + # Fill metadata |
| 102 | + patient_list <- c(patient_list, as.character(unique(spe@colData[which(spe@colData$sample_id == lc),]$donor_id))) |
| 103 | + sample_list <- c(sample_list, lc) |
| 104 | + directory_list <- c(directory_list, dir) |
| 105 | + |
| 106 | + # Write features.tsv |
| 107 | + features <- as.data.frame(spe@rowRanges) |
| 108 | + rownames(features) <- spe@rowRanges$gene_id |
| 109 | + write.table(features, file = paste0(dir,"/features.tsv"), col.names = NA, sep = "\t", quote = FALSE) |
| 110 | +} |
| 111 | + |
| 112 | +## Metadata files |
| 113 | +samples_df <- data.frame( |
| 114 | + patient = patient_list, |
| 115 | + sample = sample_list, |
| 116 | + position = rep(NA, length(patient_list)), # Not sure what position means |
| 117 | + replicate = rep(NA, length(patient_list)), # If they have replicated then it is really badly named |
| 118 | + directory = directory_list, |
| 119 | + n_clusters = rep(2, length(patient_list)), |
| 120 | + stringsAsFactors = FALSE |
| 121 | +) |
| 122 | +row.names(samples_df) <- NULL |
| 123 | +write.table(samples_df, file = file.path(out_dir, "samples.tsv"), sep = "\t", col.names = NA, quote = FALSE) |
| 124 | + |
| 125 | +technology = "Visium" |
| 126 | +json <- file(file.path(out_dir, "experiment.json")) |
| 127 | +writeLines(c(paste0('{"technology": "', technology, '"}')), json) |
| 128 | +close(json) |
| 129 | + |
| 130 | +print("...finished") |
0 commit comments