Skip to content

Commit 2e81727

Browse files
authored
Merge pull request #195 from SpatialHackathon/data_Locus_coeruleus
Adding data for issue #38
2 parents baab711 + 9f43a03 commit 2e81727

File tree

2 files changed

+138
-0
lines changed

2 files changed

+138
-0
lines changed
+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/usr/bin/env Rscript
2+
3+
# Author_and_contribution: Niklas Mueller-Boetticher; created template
4+
# Author_and_contribution: Florian Heyl (@heylf); created code
5+
6+
# H_E.json and H_E.tiff not public. Request for access is still unanswered.
7+
8+
suppressPackageStartupMessages(library(optparse))
9+
suppressPackageStartupMessages(library(SpatialExperiment))
10+
suppressPackageStartupMessages(library(SingleCellExperiment))
11+
suppressPackageStartupMessages(library(WeberDivechaLCdata))
12+
suppressPackageStartupMessages(library(Matrix))
13+
14+
option_list <- list(
15+
make_option(
16+
c("-o", "--out_dir"),
17+
type = "character", default = NULL,
18+
help = "Output directory to write files to."
19+
)
20+
)
21+
22+
description <- "Load data (Visium) for Locus_coeruleus from Lukas M. Weber at al. (2022);
23+
The gene expression landscape of the human locus coeruleus revealed
24+
by single-nucleus and spatially-resolved transcriptomics"
25+
26+
opt_parser <- OptionParser(
27+
usage = description,
28+
option_list = option_list
29+
)
30+
opt <- parse_args(opt_parser)
31+
32+
out_dir <- opt$out_dir
33+
34+
args = commandArgs(trailingOnly=TRUE)
35+
36+
# Load data
37+
spe <- WeberDivechaLCdata_Visium()
38+
39+
coords <- as.data.frame(spe@int_colData$spatialCoords)
40+
colnames(coords) <- c('x', 'y')
41+
coords_rownames <- rownames(spe@int_colData$spatialCoords)
42+
43+
counts <- spe@assays@data$counts
44+
45+
counts_func <- function(x){
46+
fields <- unlist(strsplit(x, "_"))
47+
if ( length(fields) == 4 ){
48+
return(paste(fields[1:3], collapse = "_"))
49+
} else {
50+
return(paste(fields[1:4], collapse = "_"))
51+
}
52+
}
53+
counts_lc <- unlist(lapply(colnames(counts), counts_func))
54+
55+
LC_samples <- unique(spe@colData$sample_id)
56+
57+
for ( dir in LC_samples ){
58+
dir <- paste0(out_dir, "/", dir)
59+
if ( dir.exists(dir) == FALSE ){
60+
dir.create(dir, recursive=TRUE)
61+
}
62+
}
63+
64+
print("Create output ...")
65+
66+
patient_list <- c()
67+
sample_list <- c()
68+
directory_list <- c()
69+
70+
# Write coordinates.tsv, observations.tsv, features.tsv, counts.mtx and labels.tsv
71+
for (lc in LC_samples){
72+
73+
print(lc)
74+
dir <- paste0(out_dir, "/", lc)
75+
76+
# Write coordinates.tsv
77+
coords_subset <- coords[which(spe@colData$sample_id == lc),]
78+
rownames(coords_subset) <- coords_rownames[which(spe@colData$sample_id == lc)]
79+
write.table(coords_subset, file = paste0(dir, "/coordinates.tsv"), col.names = NA,
80+
sep = "\t", quote = FALSE, row.names = TRUE)
81+
82+
# Count matrix has rows = genes/features and cols = cells/observations
83+
counts_subset <- counts[,which(counts_lc == lc)]
84+
85+
# Transpose to have rows = cells/observations
86+
counts_subset <- t(counts_subset)
87+
writeMM(counts_subset, file = paste0(dir, "/counts.mtx"))
88+
89+
observations_subset <- spe@colData[which(counts_lc == lc),]
90+
rownames(observations_subset) <- lapply(rownames(observations_subset), function(x){tail(unlist(strsplit(x,"_")),1)})
91+
write.table(observations_subset, file = paste0(dir, "/observations.tsv"), col.names = NA, sep = "\t", quote = FALSE)
92+
93+
labels <- spe@colData$annot_region[which(spe@colData$sample_id == lc)]
94+
labels[which(labels == TRUE)] <- "LC"
95+
labels[which(labels == FALSE)] <- "non_LC"
96+
97+
labels_df <- data.frame(label = labels)
98+
rownames(labels_df) <- rownames(observations_subset)
99+
write.table(labels_df, file = paste0(dir, "/labels.tsv"), col.names = NA, sep = "\t", quote = FALSE)
100+
101+
# Fill metadata
102+
patient_list <- c(patient_list, as.character(unique(spe@colData[which(spe@colData$sample_id == lc),]$donor_id)))
103+
sample_list <- c(sample_list, lc)
104+
directory_list <- c(directory_list, dir)
105+
106+
# Write features.tsv
107+
features <- as.data.frame(spe@rowRanges)
108+
rownames(features) <- spe@rowRanges$gene_id
109+
write.table(features, file = paste0(dir,"/features.tsv"), col.names = NA, sep = "\t", quote = FALSE)
110+
}
111+
112+
## Metadata files
113+
samples_df <- data.frame(
114+
patient = patient_list,
115+
sample = sample_list,
116+
position = rep(NA, length(patient_list)), # Not sure what position means
117+
replicate = rep(NA, length(patient_list)), # If they have replicated then it is really badly named
118+
directory = directory_list,
119+
n_clusters = rep(2, length(patient_list)),
120+
stringsAsFactors = FALSE
121+
)
122+
row.names(samples_df) <- NULL
123+
write.table(samples_df, file = file.path(out_dir, "samples.tsv"), sep = "\t", col.names = NA, quote = FALSE)
124+
125+
technology = "Visium"
126+
json <- file(file.path(out_dir, "experiment.json"))
127+
writeLines(c(paste0('{"technology": "', technology, '"}')), json)
128+
close(json)
129+
130+
print("...finished")
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
channels:
2+
- conda-forge
3+
- bioconda
4+
dependencies:
5+
- r-base=4.3.1
6+
- bioconductor-weberdivechalcdata=1.4.0
7+
- r-optparse=1.7.3
8+
- r-matrix=1.6.1

0 commit comments

Comments
 (0)