# Gene trends clustering
gene_trends <- adata$varm[['gene_trends_SU-21-2784_B15_GGTTACGTCAAGCAGG-1']]
pseudotime_vec <- as.numeric(colnames(gene_trends))
rownames(gene_trends) <- rownames(epi)
geneVar <- rowVars(as.matrix(gene_trends))
hvg <- geneVar[order(geneVar, decreasing = T)][1:2000] %>% names()
epi_markers <- scRef_markers %>%
filter(cluster == 'Tumor_Epithelial' & p_val_adj < 0.05 & avg_log2FC > 2) %>%
pull(gene)
genes_of_interest <- intersect(hvg, epi_markers)
# genes_of_interest <- unique(c(unlist(upreg_genes), unlist(dnreg_genes)))
filtered_gene_trends <- gene_trends[genes_of_interest, ]
scaled_gene_trends <- scale(t(filtered_gene_trends))
scaled_gene_trends_pca <- pca(scaled_gene_trends)
elbow <- findElbowPoint(scaled_gene_trends_pca$variance)
genes_seurat <- CreateSeuratObject(scaled_gene_trends)
genes_seurat[['pca']] <- CreateDimReducObject(
embeddings = as.matrix(scaled_gene_trends_pca$rotated),
loadings = as.matrix(scaled_gene_trends_pca$loadings),
stdev = sqrt(scaled_gene_trends_pca$variance),
assay = 'RNA',
key = 'pca_'
)
genes_seurat <- FindNeighbors(genes_seurat, reduction = 'pca', dims = 1:elbow)
genes_seurat <- RunUMAP(genes_seurat, reduction = 'pca', dims = 1:elbow)
genes_seurat <- FindClusters(genes_seurat, algorithm = 4)
new_idents <- paste0("GeneSet", seq(1:9))
names(new_idents) <- c(2, 4, 7, 6, 5, 3, 8, 1, 9)
genes_seurat <- RenameIdents(genes_seurat, new_idents)
genes_seurat$geneset <- Idents(genes_seurat)
qsave(genes_seurat, "outputs/Epithelial_Analysis/genes_seurat.qs")
genes_seurat <- qread("outputs/Epithelial_Analysis/genes_seurat.qs")
## Plotting
clusters <- genes_seurat@meta.data %>%
dplyr::select(geneset) %>%
rownames_to_column() %>%
`colnames<-`(c("gene", "cluster"))
clusters$cluster <- as.character(clusters$cluster)
trends <- t(scaled_gene_trends) %>%
data.frame() %>%
rownames_to_column('gene') %>%
melt() %>%
`colnames<-`(c("gene", "pseudotime", 'expression')) %>%
mutate(pseudotime = gsub("^X", "", pseudotime)) %>%
mutate(
pseudotime = as.numeric(pseudotime),
expression = as.numeric(expression)
) %>%
merge(clusters, by = 'gene')
genes_seurat$geneset <- factor(genes_seurat$geneset, levels = new_idents)
ann <- data.frame(
pseudotime = as.numeric(rownames(scaled_gene_trends)),
row.names = rownames(scaled_gene_trends)
)
ann_cols = list(
pseudotime = viridis(dim(scaled_gene_trends)[1]),
geneset = setNames(brewer.pal(9, "Set1"), levels(genes_seurat$geneset))
)
row_ann <- genes_seurat@meta.data %>% dplyr::select(geneset)
pheatmap(
t(scaled_gene_trends)[order(genes_seurat$geneset), ],
annotation_names_row = T,
annotation_row = row_ann,
annotation_col = ann,
annotation_colors = ann_cols,
show_colnames = F,
show_rownames = F,
fontsize = 5,
cluster_cols = F,
cluster_rows = F
)
ggplot(trends, aes(x = pseudotime, y = expression, group = gene)) +
geom_line() +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
facet_wrap(~cluster)
library(org.Hs.eg.db)
library(AnnotationDbi)
geneid <- mapIds(
org.Hs.eg.db,
keys = colnames(genes_seurat),
column = "ENTREZID",
keytype = "SYMBOL",
multiVals = "first"
) %>%
data.frame() %>%
`colnames<-`('geneid')
genes_seurat <- AddMetaData(genes_seurat, geneid)
hallmarks_enrich <- genes_seurat@meta.data %>%
dplyr::select(geneset, geneid) %>%
rownames_to_column(var = 'gene') %>%
group_by(geneset) %>%
# reframe(enrichment = enrichWP(geneid, organism = "Homo sapiens")@result)
reframe(enrichment = enricher(gene, TERM2GENE = term2gene)@result) %>%
unnest(cols = c(enrichment)) %>%
mutate(ID = gsub("HALLMARK_", "", ID))
plot_df <- hallmarks_enrich %>%
filter(p.adjust < 0.05) %>%
dplyr::select(geneset, p.adjust, ID, Count)
ggplot(
plot_df,
aes(
x = Count,
y = reorder_within(ID, within = geneset, by = Count),
fill = p.adjust
)
) +
geom_bar(stat = 'identity') +
scale_y_reordered() +
scale_fill_gradient(low = "red", high = "blue") +
facet_wrap(~geneset, ncol = 1, scales = 'free') +
theme_bw() +
theme(
axis.text.y = element_text(size = 7, face = 'bold'),
legend.position = 'left'
) +
ylab(NULL) +
xlab("Gene Count")
cellType_pseudotime <- epi@meta.data %>%
dplyr::select(clusters, palantir_pseudotime) %>%
`colnames<-`(c("cellType", "pseudotime")) %>%
mutate(y_coord = -4)
exp <- GetAssayData(epi, assay = 'Spatial', layer = 'scale.data')['GKN1', ]
exp <- merge(data.frame(exp), cellType_pseudotime, by = 0)
exp_trend <- trends %>% filter(gene == 'GKN1')
ggplot() +
geom_line(data = exp_trend, mapping = aes(x = pseudotime, y = expression))
# theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
# geom_jitter(data = exp, height = 0.5, size = 0.5, mapping = aes(x = pseudotime, y = exp, color = cellType))