Skip to content

Commit

Permalink
cough cough couhg
Browse files Browse the repository at this point in the history
  • Loading branch information
syueqiao committed Sep 29, 2024
1 parent ab30394 commit daafa5e
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions R/get_protein_encoding.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
library(tidyverse)
library(biomaRt)
# Get protein-coding-only genes from expression matrices ----
#
# Queries Ensembl through biomaRt for every human gene's biotype and HGNC
# symbol, keeps the symbols annotated as "protein_coding", and uses that list
# to subset the HEK293T matrix on both rows and columns.
#
# Requires `cl_cpm_fil_t_cor` to already exist in the session: a matrix whose
# row names and column names are both gene symbols.

ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
myResult <- getBM(
  attributes = c("ensembl_gene_id", "gene_biotype", "hgnc_symbol"),
  mart = ensembl
)

# Namespace-qualified so the call cannot be masked by stats::filter or by a
# package attached after tidyverse.
protein_coding <- dplyr::filter(myResult, gene_biotype == "protein_coding")

# Keep each symbol once and drop missing/blank entries, so that an unnamed row
# or column can never be matched against a blank symbol.
protein_coding_list <- unique(protein_coding$hgnc_symbol)
protein_coding_list <- protein_coding_list[
  !is.na(protein_coding_list) & nzchar(protein_coding_list)
]

# where cl_cpm_fil_t_cor is a matrix with col and row names of the gene symbols
hek293t_keep_rows <- rownames(cl_cpm_fil_t_cor) %in% protein_coding_list
hek293t_keep_cols <- colnames(cl_cpm_fil_t_cor) %in% protein_coding_list
# drop = FALSE keeps a two-dimensional result even if only one gene matches.
all_hek293t_subset <- cl_cpm_fil_t_cor[
  hek293t_keep_rows, hek293t_keep_cols,
  drop = FALSE
]

# Small preview (at most 100 x 100); bounded by the actual dimensions so it
# does not fail when fewer than 100 genes survive the filter.
cl_cpm_fil_t_cor_small_hek293t <- all_hek293t_subset[
  seq_len(min(100L, nrow(all_hek293t_subset))),
  seq_len(min(100L, ncol(all_hek293t_subset))),
  drop = FALSE
]

# NOTE: with row names written and default col.names, the header line has one
# field fewer than the data rows. read.table() understands that layout; other
# CSV readers may not (col.names = NA would add the blank header cell). The
# format is left unchanged here so existing consumers keep working.
write.table(
  all_hek293t_subset,
  "../output/all_hek293t_subset.csv",
  sep = ",",
  quote = FALSE
)

# For input K562 ----
#
# Reads the K562 co-expression table, labels its rows with the same gene names
# as its columns, and keeps only the rows/columns whose name appears in
# `protein_coding_list` (which must already exist in the session).

# check.names = FALSE keeps the header exactly as written in the file. With
# the default (TRUE) read.table() rewrites names into syntactic R names, e.g.
# "HLA-A" becomes "HLA.A", and those genes would then silently fail to match
# the symbol list below.
k562 <- read.table(
  "../data/perturb_rbp_coexp.csv/perturb_rbp_coexp.csv",
  sep = ",",
  header = TRUE,
  check.names = FALSE
)

# make.unique() guards against repeated header names, which are not allowed as
# data-frame row names.
k562_genes <- make.unique(colnames(k562))

# The table is treated as square: row i is labelled with the name of column i.
# Fail loudly if that assumption does not hold for the file that was read.
stopifnot(nrow(k562) == length(k562_genes))
colnames(k562) <- k562_genes
# Row names come straight from the column names instead of from a helper
# column fetched by a hard-coded position (previously column 17893), so this
# works for any matrix size.
rownames(k562) <- k562_genes

k562_keep_rows <- rownames(k562) %in% protein_coding_list
k562_keep_cols <- colnames(k562) %in% protein_coding_list
# drop = FALSE keeps a data frame even if only one column matches.
all_k562_subset <- k562[k562_keep_rows, k562_keep_cols, drop = FALSE]

# Small preview (at most 10 x 10), bounded by the actual dimensions.
all_k562_subset_small <- all_k562_subset[
  seq_len(min(10L, nrow(all_k562_subset))),
  seq_len(min(10L, ncol(all_k562_subset))),
  drop = FALSE
]

# NOTE: with row names written and default col.names, the header line has one
# field fewer than the data rows; the format is left unchanged so existing
# consumers keep working.
write.table(
  all_k562_subset,
  "../output/all_k562_subset.csv",
  sep = ",",
  quote = FALSE
)

0 comments on commit daafa5e

Please sign in to comment.