---
output: html_document
---

# Protocol to Build Interaction Network from CORUM complexes

```{r setup, include=FALSE}
# Set knitr global properties
knitr::opts_chunk$set(echo=TRUE, message=FALSE, warning=FALSE)
```

## Load common functions defined in Utils.R

```{r load Utils.R}
source("Utils.R")
```

## Package list needed

- igraph: for function ecount()
- biomaRt: for function useMart()

```{r package list}
CRAN.packages <- c("igraph")
bioconductor.packages <- c("biomaRt")
```

## Packages installation

```{r package installation}
# Helper defined in Utils.R.
install.packages.if.necessary(CRAN.packages, bioconductor.packages)
```

## Set data directory, create it if necessary

```{r set data dir}
data.dir <- "networks"
if (!dir.exists(data.dir)) {
  dir.create(data.dir)
}
```

## Human Complexes from CORUM databases

### Spoke vs. Matrix Model

Protein complexes are often identified by affinity-purification experiments in
which a single protein (bait) is pulled down (purified) using an antibody
together with all the proteins (preys) that belong to the same multi-protein
complex. The actual topology of the binding of proteins in the complex is not
revealed by such an experiment. In order to incorporate this information into
binary networks, pairwise interactions can be assigned between the members of
the complex by using either a matrix or a spoke model. In the matrix model, all
possible pairwise interactions among the members of the complex (bait and
preys) are assumed. In the spoke model, interactions are only assigned between
the bait and each prey.

Download http://mips.helmholtz-muenchen.de/genre/proj/corum/allComplexes.csv as
a flat file. Note that the chunk below actually downloads the core complexes
archive, `coreComplexes.txt.zip`. Create an interaction network from the
UniProt ids, with your preferred parser. Two models can be used to create a
network from data on complexes: the spoke and the matrix models. Since the
information on the "bait" used to identify the complexes is not available, we
create a network following the matrix model.

```{r download and build df}
corum.url <- "http://mips.helmholtz-muenchen.de/corum/download/coreComplexes.txt.zip"
# download.if.necessary() is defined in Utils.R; its return value is used
# below as the path of the downloaded zip archive.
path.to.file <- download.if.necessary(corum.url, data.dir)

# The archive member is looked up under the archive's own name minus ".zip".
internal.file <- tools::file_path_sans_ext(basename(path.to.file))
unzipped.file <- unz(path.to.file, internal.file)
# Always release the connection, even if reading fails.
text.lines <- tryCatch(readLines(unzipped.file), finally=close(unzipped.file))

# First line holds the column names, the remaining lines the records.
# NOTE(review): strsplit() drops a trailing empty field, so a record ending
# with a tab would be one field short and get recycled by rbind() -- confirm
# the last column of the CORUM file is never empty.
data <- strsplit(text.lines, split='\t')
corum.test <- data.frame(do.call(rbind, tail(data, -1)),
                         stringsAsFactors=FALSE)
colnames(corum.test) <- data[[1]]
remove(data)
```

### Select human data and Protein Uniprot identifiers

Levels: Bovine Dog Hamster Human Mammalia MINK Mouse null Pig Rabbit Rat
(Include Mammalian?)

We just keep the column with the Uniprot identifiers for human complexes

```{r select interesting data from df}
is.human <- corum.test[["Organism"]] == "Human"
corum.test.human <- corum.test[is.human, "subunits(UniProt IDs)"]
```

### Build pairwise interactions from the complexes

```{r build pairwise interactions df}
# One character vector of member IDs per complex. Some protein IDs come
# wrapped in parentheses; we remove those parentheses.
complex.members <- lapply(strsplit(corum.test.human, split=";"),
                          function(ids) gsub("[()]", "", ids))

# Matrix model: every unordered pair of members of a complex interacts.
# Complexes with fewer than two members contribute no pair.
multi.member <- Filter(function(ids) length(ids) > 1, complex.members)
pair.matrix <- do.call(cbind, lapply(multi.member, combn, m=2))
if (is.null(pair.matrix)) {
  # No complex with at least two members: keep an empty 2-row matrix so the
  # code below still yields an empty, well-formed data frame.
  pair.matrix <- matrix(character(), nrow=2)
}

vector.a <- pair.matrix[1, ]
vector.b <- pair.matrix[2, ]
network.corum.UniID <- data.frame(Uni.ID1=vector.a, Uni.ID2=vector.b,
                                  stringsAsFactors=FALSE)
```

### Connection to BiomaRt and find synonyms

```{r mart}
mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
# Each ID occurs in many pairs; query every distinct ID only once.
syn.col1 <- getBM(attributes=c("uniprotswissprot", "hgnc_symbol"),
                  filters="uniprotswissprot",
                  values=unique(network.corum.UniID[, 1]),
                  mart=mart)
syn.col2 <- getBM(attributes=c("uniprotswissprot", "hgnc_symbol"),
                  filters="uniprotswissprot",
                  values=unique(network.corum.UniID[, 2]),
                  mart=mart)
```

### remove synonyms

```{r remove synnyms}
# Pool both lookups and drop the duplicated (UniProt ID, symbol) rows.
syn2 <- rbind(syn.col1, syn.col2)
syn2 <- syn2[!duplicated(syn2), ]
```

### merging tables and cleaning

```{r merging, cleaning and conversion to matrix}
# first column: attach the symbol of Uni.ID1
net <- merge(network.corum.UniID, syn2,
             by.x="Uni.ID1", by.y="uniprotswissprot")
# second column: attach the symbol of Uni.ID2
net <- merge(net, syn2, by.x="Uni.ID2", by.y="uniprotswissprot")
# select the columns of interest; merge() suffixes the two clashing
# "hgnc_symbol" columns with ".x" (first merge) and ".y" (second merge)
net <- net[, c("hgnc_symbol.x", "hgnc_symbol.y")]
colnames(net) <- c("Symbol.A", "Symbol.B")
# remove the lines that have "NA" or blank (clean.network() is defined in
# Utils.R)
net <- clean.network(net)
```

### Graph conversion, simplification and export to file

```{r build igraph}
# build.network() is defined in Utils.R.
net <- build.network(as.matrix(net))
vcount(net)
ecount(net)
write.graph(net, "Complexes.gr", format="ncol", weights=NULL)
```