# Match array probes with gene IDs from biomaRt
# efg, Stowers Institute, 3 Feb 2006
#
# Reads a CSV of array probes (column "probe"), queries biomaRt for each
# probe -- first as an EMBL ID, then as a RefSeq ID if the EMBL lookup
# yields no symbol -- and writes one row per (probe, gene hit) to
# "ArrayProbeGeneInfo.csv" in the current working directory.
#
# NOTE(review): martConnect(), getGene(..., species=, mart=), the `@table`
# slot and martDisconnect() are reproduced unchanged from the original
# script; confirm they exist in the installed biomaRt version.

library(biomaRt)

# Choose the root of the project tree based on the Windows/UNIX platform.
BasePath <- switch(.Platform$OS.type,
  windows = "U:",
  unix    = "/n/projects",
  stop("unsupported OS platform")
)

filename <- file.path(BasePath, "efg", "camda", "2006", "arrayprobes.csv")
array.probes <- read.csv(filename, as.is = TRUE)
probe.list <- array.probes$probe

# Total and distinct probe counts (auto-printed when run at top level).
length(probe.list)
length(unique(probe.list))

# Order in which the fields of GeneInfo@table are written after the probe:
# the table's second field is moved to the end.
field.order <- c(1, 3, 4, 5, 6, 7, 2)

GeneInfo.List <- NULL

mart <- martConnect()

# `finally` guarantees the mart connection is released even when a lookup
# or the CSV write fails part-way through.
tryCatch(
  {
    # Collect one matrix of rows per probe and bind once at the end,
    # instead of growing GeneInfo.List with rbind() on every iteration.
    probe.rows <- vector("list", length(probe.list))

    # seq_along() (not 1:length()) so an empty probe list does nothing.
    for (i in seq_along(probe.list)) {
      probe <- probe.list[i]
      cat(i, probe, "\n") # show progress
      flush.console()

      # Assume embl ID
      GeneInfo <- getGene(id = probe, type = "embl",
                          species = "hsapiens", mart = mart)

      symbol <- GeneInfo@table$symbol
      if (length(symbol) == 0 || is.na(symbol[1])) {
        # If the embl lookup fails, try the probe as a refseq ID
        GeneInfo <- getGene(id = probe, type = "refseq",
                            species = "hsapiens", mart = mart)
      }

      # A probe can map to more than one gene: emit one row per hit by
      # taking the k-th element of every table field.
      hits <- GeneInfo@table
      per.gene <- lapply(seq_along(hits$symbol), function(k) {
        c(probe, unlist(lapply(hits, "[", k)[field.order]))
      })

      # do.call(rbind, list()) is NULL, which the final rbind() ignores.
      probe.rows[[i]] <- do.call(rbind, per.gene)
    }

    GeneInfo.List <- do.call(rbind, probe.rows)

    if (is.null(GeneInfo.List)) {
      stop("no gene information was retrieved; nothing to write",
           call. = FALSE)
    }

    colnames(GeneInfo.List)[c(1, 2)] <- c("probe", "gene")
    write.csv(GeneInfo.List, row.names = FALSE,
              file = "ArrayProbeGeneInfo.csv")
  },
  finally = martDisconnect(mart)
)