# efg, 18 Jan 2006
#
# Combine every worksheet of "CAMDA_SNP description.xls" into one CSV file,
# "SNP-Description.csv". A leading "haplotype" column records the name of the
# worksheet each row came from.

# Trim whitespace from the beginnings and ends of strings (John Fox, R-Help).
#   s: character vector.
# Returns s with leading and trailing [[:space:]] characters removed from
# every element.
trim <- function(s) {
  s <- gsub("^[[:space:]]+", "", s)
  gsub("[[:space:]]+$", "", s)
}

#setwd("U:/camda/2006/snp_data/SNP Data")

library(RODBC)   # available under Linux?
# NOTE(review): RODBC documents odbcConnectExcel as a Windows-only wrapper --
# confirm before running this script on another platform.

connection <- odbcConnectExcel("CAMDA_SNP description.xls")

# RODBC returns -1 (not an "RODBC" handle) when the connection fails; stop
# here with a clear message instead of failing later inside sqlTables().
if (!inherits(connection, "RODBC")) {
  stop("could not open 'CAMDA_SNP description.xls' via ODBC", call. = FALSE)
}

# tryCatch(finally = ) guarantees the connection is closed even when reading
# or cleaning one of the worksheets fails.
tryCatch({
  worksheets <- sqlTables(connection)$TABLE_NAME
  # Table names are reported with "$" characters; strip them to get the
  # names passed to sqlFetch().
  worksheet.set <- gsub("\\$", "", worksheets)

  # One slot per worksheet; bound together once after the loop rather than
  # growing a data frame with rbind() on every iteration.
  sheets <- vector("list", length(worksheet.set))

  # seq_along() is safe for an empty workbook (1:0 would iterate twice).
  for (i in seq_along(worksheet.set)) {
    item <- worksheet.set[i]
    cat(i, item, "\n")
    worksheet <- sqlFetch(connection, item)

    # get rid of lines with NA for [Celera SNP]
    worksheet <- worksheet[!is.na(worksheet[, 1]), ]

    # Add new column one "haplotype"
    worksheet <- cbind(rep(item, nrow(worksheet)), worksheet)
    names(worksheet)[1] <- "haplotype"

    # Leave original files "untouched" and make fixes here:

    # Fix inconsistency in extra trailing blanks
    names(worksheet) <- trim(names(worksheet))

    # The fixes below address columns 7-9 by position; fail with a readable
    # message rather than "missing value where TRUE/FALSE needed".
    if (ncol(worksheet) < 9) {
      stop("worksheet '", item, "' has only ", ncol(worksheet),
           " columns (including haplotype); at least 9 are required",
           call. = FALSE)
    }

    # Fix inconsistent naming of last two columns
    if (names(worksheet)[9] == "MF Caucasian") {
      names(worksheet)[9] <- "MF (Caucasian)"
    }

    if (names(worksheet)[8] == "Chrom loc") {
      names(worksheet)[8] <- "Chromosome"
    }

    # Force case consistency
    names(worksheet)[7] <- toupper(names(worksheet)[7])

    print(names(worksheet))

    sheets[[i]] <- worksheet
  }
}, finally = close(connection))

# NULL when the workbook has no worksheets, matching the original initial value.
combined <- do.call(rbind, sheets)

write.csv(combined, file = "SNP-Description.csv", row.names = FALSE)