#Not good files in /incoming/mab
#bed0ind.tar.gz
#bed0int.tar.gz
#DMX.X.bed.tar.gz

# Mirror the incoming FTP directory into ~/public_html/ (wget -m = mirror mode).
getdata:
	pushd ~/public_html/; wget -m ftp://ftp.stowers-institute.org/incoming/mab/; popd

# Rename the chromosomes for UCSC (prefix every record with "chr") and
# distribute the records into multiple files, one per chromosome.
# The perl filter comments out dmel_mito records; lines that start with '#'
# skip the prefixing but are still printed, because -p prints on `next`.
# ./distlines is a local helper; presumably it evaluates the given perl
# expression for each input line and appends the line to the file it names
# -- TODO confirm against the distlines source.
# usage make DMF3
# NOTE(review): the rule header below is kept exactly as found; it does not
# look like a valid pattern rule for "make DMF3" -- confirm the intended
# target/prerequisite patterns.
# NOTE(review): ${file} is never set anywhere in these notes (perhaps the
# stem $* was intended), and if this recipe really runs under make the perl
# variables ($1, $_) would need $$ escaping -- confirm before relying on it.
# Fixed: the second column pattern was "\\tS+" (a run of literal 'S'
# characters); it is now "\\t\\S+" so any non-blank source column matches.
%_x: %. : .filt.int.gff3
	perl -pe 's/^(dmel_mito)/#$1/; next if m/^#/; $_ = "chr" . $_; ' $< | ./distlines "m/^(chr\\S+)\\t\\S+\\t(\\S+)/i ? \"\$1_\$2_${file}.gff3\" : 'ignoreme'" $@

# FIXME(review): a dangling "done" and "}" followed the recipe above with no
# matching "for ...; do" or opening brace anywhere in the notes.  They are
# kept here, commented out, as a reminder that a loop wrapper was lost.
#done
#}

# Same chr-prefixing, run directly from the shell: first attempt on one sample
# (kept for reference), then on all DMF*/DMM* samples at once.
#perl -pe 's/^(dmel_mito)/#$1/; next if m/^#/; $_ = "chr" . $_; ' DMF2.filt.int.gff3 | ./distlines 'm/^(chr\S+)/i ? "$1.gff3" : "ignoreme"'
perl -pe 's/^(dmel_mito)/#$1/; next if m/^#/; $_ = "chr" . $_; ' DM{F,M}*.filt.int.gff3 | ./distlines 'm/^(chr\S+)/i ? "$1.gff3" : "ignoreme"'

# combine all chromosomes together
# cat chr*.gff3 > allchr.gff3

# create separate files, one each for exon, intron, mRNA
# (keyed on the third tab-separated column; any line without three columns
# makes the expression die with "bad")
./distlines 'm/^\S+\t\S+\t(\S+)/ ? "$1.gff3" : die "bad"' chr*.gff3

# edit gff3 column 9 to be just the Parent attribute, if any (for ucsc browser,
# which will group on it) - i ran a few of these - get it right next time!
# The two commands with -pi rewrite the files in place; the two with plain -p
# only print the result to stdout (dry runs).
# Fixed: both dry-run substitutions were missing their closing "/" and failed
# with "Substitution replacement not terminated".
perl -pi -e 's/\t(\S*);(Name=\w*$)/\t$2;$1/' intron.gff3 exon.gff3 mRNA.gff3
perl -p -e 's/\t(.*);(Parent=[^;]+)(.*)/\t$2;$1$3/' intron.gff3 exon.gff3 mRNA.gff3
perl -p -e 's/\t(\S*)Parent=([^;]+)(\S*)/\t$2/' intron.gff3 exon.gff3
perl -pi -e 's/\t\S*\;ID=(.*);.*/\t$1/' mRNA.gff3

################################################################################
#get just those features in region
# -a autosplits each line on whitespace into @F; fields 4 and 5 (the GFF3
# start and end columns) must both lie inside 4411142..4411765.
# NOTE(review): the output file name says "441765" while the filter uses
# 4411765 -- probably a dropped digit; name left unchanged in case other
# notes or browser tracks already refer to this file.
perl -ane 'next unless $F[3] >= 4411142 && $F[4] <= 4411765; print' chr2L.gff3 > chr2L4411142-441765.gff3

# extract fasta record for these individual reads
# The three-dot range operator turns true on the wanted header and stays true
# up to the next line starting with ">".  Its value is a sequence number that
# gets "E0" appended on the final line of the range, so "unless /\d+E0/"
# drops that terminating header (the start of the following record).
perl -ne ' if (my $x = m/>EO2B94C02JRNMB/ ... m/^>/) {print unless $x =~/\d+E0/}' DMF2.fna > EO2B94C02JRNMB.fna
perl -ne ' if (my $x = m/>EO2B94C02FLGQ7/ ... m/^>/) {print unless $x =~/\d+E0/}' DMF2.fna > EO2B94C02FLGQ7.fna