#!/usr/bin/perl
use strict;
use warnings;

use Bio::SeqIO;

#=================================
# Script to remove sequences with redundant names from fasta files
# briandotoakleyatarsdotusdadotgov
# k.purdy at warwick.ac dot uk
#=================================
#
# Usage:  <this script> filename
#
# Reads the FASTA file given as the first command-line argument and writes
# the first record seen for each sequence ID to 'uniq_seqs.fas' in the
# current directory. Later records that reuse an already-seen ID are
# dropped; the relative order of the retained records is unchanged.

my $usage       = "\nusage: $0 filename\n\n";
my $output_file = 'uniq_seqs.fas';

# Test definedness/length rather than truth so a file literally named "0"
# is still accepted.
my $input_file = $ARGV[0];
die $usage unless defined $input_file && length $input_file;

#----Open the input first------
# Opening the reader before the writer means a failure to open the input
# happens before the output file has been created or truncated.
my $seq_in = Bio::SeqIO->new(
    -format => 'fasta',
    -file   => $input_file,
);

#---Open output file----
my $seq_out = Bio::SeqIO->new(
    -format => 'fasta',
    -file   => ">$output_file",
);

#----Stream records, keeping only the first occurrence of each ID----
# A single pass avoids holding every sequence object in memory; the
# post-increment makes the test false only the first time an ID is seen.
my %seen;
while ( my $seq = $seq_in->next_seq() ) {
    next if $seen{ $seq->id() }++;
    $seq_out->write_seq($seq);
}

# Close explicitly so buffered output is flushed before reporting success.
$seq_out->close();

print "\nDone. Sequences written to '$output_file'\n\n";