#!/usr/bin/perl
use strict;
use warnings;

# Lookup tables filled while reading the aligned FASTA input:
#   %id_seq  : record name      -> sequence with alignment gaps removed
#   %seq_ids : ungapped sequence -> array ref of every record name carrying it
my %id_seq;
my %seq_ids;

my $input_file = "PG_16S_rRNA_unique_name.mafft.trimmed.fasta";

# store_record($id, $seq)
# Strip gap characters ('-') from $seq and register the record in both
# lookup tables. Carriage returns have already been removed by the reader.
sub store_record {
	my ($id, $seq) = @_;
	$seq =~ tr/-//d;
	$id_seq{$id} = $seq;
	push @{ $seq_ids{$seq} }, $id;
	return;
}

# Three-argument open with a lexical handle; report the file and the OS error
# instead of dying silently.
open(my $in, '<', $input_file) or die "Cannot open $input_file: $!\n";

my $id;          # name of the record currently being accumulated
my $seq = "";    # sequence lines collected so far for that record

while (my $line = <$in>) {
	# Drop the line terminator, including every CR from Windows line endings.
	$line =~ tr/\r\n//d;

	# The very first line is always taken as a header, even without a
	# leading '>'; afterwards any line starting with '>' opens a new record.
	if (!defined $id or $line =~ /^>/) {
		# Flush the previous record before starting the next one.
		store_record($id, $seq) if defined $id;
		($id = $line) =~ s/^>//;
		$seq = "";
	}else{
		$seq .= $line;
	}
}
close($in);

# Flush the last record. An empty input file yields no records at all
# rather than one bogus entry with an empty name and empty sequence.
store_record($id, $seq) if defined $id;


# Write one entry per unique sequence to four files:
#   id_name.txt            UT<n> <TAB> names sharing the sequence (';'-joined)
#   summary.txt            Uniqueseq.<n> <TAB> copy count <TAB> names
#   trim_unique_name.fasta FASTA, header ">UT<n> <names>"
#   trim_unique_id.fasta   FASTA, header ">UT<n>"
open(my $nam,  '>', "id_name.txt")            or die "Cannot write id_name.txt: $!\n";
open(my $sum,  '>', "summary.txt")            or die "Cannot write summary.txt: $!\n";
open(my $out,  '>', "trim_unique_name.fasta") or die "Cannot write trim_unique_name.fasta: $!\n";
open(my $out1, '>', "trim_unique_id.fasta")   or die "Cannot write trim_unique_id.fasta: $!\n";

my $u=0;    # running number of the unique sequence (UT1, UT2, ...)

# Hash key order is randomized per process, so iterate over the sorted
# sequences to give every run the same UT numbering for the same input.
foreach my $seq (sort keys %seq_ids){
	# Work on the name list directly: joining and re-splitting on ';'
	# would miscount copies whenever a name itself contains ';'.
	my @names = sort { $a cmp $b } @{ $seq_ids{$seq} };
	my $copy  = scalar(@names);
	my $ids   = join(";", @names);
	$u++;
	print {$out}  ">UT".$u." ".$ids."\n".$seq."\n";
	print {$out1} ">UT".$u."\n".$seq."\n";
	print {$nam}  "UT".$u."\t".$ids."\n";
	print {$sum}  "Uniqueseq.".$u."\t".$copy."\t".$ids."\n";
}

# Close every handle and check the result: buffered write errors
# (e.g. a full disk) only surface when the handle is closed.
close($out)  or die "Error closing trim_unique_name.fasta: $!\n";
close($out1) or die "Error closing trim_unique_id.fasta: $!\n";
close($nam)  or die "Error closing id_name.txt: $!\n";
close($sum)  or die "Error closing summary.txt: $!\n";

