use strict;
use warnings;
# initial source in sequence order
my $source = 'horse:cow:dog:cat';
# hash used to store results (weed out dups)
my %hash;
function(\%hash, $source);
# and now the results
foreach (keys %hash) {
print "$_\n";
}
# generate array of combinations
# (hash keys are the results)
sub function {
my $hash_ref = shift;
my $source = shift;
# skip it if we've been here
return if $hash_ref->{$source};
$hash_ref->{$source}++;
# remove one of each part from the whole
if ((my @parts = split(/:/, $source)) > 1) {
for (my $i = 0; $i < @parts; $i++) {
my @parts_copy = split(/:/, $source);
splice(@parts_copy, $i, 1);
function($hash_ref, join(':',@parts_copy));
}
}
return;
}
An alternative:
use Algorithm::Loops qw( NestedLoops );
my $source = 'horse:cow:dog:cat';
{
my @parts = split(/:/, $source);
my $iter = NestedLoops(
[
[ 0..$#parts ],
( sub { [ $_+1..$#parts ] } ) x $#parts,
],
{ OnlyWhen => 1 },
);
my @s;
print(join(':', map $parts[$_], @s), "\n")
while @s = $iter->();
}
The above outputs
horse horse:cow horse:cow:dog horse:cow:dog:cat horse:cow:cat horse:dog horse:dog:cat horse:cat cow cow:dog cow:dog:cat cow:cat dog dog:cat cat
0 = 0b0000 --> {nothing}
3 = 0b0011 --> 'horse:cow'
13 = 0b1101 --> 'horse:dog:cat'
The algorithm then simply becomes a loop over 1 .. 2**@kw -1, testing the bits for each number.
Here's my first implementation of it. It's probably not as efficient as possible yet.
sub rhesa {
# initial source in sequence order
my $source = 'horse:cow:dog:cat';
my @kw = split /:/, $source;
my @res;
for my $i( 1 .. 2**@kw - 1 ) {
my @ar; my $t;
while( $i > 0 ) {
push @ar, $kw[$t] if $i & 1;
$i >>= 1; $t++;
}
push @res, join ':', @ar;
}
return @res;
}
I'm a bit irritated with the number of temporary variables, but I can't think of anything prettier just now. Hope it helps :)
BTW, a simple [mod://Benchmark] comparison showed a 200% speed increase over your version.
Here's my somewhat golfed version:
sub rhesa2 {
my $source = shift;
my @kw = split /:/, $source;
map {
my (@ar, $t);
do {
($_ & 1) and push @ar, $kw[$t];
$t++;
} while ($_ >>= 1);
join ':', @ar
} ( 1 .. 2**@kw - 1 )
}
my @res = rhesa2('horse:cow:dog:cat');
use strict;
use warnings;
use Algorithm::Loops qw( NestedLoops );
use Benchmark qw( cmpthese );
sub _ruzam {
my ($source, $hash) = @_;
return if $hash->{$source};
my @parts = split(/:/, $source);
return if @parts < 1;
++$hash->{$source};
for my $i (0..$#parts) {
my @parts_copy = @parts;
splice(@parts_copy, $i, 1);
_ruzam(join(':', @parts_copy), $hash);
}
}
sub ruzam {
my ($source) = @_;
my %hash;
_ruzam($source, \%hash);
return keys %hash;
}
sub ikegami1 {
my ($source) = @_;
my @parts = split(/:/, $source);
return NestedLoops(
[
[ 0..$#parts ],
( sub { [ $_+1..$#parts ] } ) x $#parts,
],
{ OnlyWhen => 1 },
sub { join(':', map $parts[$_], @_) },
);
}
sub ikegami2 {
my ($source) = @_;
my @parts = split(/:/, $source);
my @rv;
for my $comb (1..2**@parts-1) {
push @rv, join ':',
map $parts[$_],
grep $comb & (1<<$_),
0..$#parts;
}
return @rv;
}
sub rhesa {
my ($source) = @_;
my @parts = split /:/, $source;
my @rv;
for my $i( 1 .. 2**@parts - 1 ) {
my @ar;
my $t = 0;
while( $i > 0 ) {
push @ar, $parts[$t] if $i & 1;
$i >>= 1; $t++;
}
push @rv, join ':', @ar;
}
return @rv;
}
{
local our $source = 'horse:cow:dog:cat';
my $expected = 'cat|cow|cow:cat|cow:dog|cow:dog:cat|dog|dog:cat|horse|horse:cat|horse:cow|horse:cow:cat|horse:cow:dog|horse:cow:dog:cat|horse:dog|horse:dog:cat';
foreach (qw( ruzam ikegami1 ikegami2 rhesa )) {
printf("%-9s ", "$_:");
my $rv = join '|',
sort
do { no strict 'refs'; \&{$_} }->($source);
if ($rv eq $expected) {
print("ok");
} else {
print("bad ($rv)");
}
print("\n");
}
print("\n");
cmpthese(-3, {
# ruzam => q{ use strict; use warnings; my @rv = ruzam our $source; 1 },
# ikegami1 => q{ use strict; use warnings; my @rv = ikegami1 our $source; 1 },
# ikegami2 => q{ use strict; use warnings; my @rv = ikegami2 our $source; 1 },
# rhesa => q{ use strict; use warnings; my @rv = rhesa our $source; 1 },
ruzam => q{ my @rv = ruzam $source; 1 },
ikegami1 => q{ my @rv = ikegami1 $source; 1 },
ikegami2 => q{ my @rv = ikegami2 $source; 1 },
rhesa => q{ my @rv = rhesa $source; 1 },
});
}
__END__
ruzam: ok
ikegami1: ok
ikegami2: ok
rhesa: ok
Rate ikegami1 ruzam ikegami2 rhesa
ikegami1 2438/s -- -17% -66% -68%
ruzam 2943/s 21% -- -58% -61%
ikegami2 7082/s 191% 141% -- -6%
rhesa 7560/s 210% 157% 7% --
The regexp on the other hand... the approach certainly is neat, i'll give you that ;) ++ for speed and ingenuity, but ouch does my head spin!
sub ikegami3x {
local $_ = ":$_[0]:";
my $parts = tr/:/:/ - 2; # take 2 here instead of -1 later
my $re = '(?{ "" })'
. '(:[^:]*)(?=:)(?{ $^R . $^N })'
. '(?:.*(:[^:]*)(?=:)(?{ $^R . $^N })' x $parts
. ')?' x $parts
. '(?{ push @rv, substr($^R, 1) })'
. '(?!)';
{ use re 'eval'; $re = qr/$re/; }
local our @rv;
/$re/;
return @rv;
}
sub rhesa2 {
my @kw = split /:/, $_[0];
map {
my @ar;
my $t = 0; # initialize $t
do {
($_ & 1) and push @ar, $kw[$t];
$t++;
} while ($_ >>= 1);
join ':', @ar;
} ( 1 .. 2**@kw - 1 );
}
source: horse:cow:dog:cat
Rate ruzam ikegami3 ikegami2 ikegami3x rhesa2 rhesa
ruzam 4620/s -- -61% -61% -61% -66% -66%
ikegami3 11764/s 155% -- -0% -1% -12% -14%
ikegami2 11819/s 156% 0% -- -1% -12% -13%
ikegami3x 11935/s 158% 1% 1% -- -11% -13%
rhesa2 13444/s 191% 14% 14% 13% -- -2%
rhesa 13657/s 196% 16% 16% 14% 2% --
source: horse
Rate ikegami3 ikegami3x ruzam ikegami2 rhesa rhesa2
ikegami3 40841/s -- -1% -37% -62% -64% -72%
ikegami3x 41226/s 1% -- -37% -62% -64% -72%
ruzam 65317/s 60% 58% -- -39% -43% -55%
ikegami2 107178/s 162% 160% 64% -- -6% -26%
rhesa 114470/s 180% 178% 75% 7% -- -21%
rhesa2 145232/s 256% 252% 122% 36% 27% --
source: horse:cat
Rate ruzam ikegami3 ikegami3x ikegami2 rhesa rhesa2
ruzam 26853/s -- -2% -7% -48% -53% -58%
ikegami3 27324/s 2% -- -5% -47% -52% -58%
ikegami3x 28732/s 7% 5% -- -45% -50% -55%
ikegami2 51965/s 94% 90% 81% -- -9% -19%
rhesa 57233/s 113% 109% 99% 10% -- -11%
rhesa2 64472/s 140% 136% 124% 24% 13% --
source: horse:cow:cat
Rate ruzam ikegami3 ikegami3x ikegami2 rhesa rhesa2
ruzam 10772/s -- -41% -42% -58% -61% -64%
ikegami3 18305/s 70% -- -1% -28% -33% -38%
ikegami3x 18436/s 71% 1% -- -27% -33% -38%
ikegami2 25353/s 135% 39% 38% -- -7% -15%
rhesa 27363/s 154% 49% 48% 8% -- -8%
rhesa2 29753/s 176% 63% 61% 17% 9% --
source: horse:cow:dog:cat:mouse
Rate ruzam rhesa ikegami2 rhesa2 ikegami3x ikegami3
ruzam 1632/s -- -67% -68% -73% -75% -75%
rhesa 5021/s 208% -- -3% -17% -24% -24%
ikegami2 5159/s 216% 3% -- -14% -22% -22%
rhesa2 6023/s 269% 20% 17% -- -9% -9%
ikegami3x 6614/s 305% 32% 28% 10% -- -0%
ikegami3 6634/s 307% 32% 29% 10% 0% --
The name of the variable doesn't match the value it contains in
my $parts = tr/:/:/ - 2; # take 2 here instead of -1 later
and it doesn't give you anything. That was a step in the wrong direction.
Cheers - L~R
My basic idea is to map the array indices to bits in a binary number. If a bit is on, you take that element out of the source array.
This is precisely the same approach that I used in Data::PowerSet, for indeed, what the OP is looking for is the power set of the list.
• another intruder with the mooring in the heart of the Perl
Here's my stab at it. I took out the hash because my method doesn't generate duplicates. Instead I just return the list of results:
#!/usr/bin/perl -w
use strict;
use warnings;
# initial source in sequence order
my $source = 'horse:cow:dog:cat';
function($source);
# and now the results
foreach (function($source)) {
print "$_\n";
}
# generate array of combinations
sub function {
my @t = split /:/, shift;
my @res=(shift @t);
for my $i (@t) {
@res=($i, @res, map{$_.':'.$i} @res);
}
return @res;
}
--roboticus
source: horse:cow:dog:cat
Rate ruzam ikegami3 rhesa rhesa2 roboticus
ruzam 4655/s -- -62% -66% -66% -80%
ikegami3 12238/s 163% -- -10% -11% -47%
rhesa 13612/s 192% 11% -- -1% -41%
rhesa2 13742/s 195% 12% 1% -- -40%
roboticus 22986/s 394% 88% 69% 67% --
source: horse
Rate ikegami3 ruzam rhesa rhesa2 roboticus
ikegami3 41518/s -- -36% -64% -72% -84%
ruzam 64752/s 56% -- -43% -56% -75%
rhesa 113875/s 174% 76% -- -22% -56%
rhesa2 146152/s 252% 126% 28% -- -43%
roboticus 257311/s 520% 297% 126% 76% --
source: horse:cat
Rate ruzam ikegami3 rhesa rhesa2 roboticus
ruzam 27050/s -- -3% -53% -59% -72%
ikegami3 27836/s 3% -- -52% -57% -71%
rhesa 57415/s 112% 106% -- -12% -40%
rhesa2 65371/s 142% 135% 14% -- -32%
roboticus 96089/s 255% 245% 67% 47% --
source: horse:cow:cat
Rate ruzam ikegami3 rhesa rhesa2 roboticus
ruzam 10920/s -- -43% -61% -64% -76%
ikegami3 19183/s 76% -- -32% -37% -59%
rhesa 28259/s 159% 47% -- -7% -39%
rhesa2 30244/s 177% 58% 7% -- -35%
roboticus 46412/s 325% 142% 64% 53% --
source: horse:cow:dog:cat:mouse
Rate ruzam rhesa2 rhesa ikegami3 roboticus
ruzam 1855/s -- -67% -68% -72% -82%
rhesa2 5676/s 206% -- -2% -14% -45%
rhesa 5781/s 212% 2% -- -13% -44%
ikegami3 6614/s 257% 17% 14% -- -36%
roboticus 10353/s 458% 82% 79% 57% --
source: horse:cow:dog:cat:rat:mouse
Rate ruzam rhesa2 rhesa ikegami3 roboticus
ruzam 799/s -- -70% -70% -77% -85%
rhesa2 2659/s 233% -- -0% -24% -50%
rhesa 2667/s 234% 0% -- -24% -50%
ikegami3 3521/s 340% 32% 32% -- -34%
roboticus 5371/s 572% 102% 101% 53% --
Nice! If you want a huge speed boost, replace
@rv=($i, @rv, map{$_.':'.$i} @rv);
with
push @rv, $i, map{$_.':'.$i} @rv;
Thanks for the tip on the speed boost. Once I get the benchmarking stuff installed I'll play with it. While I do like your suggestion, I prefer the order that my method generates--all one-word combinations first, then the two-word combinations, etc.)
Re: shoddy code. Yeah, I guess so, consider me properly admonished. ++ for calling me on it and keeping me honest. When I thunk up the technique to use, I just erased the original function body and started whacking away at it. I didn't think to clarify things by using better variable names. (Of course, I just came off of a little [id://554392|golfing trip] so my head was in "trim keystrokes" mode.</lame_excuse_mode>) Now, I guess the proper thing to do is to clean it up a little and insert your suggestion, so here goes:
<pedagogical_mode>
sub function {
my @parts = split /:/, shift;
# Null is the complete list of combinations for
# an empty word list
my @combinations=();
# Sequentially (recursively with tail recursion
# removed) rebuild the combination list adding one
# new word each iteration
for my $new_word (@parts) {
# Given a complete set of combinations for a
# given list of words, we can add a new word to
# the list and generate all new valid combinations
# by concatenating to the original list:
push @combinations,
# the new word (a single word is a valid
# combination)
$new_word,
# and the original list with the new word
# glommed onto the end of each member
map {$_.':'.$new_word} @combinations
;
}
return @combinations;
}
</pedagogical_mode>--roboticus
source: horse:cow:dog:cat
Rate ruzam roboticus roboticus2
ruzam 4695/s -- -80% -86%
roboticus 23049/s 391% -- -33%
roboticus2 34607/s 637% 50% --
source: horse
Rate ruzam roboticus2 roboticus
ruzam 64954/s -- -71% -76%
roboticus2 223392/s 244% -- -16%
roboticus 266354/s 310% 19% --
source: horse:cat
Rate ruzam roboticus roboticus2
ruzam 26625/s -- -72% -76%
roboticus 96711/s 263% -- -13%
roboticus2 111706/s 320% 16% --
source: horse:cow:cat
Rate ruzam roboticus roboticus2
ruzam 11023/s -- -77% -82%
roboticus 46987/s 326% -- -24%
roboticus2 62225/s 465% 32% --
source: horse:cow:dog:cat:mouse
Rate ruzam roboticus roboticus2
ruzam 1913/s -- -82% -89%
roboticus 10826/s 466% -- -39%
roboticus2 17652/s 823% 63% --
source: horse:cow:dog:cat:rat:mouse
Rate ruzam roboticus roboticus2
ruzam 798/s -- -85% -91%
roboticus 5253/s 559% -- -43%
roboticus2 9221/s 1056% 76% --
I've found a good few ways to make it slower, though! 8^)
--roboticus
sub qm {
my ($glob) = @_;
my @rv;
$glob =~ s/(\w+)/{$1,}/g;
for my $combo (glob($glob))
{
$combo =~ s/^:+//;
next unless length($combo);
push @rv, join ':', split /:+/, $combo;
}
return @rv;
}
and is horribly slow as well.
BTW, it's interesting to note the change in benchmark results when the input is a long list of null strings:
$source = ':'x20;
-QM
--
Quantum Mechanics: The dreams stuff is made of
perlmonks.org content © perlmonks.org and grinder, ikegami, Limbic~Region, liverpole, QM, rhesa, roboticus, ruzam
prlmnks.org © 2006 edmund von der burg (eccles & toad)
v 0.03