The number of combinations for 4 choose 2 is 6. But what if I have a set in which 2 of the elements are the same? Of course, I could do a uniqueness check on each generated combination, but this is too computationally expensive with larger sets (as is calculating all the possible combinations in the first place). Knowing that my sets have many duplicates, I am trying to find an algorithm that can take advantage of this to reduce the amount of work it takes to generate all the combinations I care about.
Normally, [1',1'',2,3] choose 2 yields [1',1''] [1',2] [1',3] [1'',2] [1'',3] [2,3]. I know, though, that [1',2] == [1'',2] and [1',3] == [1'',3], so the number of combinations I am interested in is only 4.
To be clear, I am trying to generate the actual unique combinations, not just the number of them that exist, and without having to check each generated set against previous sets.
Update: Here is a working implementation in JavaScript, using the lodash utility library for convenience:
let _ = require('lodash');
// The sample items written out one entry per element, duplicates included.
let _items_1 = [
  'A', 'A', 'A',
  'B', 'B',
  'C',
];
// The same sample as a multiset: each distinct item mapped to its occurrence count.
let items_1 = {
  A: 3,
  B: 2,
  C: 1,
};
/**
 * Generates every distinct way to choose `n` items from a multiset, without
 * producing duplicates and without comparing results against each other.
 *
 * @param {Object} set - The multiset as `{ item: count }`, e.g. `{A: 3, B: 2, C: 1}`.
 *   Counts are expected to be non-negative integers.
 * @param {number} n - How many items each combination must contain.
 * @returns {Object[]} One `{ item: count }` object per combination; the counts in
 *   each object sum to `n`. Combinations are ordered by the first item they use
 *   (in property order), taking as many copies as possible first. Returns an
 *   empty array when `n` is 0 or when the set holds fewer than `n` items.
 */
function UniqueCombinations(set, n) {
  const props = Object.getOwnPropertyNames(set);

  // available[p] = how many items props[p..] can supply in total. It lets the
  // search skip branches that can never reach the requested size.
  const available = new Array(props.length + 1).fill(0);
  for (let p = props.length - 1; p >= 0; p--) {
    const count = Number(set[props[p]]);
    available[p] = available[p + 1] + (count > 0 ? count : 0);
  }

  // Combinations of exactly `needed` items drawn only from props[start..].
  // Recursing on an index avoids building a sub-object for every suffix.
  function combine(start, needed) {
    const combinations = [];
    for (let p = start; p < props.length; p++) {
      // Later suffixes hold even fewer items, so nothing further can succeed.
      if (available[p] < needed) break;

      const key = props[p];
      for (let i = Math.min(set[key], needed); i > 0; i--) {
        const left = needed - i;
        if (left > 0) {
          // Taking only i copies leaves more than the remaining items can fill.
          if (available[p + 1] < left) continue;
          for (const rest of combine(p + 1, left)) {
            combinations.push(Object.assign({ [key]: i }, rest));
          }
        } else {
          combinations.push({ [key]: i });
        }
      }
    }
    return combinations;
  }

  return combine(0, n);
}
// Enumerate every distinct 3-item selection from the sample multiset.
let combinations = UniqueCombinations(items_1, 3);
console.log(combinations);
// Expand each { item: count } record back into a flat list of items for display.
combinations.forEach(combination => {
  const expanded = [];
  Object.keys(combination).forEach(key => {
    // fill() repeats the whole key, so item names longer than one character
    // stay intact (repeat().split('') would break them into single letters).
    expanded.push(...new Array(combination[key]).fill(key));
  });
  console.log(expanded);
});
[ { A: 3 }, { A: 2, B: 1 }, { A: 2, C: 1 }, { A: 1, B: 2 }, { A: 1, B: 1, C: 1 }, { B: 2, C: 1 } ]
[ 'A', 'A', 'A' ] [ 'A', 'A', 'B' ] [ 'A', 'A', 'C' ] [ 'A', 'B', 'B' ] [ 'A', 'B', 'C' ] [ 'B', 'B', 'C' ]
This is by no means the cleanest or most performant implementation but a fine starting place for anyone else looking to do this.