Dor*_*gen 18 php arrays sorting performance
TLDR: 如何在php中找到多维数组排列以及如何针对更大的数组进行优化?
这是这个问题的延续: 如何在php中找到多维数组排列
我们有排序数组的脚本,想法是找到数组的唯一排列,找到这个排列的规则是:
- 输入数组包含一组数组.
- 每个内部数组都包含唯一元素.
- 每个内部阵列可以具有不同的长度和不同的值.
- 输出数组必须包含完全相同的值.
- 输出内部数组必须在同一个键上具有唯一值.
- 如果没有解决方案,
ie.: null则允许使用通配符 .- 通配符可以在同一个键上复制.
- 解决方案应该具有尽可能少的通配符.
- 算法应该能够在不到180秒的时间内处理高达30x30的阵列.
到目前为止我有这个解决方案:
function matrix_is_solved(array $matrix) {
foreach (array_keys(current($matrix)) as $offset) {
$column = array_filter($raw = array_column($matrix, $offset));
if (count($column) != count(array_unique($column))) return false;
}
return true;
}
function matrix_generate_vectors(array $matrix) {
$vectors = [];
$columns = count(current($matrix));
$gen = function ($depth=0, $combo='') use (&$gen, &$vectors, $columns) {
if ($depth < $columns)
for ($i = 0; $i < $columns; $i++)
$gen($depth + 1, $i . $combo);
else
$vectors[] = array_map('intval', str_split($combo));
};
$gen();
return $vectors;
}
function matrix_rotate(array $matrix, array $vector) {
foreach ($matrix as $row => &$values) {
array_rotate($values, $vector[$row]);
}
return $matrix;
}
function matrix_brute_solve(array $matrix) {
matrix_make_square($matrix);
foreach (matrix_generate_vectors($matrix) as $vector) {
$attempt = matrix_rotate($matrix, $vector);
if (matrix_is_solved($attempt))
return matrix_display($attempt);
}
echo 'No solution';
}
function array_rotate(array &$array, $offset) {
foreach (array_slice($array, 0, $offset) as $key => $val) {
unset($array[$key]);
$array[$key] = $val;
}
$array = array_values($array);
}
function matrix_display(array $matrix = null) {
echo "[\n";
foreach ($matrix as $row => $inner) {
echo " $row => ['" . implode("', '", $inner) . "']\n";
}
echo "]\n";
}
function matrix_make_square(array &$matrix) {
$pad = count(array_keys($matrix));
foreach ($matrix as &$row)
$row = array_pad($row, $pad, '');
}
$tests = [
[ ['X'], ['X'] ],
[ ['X'], ['X'], ['X'] ],
[ [ 'X', '' ], [ '', 'X' ] ],
[ ['X', 'Y', 'Z'], ['X', 'Y'], ['X']],
[ ['X', 'Y'], ['X', 'Y'], ['X', 'Y'] ]
];
array_map(function ($matrix) {
matrix_display($matrix);
echo "solved by:" . PHP_EOL;
matrix_brute_solve($matrix);
echo PHP_EOL;
}, $tests);
Run Code Online (Sandbox Code Playgroud)
这在小阵列上非常有效,但是麻烦在于迭代阵列运动的所有可能性,而对于像6x6这样的阵列来说,在时间和空间上计算得太多了!O(nn)
解决方案实际上非常简单.检查唯一字符的数量,这是输出数组中的值的数量.下面的代码几乎可以立即执行您想要的操作.
困难的部分是删除通配符.如果您想要100%确定性,那么您只能使用强力执行.下面的解决方案将尝试通过按顺序多次切换位置来删除所有通配符.
这类似于Google 在其OR工具中处理旅行商问题的方式.您需要找到准确性和速度之间的最佳组合.通过在下面的函数中设置更高的循环计数,成功的机会增加.但它会慢一些.
/* HELPERS */
function ShowNice($output) {
//nice output:
echo '<pre>';
foreach($output as $key=>$val) {
echo '<br />' . str_pad($key,2," ",STR_PAD_LEFT) . ' => [';
$first = true;
foreach($val as $char) {
if (!$first) {
echo ', ';
}
echo "'".$char."'";
$first = false;
}
echo ']';
}
echo '</pre>';
}
function TestValid($output, $nullchar) {
$keys = count($output[0]);
for ($i=0;$i<$keys;$i++) {
$found = [];
foreach($output as $key=>$val) {
$char = $val[$i];
if ($char==$nullchar) {
continue;
}
if (array_key_exists($char, $found)) {
return false; //this char was found before
}
$found[$char] = true;
}
}
return true;
}
$input = [
0 => ['X', 'Y', 'Z', 'I', 'J'],
1 => ['X', 'Y', 'Z', 'I'],
2 => ['X', 'Y', 'Z', 'I'],
3 => ['X', 'Y', 'Z', 'I'],
4 => ['X', 'Y', 'Z'],
5 => ['X', 'Y', 'Z']
];
//generate large table
$genLength = 30; //max double alphabet
$innerLength = $genLength;
$input2 = [];
for($i=0;$i<$genLength;$i++) {
$inner = [];
if (rand(0, 1)==1) {
$innerLength--;
}
for($c=0;$c<$innerLength;$c++) {
$ascii = 65 + $c; //upper case
if ($ascii>90) {
$ascii += 6; //lower case
}
$inner[] = chr($ascii);
}
$input2[] = $inner;
}
//generate large table with different keys
$genLength = 10; //max double alphabet
$innerLength = $genLength;
$input3 = [];
for($i=0;$i<$genLength;$i++) {
$inner = [];
if (rand(0, 1)==1) {
//comment o make same length inner arrays, but perhaps more distinct values
//$innerLength--;
}
$nr = 0;
for($c=0;$c<$innerLength;$c++) {
$ascii = 65 + $c + $nr; //upper case
if ($ascii>90) {
$ascii += 6; //lower case
}
//$inner[] = chr($ascii);
$inner[] = $c+$nr+1;
//increase nr?
if (rand(0, 2)==1) {
$nr++;
}
}
$input3[] = $inner;
}
//generate table with numeric values, to show what happens
$genLength = 10; //max double alphabet
$innerLength = $genLength;
$input4 = [];
for($i=0;$i<$genLength;$i++) {
$inner = [];
for($c=0;$c<$innerLength;$c++) {
$inner[] = $c+1;
}
$input4[] = $inner;
}
$input5 = [
0 => ['X', 'Y'],
1 => ['X', 'Y'],
2 => ['X', 'Y'],
];
$input6 = [
0 => ['X', 'Y', 'Z', 'I', 'J'],
1 => ['X', 'Y', 'Z', 'I'],
2 => ['X', 'Y', 'Z', 'I'],
3 => ['X', 'Y', 'Z', 'I'],
4 => ['X', 'Y', 'Z']
];
$input7 = [
['X', 'Y', 'A', 'B'],
['X', 'Y', 'A', 'C']
];
$input8 = [
['X', 'Y', 'A'],
['X', 'Y', 'B'],
['X', 'Y', 'C']
];
$input9 = [
['X', 'Z', 'Y', 'A', 'E', 'D'],
['X', 'Z', 'Y', 'A', 'B'],
['X', 'Z', 'Y', 'A', 'C'],
['X', 'Z', 'Y', 'A', 'D'],
['X', 'Z', 'Y', 'A', 'D'],
['X', 'Z', 'Y', 'A', 'D']
];
/* ACTUAL CODE */
CreateOutput($input, 1);
function CreateOutput($input, $loops=0) {
echo '<h2>Input</h2>';
ShowNice($input);
//find all distinct chars
//find maxlength for any inner array
$distinct = [];
$maxLength = 0;
$minLength = -1;
$rowCount = count($input);
$flipped = [];
$i = 1;
foreach($input as $key=>$val) {
if ($maxLength<count($val)) {
$maxLength = count($val);
}
if ($minLength>count($val) || $minLength==-1) {
$minLength = count($val);
}
foreach($val as $char) {
if (!array_key_exists($char, $distinct)) {
$distinct[$char] = $i;
$i++;
}
}
$flipped[$key] = array_flip($val);
}
//keep track of the count for actual chars
$actualChars = $i-1;
$nullchar = '_';
//add null values to distinct
if ($minLength!=$maxLength && count($distinct)>$maxLength) {
$char = '#'.$i.'#';
$distinct[$nullchar] = $i; //now it only gets add when a key is smaller, not if all are the same size
$i++;
}
//if $distinct count is small then rowcount, we need more distinct
$addForRowcount = (count($distinct)<$rowCount);
while (count($distinct)<$rowCount) {
$char = '#'.$i.'#';
$distinct[$char] = $i;
$i++;
}
//flip the distinct array to make the index the keys
$distinct = array_flip($distinct);
$keys = count($distinct);
//create output
$output = [];
$start = 0;
foreach($input as $key=>$val) {
$inner = [];
for ($i=1;$i<=$keys;$i++) {
$index = $start + $i;
if ($index>$keys) {
$index -= $keys;
}
if ($index>$actualChars) {
//just add the null char
$inner[] = $nullchar;
} else {
$char = $distinct[$index];
//check if the inner contains the char
if (!array_key_exists($char, $flipped[$key])) {
$char = $nullchar;
}
$inner[] = $char;
}
}
$output[] = $inner;
$start++;
}
echo '<h2>First output, unchecked</h2>';
ShowNice($output);
$newOutput = $output;
for ($x=0;$x<=$loops;$x++) {
$newOutput = MoveLeft($newOutput, $nullchar);
$newOutput = MoveLeft($newOutput, $nullchar, true);
$newOutput = SwitchChar($newOutput, $nullchar);
}
echo '<h2>New output</h2>';
ShowNice($newOutput);
//in $newoutput we moved all the invalid wildcards to the end
//now we need to test if the last row has wildcards
if (count($newOutput[0])<count($output[0])) {
$output = $newOutput;
}
echo '<h2>Best result ('.(TestValid($output, $nullchar)?'VALID':'INVALID').')</h2>';
ShowNice($output);
return $output;
}
function MoveLeft($newOutput, $nullchar, $reverse=false) {
//see if we can remove more wildcards
$lastkey = count($newOutput[0])-1;
$testing = true;
while ($testing) {
$testing = false; //we decide if we go another round ob_deflatehandler
$test = $newOutput;
$lastkey = count($newOutput[0])-1;
$start = 0;
$end = count($test);
if ($reverse) {
$start = count($test)-1;
$end = -1;
}
for($key = $start;$key!=$end;$key += ($reverse?-1:1) ) {
$val = $test[$key];
$org = array_values($val);
foreach($val as $i=>$char) {
if ($char!=$nullchar) {
continue; //we only test wildcards
}
$wildcardAtEnd=true;
for($x=$i+1;$x<=$lastkey;$x++) {
$nextChar = $val[$x];
if ($nextChar!=$nullchar) {
$wildcardAtEnd = false;
break;
}
}
if ($wildcardAtEnd===true) {
continue; //the char next to it must not be wildcard
}
//remove the wildcard and add it to the base64_encode
unset($val[$i]);
$val[] = $nullchar;
$test[$key] = array_values($val); //correct order
if (TestValid($test, $nullchar)) {
//we can keep the new one
$newOutput = $test;
$testing = true; //keep testing, but start over to make sure we dont miss anything
break 2; //break both foreach, not while
}
$test[$key] = $org; //reset original values before remove for next test
}
}
}
$allWildCards = true;
while ($allWildCards) {
$lastkey = count($newOutput[0])-1;
foreach($newOutput as $key=>$val) {
if ($val[$lastkey]!=$nullchar) {
$allWildCards = false;
break;
}
}
if ($allWildCards) {
foreach($newOutput as $key=>$val) {
unset($val[$lastkey]);
$newOutput[$key] = array_values($val);
}
$output = $newOutput;
}
}
return $newOutput;
}
function SwitchChar($newOutput, $nullchar) {
$switching = true;
$switched = [];
while($switching) {
$switching = false;
$test = $newOutput;
$lastkey = count($newOutput[0])-1;
foreach($test as $key=> $val) {
foreach($val as $index=>$char) {
$switched[$key][$index][$char] = true;//store the switches we make
//see if can move the char somewhere else
for($i=0;$i<=$lastkey;$i++)
{
if ($i==$index) {
continue;//current pos
}
if (isset($switched[$key][$i][$char])) {
continue; //been here before
}
$org = array_values($val);
$switched[$key][$i][$char] = true;
$t = $val[$i];
$val[$index] = $t;
$val[$i] = $char;
$test[$key] = array_values($val);
if (TestValid($test, $nullchar)) {
//echo '<br />VALID: ' . $key . ' - ' . $index . ' - ' . $i . ' - ' . $t . ' - ' . $char;
$newOutput = MoveLeft($test, $nullchar);
$switching = true;
break 3;//for and two foreach
}
//echo '<br />INVALID: ' . $key . ' - ' . $index . ' - ' . $i . ' - ' . $t . ' - ' . $char;
$val = $org;
$test[$key] = $org;
}
}
}
}
return $newOutput;
}
Run Code Online (Sandbox Code Playgroud)
结果:
Input
0 => ['X', 'Y', 'A']
1 => ['X', 'Y', 'B']
2 => ['X', 'Y', 'C']
First output, unchecked
0 => ['X', 'Y', 'A', '_', '_']
1 => ['Y', '_', 'B', '_', 'X']
2 => ['_', '_', 'C', 'X', 'Y']
New output
0 => ['X', 'Y', 'A', '_', '_']
1 => ['Y', 'B', 'X', '_', '_']
2 => ['C', 'X', 'Y', '_', '_']
Best result (VALID)
0 => ['X', 'Y', 'A']
1 => ['Y', 'B', 'X']
2 => ['C', 'X', 'Y']
Run Code Online (Sandbox Code Playgroud)
经过一番摆弄后,我想出了以下代码。
这个想法是识别冲突的元素并将它们交换到不再是问题的列。对于不适用的情况,将进行随机选择。该代码以递归方式工作,因此存在需要很长时间才能完成的边缘情况。
极端边缘情况是所有行都包含完全相同的值的输入。
<?php
declare(strict_types=1);
final class SwapSolver
{
/**
* @param array $input
*
* @return array
*/
public function solve(array $input): array
{
$input = array_values($input);
return $this->swapDuplicates($this->prepare($input, $this->getMinRowLength($input)));
}
/**
* @param array $input
*
* @return array
*/
private function swapDuplicates(array $input): array
{
$unswappable = [];
foreach ($this->duplicates($input) as $position) {
list($r, $a) = $position;
$swapped = false;
foreach ($this->swapCandidates($input, $r, $a, true) as $b) {
$input[$r] = $this->swap($input[$r], $a, $b);
$swapped = true;
break;
}
if (!$swapped) {
$unswappable[] = $position;
}
}
// still unswappable
$unswappable = array_values(array_filter($unswappable, function (array $position) use ($input): bool {
return $this->isDuplicate($input, ...$position);
}));
// tie breaker
if (count($unswappable) > 0) {
list($r, $a) = $unswappable[mt_rand(0, count($unswappable) - 1)];
$candidates = [];
foreach ($this->swapCandidates($input, $r, $a, false) as $b) {
$candidates[] = $b;
}
$input[$r] = $this->swap($input[$r], $a, $candidates[mt_rand(0, count($candidates) - 1)]);
return $this->swapDuplicates($input);
}
return $input;
}
/**
* @param array $input
*
* @return \Generator
*/
private function duplicates(array &$input): \Generator
{
foreach ($input as $r => $row) {
foreach ($row as $c => $value) {
if ($this->isDuplicate($input, $r, $c)) {
yield [$r, $c];
}
}
}
}
/**
* @param array $input
* @param int $row
* @param int $column
*
* @return bool
*/
private function isDuplicate(array $input, int $row, int $column): bool
{
$candidate = $input[$row][$column];
if (is_null($candidate)) {
return false;
}
foreach (array_column($input, $column) as $r => $value) {
if ($r !== $row && $value === $candidate) {
return true;
}
}
return false;
}
/**
* @param array $input
* @param int $row
* @param int $column
* @param bool $strict
*
* @return \Generator
*/
private function swapCandidates(array &$input, int $row, int $column, bool $strict): \Generator
{
foreach ($input[$row] as $c => $dst) {
if ((!$strict || !in_array($input[$row][$column], array_column($input, $c), true))
&& (is_null($dst) || !in_array($dst, array_column($input, $column), true))
) {
yield $c;
}
}
}
/**
* @param array $row
* @param int $from
* @param int $to
*
* @return array
*/
private function swap(array $row, int $from, int $to): array
{
$tmp = $row[$to];
$row[$to] = $row[$from];
$row[$from] = $tmp;
return $row;
}
/**
* @param array $input
* @param int $padSize
*
* @return array
*/
private function prepare(array $input, int $padSize): array
{
return array_map(function (array $row) use ($padSize): array {
$row = array_pad($row, $padSize, null);
shuffle($row);
return $row;
}, $input);
}
/**
* @param array $input
*
* @return int
*/
private function getMinRowLength(array $input): int
{
return max(
...array_values(array_count_values(array_merge(...$input))),
...array_map('count', $input)
);
}
}
Run Code Online (Sandbox Code Playgroud)
用法:
<?php
$solver = new SwapSolver();
$solution = $solver->solve($input);
Run Code Online (Sandbox Code Playgroud)
更多代码位于: https: //github.com/Yoshix/so-47261385