gio*_*o79 7 php nlp parentheses multidimensional-array
我想要将带有嵌套括号的文本转换为嵌套数组。以下是 NLP 解析器的示例输出:
(TOP (S (NP (PRP I)) (VP (VBP love) (NP (NP (DT a) (JJ big) (NN bed)) (PP (IN of) (NP (NNS roses))))) (. .)))
Run Code Online (Sandbox Code Playgroud)
(原句:I love a big bed of roses.)
我想把它变成一个嵌套数组,使其看起来像这样:
TOP
S
NP
PRP I
VP
VBP love
Run Code Online (Sandbox Code Playgroud)
等等
我找到了这个把 PHP 花括号转换成数组的方案,但它生成的并不是嵌套数组。
rod*_*ehm 20
代码解释:
<?php
/**
 * Converts text containing nested parentheses into a nested PHP array.
 *
 * Every "(...)" group becomes a child array; the text between parentheses is
 * stored as string tokens in the order it appears. Whitespace is NOT treated
 * as a delimiter: it stays inside the text tokens (e.g. "TOP " or " ").
 */
class ParensParser
{
    // scopes that enclose the one currently being filled (innermost last)
    protected $stack = null;
    // tokens and child arrays collected for the current nesting level
    protected $current = null;
    // input string being parsed (after the optional outer-paren strip)
    protected $string = null;
    // current character offset in string
    protected $position = null;
    // offset where the pending text token started, or null if none is pending
    protected $buffer_start = null;
    // byte length of $string; declared explicitly because creating it
    // dynamically is deprecated as of PHP 8.2
    protected $length = 0;

    /**
     * Parse a parenthesised string into a nested array.
     *
     * One pair of parentheses that wraps the whole input is dropped, so
     * "(a (b))" yields array('a ', array('b')). Malformed input is handled
     * leniently instead of failing: a ")" without a matching "(" wraps
     * everything collected so far into a group, and groups still open at the
     * end of the input are closed implicitly.
     *
     * @param string $string text to parse
     * @return array nested array of string tokens and child arrays; an empty
     *               array for an empty or non-string input
     */
    public function parse($string)
    {
        // compare against '' explicitly: a plain truthiness test would also
        // reject the valid one-character input "0"
        if (!is_string($string) || $string === '') {
            return array();
        }

        if ($this->isWrapped($string)) {
            // drop the outer parens, as they're unnecessary
            $string = (string) substr($string, 1, -1);
        }

        // reset all per-run state so one instance can be reused safely
        $this->current = array();
        $this->stack = array();
        $this->string = $string;
        $this->length = strlen($this->string);
        $this->buffer_start = null;

        // look at each character
        for ($this->position = 0; $this->position < $this->length; $this->position++) {
            switch ($this->string[$this->position]) {
                case '(':
                    $this->push();
                    // park the current scope on the stack and begin a new one
                    array_push($this->stack, $this->current);
                    $this->current = array();
                    break;

                case ')':
                    $this->push();
                    $this->closeScope();
                    break;

                default:
                    // only remember where the token starts; the substring is
                    // extracted in one go by push()
                    if ($this->buffer_start === null) {
                        $this->buffer_start = $this->position;
                    }
            }
        }

        // flush text that follows the last parenthesis (or the whole input
        // when it contains no parentheses at all)
        $this->push();

        // implicitly close groups that were never closed so that their
        // parents are not lost
        while ($this->stack) {
            $this->closeScope();
        }

        return $this->current;
    }

    /**
     * Append the pending text token (if any) to the current scope.
     *
     * The token spans from $buffer_start up to, but not including, $position.
     */
    protected function push()
    {
        if ($this->buffer_start !== null) {
            // extract string from buffer start to current position
            $buffer = substr($this->string, $this->buffer_start, $this->position - $this->buffer_start);
            // clean buffer
            $this->buffer_start = null;
            // throw token into current scope
            $this->current[] = $buffer;
        }
    }

    /**
     * Finish the current scope and append it as a child of its parent scope.
     *
     * When there is no parent on the stack (a stray ")"), a fresh empty
     * parent is used, which is what the implicit null handling did before.
     */
    protected function closeScope()
    {
        $finished = $this->current;
        $this->current = $this->stack ? array_pop($this->stack) : array();
        $this->current[] = $finished;
    }

    /**
     * Tell whether the first "(" of $string is closed by its very last ")".
     *
     * Only in that case may the first and last characters be stripped
     * without changing the structure ("(a) (b)" must stay untouched).
     *
     * @param string $string non-empty input
     * @return bool
     */
    protected function isWrapped($string)
    {
        $last = strlen($string) - 1;
        if ($last < 1 || $string[0] !== '(' || $string[$last] !== ')') {
            return false;
        }

        $depth = 0;
        for ($i = 0; $i <= $last; $i++) {
            if ($string[$i] === '(') {
                $depth++;
            } elseif ($string[$i] === ')') {
                $depth--;
                if ($depth === 0) {
                    // the opening paren at offset 0 closes here
                    return $i === $last;
                }
            }
        }

        return false;
    }
}
// Demo: feed a sample constituency-parse string through the parser and dump
// the nested array it produces.
$p = new ParensParser();

$string = '(TOP (S (NP (PRP I)) (VP (VBP love) (NP (NP (DT a) (JJ big) (NN bed)) (PP (IN of) (NP (NNS roses))))) (. .)))';

$result = $p->parse($string);

var_dump($result);
Run Code Online (Sandbox Code Playgroud)