I'm trying to port a knn (k nearest neighbor search ) on a kd-tree that I wrote in Java to C.
The Java output, as expected:
Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0
Run Code Online (Sandbox Code Playgroud)
Java code, class (Its a quick implementation just to get the algorithm working before I start my port):
Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0
Run Code Online (Sandbox Code Playgroud)
Java knn method:
class kd_tree {
public int DEFAULT_NUMBER_OF_DIMENSIONS = 1;
static int current_number_of_data_points = 0;
static int current_global_median = 0;
kd_tree left;
kd_tree right;
float[] data;
private int k_dimensions;
float distance_to_neighbor;
}
Run Code Online (Sandbox Code Playgroud)
The relevant C code snippets:
/*===========================================================================
Function knn, knn algorithm using kd-tree & minimumNeighbor function.
Description:
==========================================================*/
public static kd_tree[] knn( kd_tree root
, float[] data_point
, int depth
, int k_dimension
, int number_of_nearest_neighbors) {
kd_tree[] all_nearests = new kd_tree[current_number_of_data_points];
kd_tree[] n_nearests = new kd_tree[current_number_of_data_points];
if (root != null) {
int nearests_counter = 0;
/* now lets traverse the tree inorder & calculate distances from the
query point based on Morris Traversal algorithm that does not
use any stacks or no recursion */
kd_tree cur = root;
kd_tree pre;
while (cur != null) {
if (cur.left == null) {
/* debugging System.out.println(cur.data[0]);
calculate distance */
cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right; // move to next right node
} else { // has a left subtree
pre = cur.left;
while (pre.right != null && pre.right != cur) { // find rightmost
pre = pre.right;
}
/* Make current as the right child of its inorder
predecessor */
if (pre.right == null) {
pre.right = cur;
cur = cur.left;
} else {
pre.right = null;
// debugging printf("%d ", current->data);
// calculate distance
cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right;
}
}
}//end while
// base cases
// sort from least to greatest
insertion_sort_based_on_distance(all_nearests);
// return on specified number_of_nearest_neighbors
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i]=all_nearests[i];
}
}
return n_nearests;
}
Run Code Online (Sandbox Code Playgroud)
Relevant knn implementation, kdtree.c:
#include <stdlib.h>
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree_ {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
static tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
static tree* processing_space [KD_TREE_HEAP_SIZE];
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, int total_number_of_elements
, tree* n_nearests);
Run Code Online (Sandbox Code Playgroud)
Relevant snippet of main.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "kdtree.h"
#include "sorting_utility.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, tree* n_nearests)
{
tree all_nearests[current_number_of_kd_tree_nodes];
tree* source_data_end_ptr = NULL;
tree* source_data_start_ptr = NULL;
tree* destination_data_start_ptr = NULL;
tree* destination_data_end_ptr = NULL;
if(NULL != root && NULL != n_nearests)
{
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while(NULL != cur)
{
if(NULL == cur->left)
{
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right; // move to next right node
}
else
{
pre = cur->left;
while(pre->right != NULL && pre->right != cur)
{ // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if(pre->right == NULL)
{
pre->right = cur;
cur = cur->left;
}
else
{
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right;
}
}
} // end while
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for(int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for(int c = 0; c < k_dimensions; c++)
{
if(NULL != processing_space[i].info)
{
printf("%f,", processing_space[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", processing_space[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy relevant range up current_number_of_kd_tree_nodes before sort,
* in order to avoid sorting beyond range that does not have any data */
source_data_start_ptr = &processing_space[0];
destination_data_start_ptr = &all_nearests[0];
destination_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
while(destination_data_start_ptr <= destination_data_end_ptr)
{
*destination_data_start_ptr = *source_data_start_ptr;
source_data_start_ptr++;
destination_data_start_ptr++;
}
// sort based on distance from query point
quick_sort(all_nearests, 0, current_number_of_kd_tree_nodes - 1);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for (int c = 0; c < k_dimensions; c++)
{
if (NULL != all_nearests[i].info)
{
printf ("%f,", all_nearests[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", all_nearests[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy only the n_nearest & ignore/ do NOT copy any empty tree nodes */
// reuse pointers
destination_data_end_ptr = &n_nearests[number_of_nearest_neighbors - 1];
source_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
source_data_start_ptr = all_nearests;
int counter = 0;
while(counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr)
{
// do NOT copy any empty tree nodes
if(source_data_start_ptr != NULL && source_data_start_ptr->info != NULL)
{
/* ATTENTION: i checked with debugger & values for
(distance_to_neighbor,info,left,right were not zeroed or empty */
float distance_to_neighbor = source_data_start_ptr->distance_to_neighbor;
float* info = source_data_start_ptr->info;
tree* left = source_data_start_ptr->left;
tree* right = source_data_start_ptr->right;
n_nearests[counter].distance_to_neighbor = distance_to_neighbor;
n_nearests[counter].info = info;
n_nearests[counter].left = left;
n_nearests[counter].right = right;
counter++;
}
source_data_start_ptr++;
}
}
else
{
printf("Error, knn input parameter error");
}
return n_nearests;
} // end function
Run Code Online (Sandbox Code Playgroud)
Output by the C version:
knn (k nearest neighboor)
:5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are:
***For debugging before sort:
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
***For debugging after sort
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
**After function return:**
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Run Code Online (Sandbox Code Playgroud)
I have already tested insertion, inorder traversal, searching, deletion, and finding a minimum neighbor in both Java & C version they are working as expected.
In the C version the knn function returns zeroed values after the function return?
Why are values zero ine the struct , after the call in main function (refer to output area titled "After function return" ?
希望别人能发现明显的东西,真是绝望地拔出我的头发。
存在多个问题:
n_nearest被定义main为指向tree对象的指针数组。但是您将参数定义knn为指向实际tree对象数组的指针,这是不兼容的。参数应定义为tree **n_nearest或tree* n_nearest[],它是指向指针数组的指针。然后,您应该只存储对树元素的引用,而不是复制值。类似地,all_nearests应定义为指针数组,就像在Java中一样,而不是存储树节点副本的对象数组。gcc -Wall -Werror或者这clang -Weverything -Werror是一个节省大量无聊时间的好开始。kd_tree可以在C中定义,因为typedef tree *kd_tree;C程序员发现将指针隐藏在typedef后面令人困惑,在Java程序员中,除了标量值之外,其他所有对象都可以处理指针,而没有问题,因为数组和类从不包含实际结构,仅包含对象的指针。processing_space,则可以避免动态分配临时数组all_nearests,该临时数组是从java中的堆以及在C中的堆栈中分配的。请注意,total_number_of_elements在这种情况下,由于processing_space假定足够大,因此不需要传递保存对所有树节点的引用。n_nearest并all_nearests作为kd_tree具有大小的对象数组current_number_of_data_points,其中许多对象将是null。main不会增加tree_ptr,因此它总是打印的第一个元素n_nearests。这是使用指向结构的指针数组的修改版本,与Java版本更加接近:
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
extern tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
extern tree* processing_space[KD_TREE_HEAP_SIZE];
// return the number of closest neighbors, <= number_of_nearest_neighbors
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
int total_number_of_elements,
tree* n_nearests);
Run Code Online (Sandbox Code Playgroud)
实施knn:
#include <stdio.h>
#include <stdlib.h>
#include "kdtree.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
static int tree_compare_distance(const void *a, const void *b) {
tree* ap = *(tree* const *)a;
tree* bp = *(tree* const *)b;
return ((ap->distance_to_neighbor > bp->distance_to_neighbor) -
(ap->distance_to_neighbor < bp->distance_to_neighbor));
}
static void quick_sort_based_on_distance(tree** array, size_t size) {
qsort(array, size, sizeof(*array), tree_compare_distance);
}
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
int knn(tree* root,
float data_point[],
int depth,
const int k_dimensions,
int number_of_nearest_neighbors,
tree** n_nearests)
{
/* use the static processing space to avoid defining a potentially huge
array on the stack */
tree** all_nearests = processing_space;
if (root != NULL && n_nearests != NULL) {
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while (cur != NULL) {
if (cur->left == NULL) {
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right; // move to next right node
} else { // has a left subtree
pre = cur->left;
while (pre->right != NULL && pre->right != cur) { // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if (pre->right == NULL) {
pre->right = cur;
cur = cur->left;
} else {
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur->right;
}
}
}
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// sort based on distance from query point
quick_sort_based_on_distance(all_nearests, nearests_counter);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < nearests_counter; i++) {
printf("{");
for (int c = 0; c < k_dimensions; c++) {
printf("%f,", all_nearests[i]->info[c]);
}
printf("} ");
printf("min_distance=%f\n", all_nearests[i]->distance_to_neighbor);
}
// JUST FOR DEBUGGING END
// return only specified number_of_nearest_neighbors
// yet do not return elements beyond nearest_counter
if (number_of_nearest_neighbors > nearest_counter)
number_of_nearest_neighbors = nearest_counter;
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i] = all_nearests[i];
}
return number_of_nearest_neighbors;
} else {
printf("Error, knn input parameter error");
return -1;
}
}
The main code is modified too:
```c
#include <stdio.h>
#include "kdtree.h"
int main() {
const int query_size = 10;
const int k_dimensions = 3;
// ...
// get the values and the query point
// ...
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%d nearest neighbors to Key: {%f,%f,%f}\n", query_size,
query_point[0], query_point[1], query_point[2]);
int result_size = knn(root, query_point, 0, k_dimensions, query_size, n_nearests);
// print the computed nearest neighbors
for (int i = 0; i < result_size; i++) {
tree* tree_ptr = n_nearests[i];
if (tree_ptr->info != NULL) {
printf("Key={");
for (int c = 0; c < k_dimensions; c++) {
printf("%s%d", "," + !c, tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
}
Run Code Online (Sandbox Code Playgroud)