我正在使用以下代码来执行reduce side join
/*
* HadoopMapper.java
*
* Created on Apr 8, 2012, 5:39:51 PM
*/
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
// import org.apache.commons.logging.Log;
// import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.contrib.utils.join.*;
/**
*
* @author
*/
public class DataJoin extends Configured implements Tool
{
public static class MapClass extends DataJoinMapperBase
{
protected Text …Run Code Online (Sandbox Code Playgroud) 我想按照这里的描述，使用 K 均值（K-means）对我的词向量进行聚类。我正在使用的代码片段如下：
# Set "k" (num_clusters) to be 1/5th of the vocabulary size, or an
# average of 5 words per cluster.
word_vectors = model.syn0
# Use floor division so num_clusters stays an int: in Python 3, "/" always
# returns a float, and KMeans rejects a float n_clusters with
# "TypeError: 'float' object cannot be interpreted as an integer".
num_clusters = word_vectors.shape[0] // 5
# Initialize a k-means object and use it to extract centroids
kmeans_clustering = KMeans(n_clusters=num_clusters)
idx = kmeans_clustering.fit_predict(word_vectors)
Run Code Online (Sandbox Code Playgroud)
我收到以下错误：TypeError: 'float' object cannot be interpreted as an integer
有人可以帮忙吗？