Debugging Hadoop on Windows with IDEA/Eclipse

  I assume everyone is already familiar with JUnit for Java unit testing, so I won't cover it here. This post focuses on testing Hadoop MapReduce jobs with MRUnit, and on debugging MapReduce both locally and against a cluster from an IDE (IDEA/Eclipse) on Windows. The project structure used in this post is as follows:

(screenshot: Maven project structure)

MRUnit

Let's get right to it. To use MRUnit, first add the dependencies:

  • MRUnit and JUnit dependencies

    <!-- mrunit -->
    <dependency>
        <groupId>org.apache.mrunit</groupId>
        <artifactId>mrunit</artifactId>
        <version>1.0.0</version>
        <classifier>hadoop2</classifier>
        <scope>test</scope>
    </dependency>

    <!-- junit -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
  • A simple WordCountApp MapReduce job that counts words

    package com.xxo.mr;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.log4j.Logger;

    import java.io.IOException;

    /**
     * Counts word occurrences with MapReduce.
     * Created by xiaoxiaomo on 2016/5/20.
     */
    public class WordCountApp {

        private static final Logger logger = Logger.getLogger(WordCountApp.class);

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, WordCountApp.class.getSimpleName());
            job.setJarByClass(WordCountApp.class);

            // 1. Input path(s)
            FileInputFormat.setInputPaths(job, args[0]);
            FileInputFormat.setInputDirRecursive(job, true); // recurse into subdirectories

            // 2. Map phase
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);

            // 3. Reduce phase
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            // 4. Output path
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // 5. Run the job
            job.waitForCompletion(true);
        }

        /**
         * Custom mapper; must extend Mapper.
         */
        public static class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

            Text k2 = new Text();
            LongWritable v2 = new LongWritable();

            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

                // 1. Read the current line
                String line = value.toString();
                logger.info("line: " + line);

                // 2. Split the line into words and emit each word with a count of 1
                String[] words = line.split("\t");
                for (String word : words) {
                    logger.info("emit key/value: " + word + " - 1");
                    k2.set(word); // key
                    v2.set(1);    // value
                    context.write(k2, v2);
                }
            }
        }

        /**
         * Custom reducer; must extend Reducer.
         */
        public static class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

            // K2 = K3: the reducer reuses its input key as the output key
            LongWritable v3 = new LongWritable();

            @Override
            protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
                long sum = 0;

                logger.info("reduce key: " + key);
                for (LongWritable value : values) {
                    logger.info("value: " + value);
                    sum += value.get();
                }
                v3.set(sum);
                context.write(key, v3);
            }
        }

    }
  • Now let's write a simple MRUnit test for it; the code is as follows:

    import com.google.common.collect.Lists;
    import com.xxo.mr.WordCountApp;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;
    import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
    import org.junit.Before;
    import org.junit.Test;

    import java.io.IOException;
    import java.util.ArrayList;

    /**
     * MRUnit tests.
     * Created by xiaoxiaomo on 2016/5/20.
     */
    public class WordCountAppTest {

        // Word-count mapper under test
        private WordCountApp.WordCountMapper wordCountMapper;

        // Word-count reducer under test
        private WordCountApp.WordCountReducer wordCountReducer;

        // MRUnit drivers for the mapper and the reducer
        private MapDriver<LongWritable, Text, Text, LongWritable> mapDriver;
        private ReduceDriver<Text, LongWritable, Text, LongWritable> reduceDriver;
        //private MapReduceDriver mrDriver;

        @Before
        public void before() {
            this.wordCountMapper = new WordCountApp.WordCountMapper();
            this.wordCountReducer = new WordCountApp.WordCountReducer();

            this.mapDriver = MapDriver.newMapDriver(wordCountMapper);
            this.reduceDriver = ReduceDriver.newReduceDriver(wordCountReducer);
            // Alternatively, test map and reduce together:
            //this.mrDriver = MapReduceDriver.newMapReduceDriver(wordCountMapper, wordCountReducer);
        }

        @Test
        public void testMap() throws IOException {
            // Input records
            this.mapDriver.addInput(new LongWritable(0), new Text("blog\txiaoxiaomo"));
            this.mapDriver.addInput(new LongWritable(0), new Text("xxo\tblog"));
            // Expected output records, in order
            this.mapDriver.addOutput(new Text("blog"), new LongWritable(1));
            this.mapDriver.addOutput(new Text("xiaoxiaomo"), new LongWritable(1));
            this.mapDriver.addOutput(new Text("xxo"), new LongWritable(1));
            this.mapDriver.addOutput(new Text("blog"), new LongWritable(1));

            this.mapDriver.runTest();
        }

        @Test
        public void testReduce() throws IOException {
            ArrayList<LongWritable> values = Lists.newArrayList(new LongWritable(1), new LongWritable(2));
            this.reduceDriver.addInput(new Text("xiaoxiaomo"), values);
            this.reduceDriver.addInput(new Text("blog"), values);

            this.reduceDriver.run();
        }
    }

    /////// Run output
    /////// Mapper
    //2016-05-21 01:06:01 WordCountApp [INFO] line: blog xiaoxiaomo
    //2016-05-21 01:06:01 WordCountApp [INFO] emit key/value: blog - 1
    //2016-05-21 01:06:02 WordCountApp [INFO] emit key/value: xiaoxiaomo - 1
    //2016-05-21 01:06:02 WordCountApp [INFO] line: xxo blog
    //2016-05-21 01:06:02 WordCountApp [INFO] emit key/value: xxo - 1
    //2016-05-21 01:06:02 WordCountApp [INFO] emit key/value: blog - 1

    /////// Reducer
    //2016-05-21 01:07:53 WordCountApp [INFO] reduce key: xiaoxiaomo
    //2016-05-21 01:07:53 WordCountApp [INFO] value: 1
    //2016-05-21 01:07:53 WordCountApp [INFO] value: 2
    //2016-05-21 01:07:53 WordCountApp [INFO] reduce key: blog
    //2016-05-21 01:07:53 WordCountApp [INFO] value: 1
    //2016-05-21 01:07:53 WordCountApp [INFO] value: 2
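
  • The commented-out MapReduceDriver in the test above can exercise the map and reduce phases together. A minimal sketch of such a test (same classes as above; it additionally needs the import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver):

    // Sketch: end-to-end map + reduce test with MRUnit's MapReduceDriver.
    @Test
    public void testMapReduce() throws IOException {
        MapReduceDriver<LongWritable, Text, Text, LongWritable, Text, LongWritable> driver =
                MapReduceDriver.newMapReduceDriver(
                        new WordCountApp.WordCountMapper(),
                        new WordCountApp.WordCountReducer());

        driver.withInput(new LongWritable(0), new Text("blog\txiaoxiaomo"))
              .withInput(new LongWritable(0), new Text("xxo\tblog"));

        // After the shuffle the reducer sees keys in sorted order.
        driver.withOutput(new Text("blog"), new LongWritable(2))
              .withOutput(new Text("xiaoxiaomo"), new LongWritable(1))
              .withOutput(new Text("xxo"), new LongWritable(1));

        driver.runTest();
    }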

Debugging Hadoop

Preparation

  1. First download hadoop2.6.0_util(x64).zip or hadoop2.6.0_util(x32).zip, depending on your OS architecture.

  2. Unzip it (I extracted it to D:\dev\hadoop\bin\). The bin directory should contain these files:

    hadoop.dll
    hadoop.exp
    hadoop.lib
    hadoop.pdb
    libwinutils.lib
    winutils.exe
    winutils.pdb
  3. Configure the environment variables (remember to restart your IDE afterwards):
    HADOOP_HOME=D:\dev\hadoop
    PATH=%PATH%;%HADOOP_HOME%\bin
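
    If you prefer not to change system-wide environment variables (or restart the IDE), the same effect can usually be achieved programmatically; a minimal sketch, assuming the D:\dev\hadoop directory above:

    // Sketch: point the Hadoop client at the winutils directory from code.
    // Must run before any Hadoop class is initialized, i.e. at the very top of main().
    System.setProperty("hadoop.home.dir", "D:\\dev\\hadoop");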

Debugging Hadoop in IDEA

Local debugging

  • Step 1: Open the run-configuration dropdown in the top-right corner of IDEA.
    (screenshot: configuring the local Hadoop debug environment)

  • Step 2: Click the "+" button and add a new configuration.
    (screenshot: creating an Application configuration)

  • Step 3: Set the main class, program arguments, and environment variables.
    (screenshot: specifying the main class and run parameters)

  • Step 4: The run output looks like this:
    (screenshot: Hadoop debug output in IDEA)

Remote mode

Cluster mode means submitting the job from your local machine to the cluster.

  1. Copy the cluster configuration files core-site.xml, hdfs-site.xml, mapred-site.xml and yarn-site.xml into the project's resources directory.

  2. Add the following to mapred-site.xml (the same settings can also be applied in code; see the sketch after this list):

    <property>
        <name>mapreduce.app-submission.cross-platform</name>
        <value>true</value>
    </property>
    <!-- point mapreduce.job.jar at the locally built job jar so it gets shipped to the cluster -->
    <property>
        <name>mapreduce.job.jar</name>
        <value>D:\\dev\\idea\\HadoopMRunit_Winutils_20150520\\target\\HadoopMRunit_Winutils_20150520-1.0-SNAPSHOT-jar-with-dependencies.jar</value>
    </property>
  3. Set up the run configuration.
    (screenshot: configuring the main class and run parameters)

  4. Package the project with Maven: mvn clean install

  5. Run it.
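
As an alternative to editing mapred-site.xml, the same two settings can be applied directly in the driver code before the Job is created; a minimal sketch, reusing the jar path from step 2:

    // Sketch: apply the remote-submission settings in code instead of mapred-site.xml.
    // The jar is the one produced by "mvn clean install" in step 4.
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    conf.set("mapreduce.job.jar",
            "D:\\dev\\idea\\HadoopMRunit_Winutils_20150520\\target\\"
                    + "HadoopMRunit_Winutils_20150520-1.0-SNAPSHOT-jar-with-dependencies.jar");
    Job job = Job.getInstance(conf, WordCountApp.class.getSimpleName());
    job.setJarByClass(WordCountApp.class);
    // ...the rest of the driver (input/output paths, mapper, reducer) is unchanged.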

Debugging Hadoop in Eclipse

  • Debugging Hadoop in Eclipse is essentially the same as in IDEA; only the place where you set the run arguments differs, so I won't go through it in detail. See the screenshot:
    (screenshot: configuring run arguments in Eclipse)

Common errors

  • Error 1: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z

    2016-05-21 09:39:44 JobSubmitter [INFO] Cleaning up the staging area file:/tmp/hadoop-Jason/mapred/staging/Jason477647952/.staging/job_local477647952_0001
    Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:557)
    at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:977)
    at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:187)
    at org.apache.hadoop.util.DiskChecker.checkDirAccess(DiskChecker.java:174)
    at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:108)
    at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:285)
    at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:344)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:150)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:131)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:115)
    at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:131)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:163)
    at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:731)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:536)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1314)
    at com.xxo.mr.WordCount.main(WordCount.java:73)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
  • Analysis: the error message and the Hadoop source show that a file-permission check is failing.
    (screenshot: the source file where the access check is performed)

  • Solutions (two options):

  1. Start the IDE as administrator.
  2. Alternatively, override the Hadoop source class in your own project, as shown in the screenshot and sketched below:
    (screenshot: overriding the source)
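
    A common way to do the override (a sketch, not necessarily the exact code in the screenshot): copy org.apache.hadoop.io.nativeio.NativeIO from the Hadoop 2.6.0 sources into your own project under the same package, so that it shadows the class in the hadoop-common jar, and short-circuit the Windows access check:

    // In your project: src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java,
    // copied verbatim from Hadoop 2.6.0 except for this one method in the Windows inner class.
    public static boolean access(String path, AccessRight desiredAccess)
            throws IOException {
        // Original: return access0(path, desiredAccess.accessRight());
        // Skip the native permission check that fails when running locally on Windows.
        return true;
    }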
  • Error 2: Permission denied: user=Jason, access=EXECUTE, inode="/history":root:supergroup:drwxrwx---

    2016-05-21 10:53:18 JobSubmitter [INFO] Cleaning up the staging area /history/Jason/.staging/job_1463827152309_0001
    Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=Jason, access=EXECUTE, inode="/history":root:supergroup:drwxrwx---
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkFsPermission(FSPermissionChecker.java:271)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:257)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkTraverse(FSPermissionChecker.java:208)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:171)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:6512)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:6494)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkOwner(FSNamesystem.java:6413)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.setPermissionInt(FSNamesystem.java:1719)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.setPermission(FSNamesystem.java:1699)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.setPermission(NameNodeRpcServer.java:614)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.setPermission(ClientNamenodeProtocolServerSideTranslatorPB.java:443)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:619)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:962)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2039)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2035)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2033)

    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:525)
    at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
    at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
    at org.apache.hadoop.hdfs.DFSClient.setPermission(DFSClient.java:2326)
    at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1286)
    at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1282)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.setPermission(DistributedFileSystem.java:1282)
    at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:599)
    at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:182)
    at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:390)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:483)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1314)
    at com.xxo.mr.WordCountApp.main(WordCountApp.java:47)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
    Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=Jason, access=EXECUTE, inode="/history":root:supergroup:drwxrwx---
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkFsPermission(FSPermissionChecker.java:271)
  • Analysis: the local user lacks permission on the /history directory on the cluster.

    [root@xxo03 up]# hdfs dfs -ls /history/
    Found 3 items
    drwx--x--x - Jason supergroup 0 2016-05-21 18:43 /history/Jason
    drwxrwx--x - root supergroup 0 2016-05-11 05:48 /history/history
    drwx--x--x - root supergroup 0 2016-05-11 06:17 /history/root
  • Solution: grant the local user (Jason) the missing permission:

    [root@xxo03 /]# hdfs dfs -chmod -R a+x /history
    [root@xxo03 /]# hdfs dfs -ls /
    Found 5 items
    -rw-r--r-- 1 root supergroup 57925 2016-05-09 07:40 /hadoop.log
    drwxrwx--x - root supergroup 0 2016-05-21 18:43 /history
    drwxr-xr-x - root supergroup 0 2016-05-11 06:25 /in
    drwxr-xr-x - root supergroup 0 2016-05-11 07:11 /out
    drwxr-xr-x - root supergroup 0 2016-05-11 06:17 /tmp
  • Download the project source:
    http://download.csdn.net/detail/tang__xuandong/9527054      
