IDEA/Eclipse: Debugging Hadoop on Windows

  I assume everyone is already familiar with JUnit for Java unit testing, so I won't cover it here. This post focuses on testing Hadoop jobs with MRUnit, and on debugging from an IDE (IDEA/Eclipse) on Windows, both locally and against a cluster. The project structure for this post is as follows:

(Figure: Maven project structure)

MRUnit

Let's get straight to it. To use MRUnit, first add the dependencies:

  • MRUnit and JUnit dependencies

    <!-- mrunit -->
    <dependency>
        <groupId>org.apache.mrunit</groupId>
        <artifactId>mrunit</artifactId>
        <version>1.0.0</version>
        <classifier>hadoop2</classifier>
        <scope>test</scope>
    </dependency>

    <!-- junit -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
  • A simple WordCountApp, a MapReduce job that counts words

    package com.xxo.mr;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.log4j.Logger;

    import java.io.IOException;

    /**
     * Counts word occurrences with MapReduce.
     * Created by xiaoxiaomo on 2016/5/20.
     */
    public class WordCountApp {

        private static Logger logger = Logger.getLogger(WordCountApp.class);

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, WordCountApp.class.getSimpleName());
            job.setJarByClass(WordCountApp.class);

            // 1. Input
            FileInputFormat.setInputPaths(job, args[0]);
            FileInputFormat.setInputDirRecursive(job, true); // recurse into subdirectories

            // 2. Map phase
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);

            // 3. Reduce phase
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            // 4. Output
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // 5. Run
            job.waitForCompletion(true);
        }

        /**
         * Custom map function; must extend Mapper.
         */
        public static class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

            Text k2 = new Text();
            LongWritable v2 = new LongWritable();

            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

                // 1. Read one input line
                String line = value.toString();
                logger.info("Input line: " + line);

                // 2. Split the line into its words (tab-separated)
                String[] words = line.split("\t");
                for (String word : words) {
                    logger.info("Emitting key and value: " + word + " - 1");
                    k2.set(word); // key
                    v2.set(1);    // value
                    context.write(k2, v2);
                }
            }
        }

        /**
         * Custom reduce function; must extend Reducer.
         */
        public static class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

            // K2 = K3
            LongWritable v3 = new LongWritable();

            @Override
            protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
                long sum = 0;

                logger.info("Reduce key: " + key);
                for (LongWritable value : values) {
                    logger.info("Value: " + value);
                    sum += value.get();
                }
                v3.set(sum);
                context.write(key, v3);
            }
        }
    }
  • Now let's write a simple MRUnit test; the code is as follows:

    import com.google.common.collect.Lists;
    import com.xxo.mr.WordCountApp;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mrunit.mapreduce.MapDriver;
    import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
    import org.junit.Before;
    import org.junit.Test;

    import java.io.IOException;
    import java.util.ArrayList;

    /**
     * MRUnit test.
     * Created by xiaoxiaomo on 2016/5/20.
     */
    public class WordCountAppTest {

        // word-count Mapper
        private WordCountApp.WordCountMapper wordCountMapper;

        // word-count Reducer
        private WordCountApp.WordCountReducer wordCountReducer;

        // drivers for the Mapper and the Reducer
        private MapDriver<LongWritable, Text, Text, LongWritable> mapDriver;
        private ReduceDriver<Text, LongWritable, Text, LongWritable> reduceDriver;
        //private MapReduceDriver mrDriver;

        @Before
        public void before() {
            this.wordCountMapper = new WordCountApp.WordCountMapper();
            this.wordCountReducer = new WordCountApp.WordCountReducer();

            this.mapDriver = MapDriver.newMapDriver(wordCountMapper);
            this.reduceDriver = ReduceDriver.newReduceDriver(wordCountReducer);
            // Alternatively, test map and reduce together:
            //this.mrDriver = MapReduceDriver.newMapReduceDriver(wordCountMapper, wordCountReducer);
        }

        @Test
        public void testMap() throws IOException {
            // set up the inputs and the expected outputs
            this.mapDriver.addInput(new LongWritable(0), new Text("blog\txiaoxiaomo"));
            this.mapDriver.addInput(new LongWritable(0), new Text("xxo\tblog"));
            this.mapDriver.addOutput(new Text("blog"), new LongWritable(1));
            this.mapDriver.addOutput(new Text("xiaoxiaomo"), new LongWritable(1));
            this.mapDriver.addOutput(new Text("xxo"), new LongWritable(1));
            this.mapDriver.addOutput(new Text("blog"), new LongWritable(1));

            this.mapDriver.runTest();
        }

        @Test
        public void testReduce() throws IOException {
            ArrayList<LongWritable> values = Lists.newArrayList(new LongWritable(1), new LongWritable(2));
            this.reduceDriver.addInput(new Text("xiaoxiaomo"), values);
            this.reduceDriver.addInput(new Text("blog"), values);

            this.reduceDriver.run();
        }
    }

    /////// Run results
    /////// Mapper
    //2016-05-21 01:06:01 WordCountApp [INFO] Input line: blog xiaoxiaomo
    //2016-05-21 01:06:01 WordCountApp [INFO] Emitting key and value: blog - 1
    //2016-05-21 01:06:02 WordCountApp [INFO] Emitting key and value: xiaoxiaomo - 1
    //2016-05-21 01:06:02 WordCountApp [INFO] Input line: xxo blog
    //2016-05-21 01:06:02 WordCountApp [INFO] Emitting key and value: xxo - 1
    //2016-05-21 01:06:02 WordCountApp [INFO] Emitting key and value: blog - 1

    /////// Reducer
    //2016-05-21 01:07:53 WordCountApp [INFO] Reduce key: xiaoxiaomo
    //2016-05-21 01:07:53 WordCountApp [INFO] Value: 1
    //2016-05-21 01:07:53 WordCountApp [INFO] Value: 2
    //2016-05-21 01:07:53 WordCountApp [INFO] Reduce key: blog
    //2016-05-21 01:07:53 WordCountApp [INFO] Value: 1
    //2016-05-21 01:07:53 WordCountApp [INFO] Value: 2
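
The commented-out MapReduceDriver in before() tests the map and reduce stages together, including the shuffle/sort in between. A minimal sketch of such a combined test (the expected output pairs must be listed in key-sorted order, because MRUnit sorts by key before the reduce stage):

    import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;

    // additional test method inside WordCountAppTest
    @Test
    public void testMapReduce() throws IOException {
        MapReduceDriver<LongWritable, Text, Text, LongWritable, Text, LongWritable> mrDriver =
                MapReduceDriver.newMapReduceDriver(
                        new WordCountApp.WordCountMapper(),
                        new WordCountApp.WordCountReducer());

        mrDriver.withInput(new LongWritable(0), new Text("blog\txiaoxiaomo"))
                .withInput(new LongWritable(0), new Text("xxo\tblog"))
                // expected output, in key-sorted order after the shuffle
                .withOutput(new Text("blog"), new LongWritable(2))
                .withOutput(new Text("xiaoxiaomo"), new LongWritable(1))
                .withOutput(new Text("xxo"), new LongWritable(1))
                .runTest();
    }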

Debugging Hadoop

Preparation

  1. First download hadoop2.6.0_util(x64).zip or hadoop2.6.0_util(x32).zip, whichever matches your JVM architecture.

  2. Extract the archive (I extracted it to D:\dev\hadoop\bin\). The bin directory then contains these files:

    hadoop.dll
    hadoop.exp
    hadoop.lib
    hadoop.pdb
    libwinutils.lib
    winutils.exe
    winutils.pdb
  3. Configure the environment variables (remember to restart your IDE afterwards so it picks them up); a programmatic alternative is sketched after this list.
    HADOOP_HOME=D:\dev\hadoop
    PATH=%PATH%;%HADOOP_HOME%\bin
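
If you would rather not change system-wide environment variables, Hadoop's Shell utility class also honors the hadoop.home.dir JVM system property, so a hedged alternative is to set it at the very top of main, before any Hadoop class initializes (the path below is the extraction directory from step 2; adjust it to your own):

    // sketch: point Hadoop at the winutils directory from code instead of HADOOP_HOME
    public static void main(String[] args) throws Exception {
        System.setProperty("hadoop.home.dir", "D:\\dev\\hadoop");
        // ... continue with Job setup as in WordCountApp.main ...
    }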

Debugging Hadoop in IDEA

Local debugging

  • 1. Click the run-configuration dropdown at the top right of IDEA.
    (Figure: configuring the local Hadoop debugging environment)

  • 2. Click the "+" button and add a configuration.
    (Figure: creating an Application configuration)

  • 3. Set the main class and the program arguments (an example of the arguments is sketched after this list).
    (Figure: specifying the main class and run parameters)

  • 4. The run output looks like this:
    (Figure: IDEA debugging Hadoop, run results)
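
For a local run the job reads and writes the local filesystem, so the two program arguments are simply an input path and a not-yet-existing output path. A hypothetical example (placeholder paths, not from the original project):

    D:\data\wordcount\input D:\data\wordcount\output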

Remote mode

Remote (cluster) mode means submitting the job from your local machine to the cluster.

  1. Copy the cluster's configuration files core-site.xml, hdfs-site.xml, mapred-site.xml, and yarn-site.xml into the project's resources directory.

  2. Add the following to mapred-site.xml:

    <!-- allow a Windows client to submit jobs to a Linux cluster -->
    <property>
        <name>mapreduce.app-submission.cross-platform</name>
        <value>true</value>
    </property>
    <!-- ship the locally built jar with the job -->
    <property>
        <name>mapreduce.job.jar</name>
        <value>D:\\dev\\idea\\HadoopMRunit_Winutils_20150520\\target\\HadoopMRunit_Winutils_20150520-1.0-SNAPSHOT-jar-with-dependencies.jar</value>
    </property>
  3. Configure the run environment.
    (Figure: configuring the main class and run parameters)

  4. Package with Maven: mvn clean install

  5. Run the job. (The same two settings can also be made in code; see the sketch below.)
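
Both settings can equivalently be made on the Configuration in code before the Job is created, which keeps them out of the shared XML files. A minimal sketch, using the same jar path as above:

    Configuration conf = new Configuration(); // picks up the *-site.xml files from resources
    // let a Windows client submit jobs to a Linux cluster
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // ship the locally built fat jar with the job
    conf.set("mapreduce.job.jar",
            "D:\\dev\\idea\\HadoopMRunit_Winutils_20150520\\target\\"
            + "HadoopMRunit_Winutils_20150520-1.0-SNAPSHOT-jar-with-dependencies.jar");
    Job job = Job.getInstance(conf, WordCountApp.class.getSimpleName());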

Debugging Hadoop in Eclipse

  • Debugging Hadoop in Eclipse works essentially the same way as in IDEA; only the place where you set the run parameters differs, so I won't go through it in detail. See the figure:
    (Figure: configuring run parameters in Eclipse)

Common errors

  • 1. org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z

    2016-05-21 09:39:44 JobSubmitter [INFO] Cleaning up the staging area file:/tmp/hadoop-Jason/mapred/staging/Jason477647952/.staging/job_local477647952_0001
    Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:557)
    at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:977)
    at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:187)
    at org.apache.hadoop.util.DiskChecker.checkDirAccess(DiskChecker.java:174)
    at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:108)
    at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:285)
    at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:344)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:150)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:131)
    at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:115)
    at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:131)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:163)
    at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:731)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:536)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1314)
    at com.xxo.mr.WordCount.main(WordCount.java:73)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
  • Analysis: the error message and the Hadoop source show a failed local permission check in the Windows native I/O code.
    (Figure: the source file where the access check happens)

  • Solutions (two options):

  1. Run the IDE as administrator.
  2. Alternatively, override the Hadoop source class, as shown in the figure and sketched below:
    (Figure: overriding the source)
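
The usual form of that override, sketched here under the assumption of Hadoop 2.6.0: copy the complete NativeIO.java for your exact Hadoop version from the Hadoop source tree into your project under the same package (org.apache.hadoop.io.nativeio), keep everything else unchanged, and edit only the access method of the nested Windows class; a class compiled in your project shadows the one inside the Hadoop jar. The edited method (an excerpt of the copied file, not a standalone class) looks roughly like this:

    // inside the nested Windows class of the copied NativeIO.java
    public static boolean access(String path, AccessRight desiredAccess)
            throws IOException {
        // original body: return access0(path, desiredAccess.accessRight());
        // The native access0 call fails without a matching hadoop.dll, so for
        // local debugging we skip the permission check. Never ship this change.
        return true;
    }
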
  • 2. Permission denied: user=Jason, access=EXECUTE, inode="/history":root:supergroup:drwxrwx---

    2016-05-21 10:53:18 JobSubmitter [INFO] Cleaning up the staging area /history/Jason/.staging/job_1463827152309_0001
    Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=Jason, access=EXECUTE, inode="/history":root:supergroup:drwxrwx---
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkFsPermission(FSPermissionChecker.java:271)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:257)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkTraverse(FSPermissionChecker.java:208)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:171)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:6512)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:6494)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkOwner(FSNamesystem.java:6413)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.setPermissionInt(FSNamesystem.java:1719)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.setPermission(FSNamesystem.java:1699)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.setPermission(NameNodeRpcServer.java:614)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.setPermission(ClientNamenodeProtocolServerSideTranslatorPB.java:443)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:619)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:962)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2039)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2035)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2033)

    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:525)
    at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
    at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
    at org.apache.hadoop.hdfs.DFSClient.setPermission(DFSClient.java:2326)
    at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1286)
    at org.apache.hadoop.hdfs.DistributedFileSystem$24.doCall(DistributedFileSystem.java:1282)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.setPermission(DistributedFileSystem.java:1282)
    at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:599)
    at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:182)
    at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:390)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:483)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1296)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1293)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1314)
    at com.xxo.mr.WordCountApp.main(WordCountApp.java:47)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
    Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=Jason, access=EXECUTE, inode="/history":root:supergroup:drwxrwx---
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkFsPermission(FSPermissionChecker.java:271)
  • Analysis: insufficient permissions on the cluster's /history directory:

    [root@xxo03 up]# hdfs dfs -ls /history/
    Found 3 items
    drwx--x--x - Jason supergroup 0 2016-05-21 18:43 /history/Jason
    drwxrwx--x - root supergroup 0 2016-05-11 05:48 /history/history
    drwx--x--x - root supergroup 0 2016-05-11 06:17 /history/root
  • Solution: grant execute (traverse) permission so the local user (Jason) can access the directory:

    [root@xxo03 /]# hdfs dfs -chmod -R a+x /history
    [root@xxo03 /]# hdfs dfs -ls /
    Found 5 items
    -rw-r--r-- 1 root supergroup 57925 2016-05-09 07:40 /hadoop.log
    drwxrwx--x - root supergroup 0 2016-05-21 18:43 /history
    drwxr-xr-x - root supergroup 0 2016-05-11 06:25 /in
    drwxr-xr-x - root supergroup 0 2016-05-11 07:11 /out
    drwxr-xr-x - root supergroup 0 2016-05-11 06:17 /tmp
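
During development only, an alternative to loosening the HDFS permissions is to submit as the directory's owner. With simple (non-Kerberos) authentication, Hadoop reads the HADOOP_USER_NAME environment variable, or the JVM system property of the same name, to decide which user the client acts as. A hedged sketch:

    // sketch: impersonate the HDFS owner for local debugging only; set this
    // before any Hadoop class initializes UserGroupInformation. It has no
    // effect on Kerberos-secured clusters.
    System.setProperty("HADOOP_USER_NAME", "root");
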
  • Project source download:
    http://download.csdn.net/detail/tang__xuandong/9527054
