Hadoop集群中HDFS的API测试案例以及MapReduce的多种提交Job方式案例

时间: 2023-07-18 admin 互联网

Hadoop集群中HDFS的API测试案例以及MapReduce的多种提交Job方式案例

Hadoop集群中HDFS的API测试案例以及MapReduce的多种提交Job方式案例

这两个案例默认Hadoop集群环境已经搭建好,并且IDEA开发环境也已经配置好。

1、HDFS客户端测试案例

1.1、pom依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><parent><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-parent</artifactId><version>2.5.1</version><relativePath/> <!-- lookup parent from repository --></parent><groupId>com.song</groupId><artifactId>hadoopdemo</artifactId><version>0.0.1-SNAPSHOT</version><name>hadoopdemo</name><description>Demo project for Spring Boot</description><properties><java.version>1.8</java.version></properties><dependencies><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter</artifactId><!--     排除的原因  避免出现下面的报错,虽然不影响使用,但是闹心org.apache.hadoop.fs.FileSystem - NativeIO.createDirectoryWithMode error, path = D:\test_data\download, mode = 755org.apache.hadoop.io.nativeio.NativeIOException: 当文件已存在时,无法创建该文件。--><!--            <exclusions>--><!--                <exclusion>--><!--                    <groupId>ch.qos.logback</groupId>--><!--                    <artifactId>logback-classic</artifactId>--><!--                </exclusion>--><!--                <exclusion>--><!--                    <groupId>ch.qos.logback</groupId>--><!--                    <artifactId>logback-core</artifactId>--><!--                </exclusion>--><!--            </exclusions>--></dependency><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-test</artifactId><scope>test</scope></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>3.1.3</version></dependency><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.12</version></dependency><dependency><groupId>org.slf4j</groupId><artifactId>slf4j-log4j12</artifactId><version>1.7.30</version></dependency></dependencies><!-- 打成两个包 一个带依赖  
一个不带依赖--><build><plugins><plugin><artifactId>maven-compiler-plugin</artifactId><version>3.6.1</version><configuration><source>1.8</source><target>1.8</target></configuration></plugin><plugin><artifactId>maven-assembly-plugin</artifactId><configuration><descriptorRefs><descriptorRef>jar-with-dependencies</descriptorRef></descriptorRefs></configuration><executions><execution><id>make-assembly</id><phase>package</phase><goals><goal>single</goal></goals></execution></executions></plugin></plugins></build>
</project>

1.2、打包方式

 <!-- 打成两个包 一个带依赖  一个不带依赖--><build><plugins><plugin><artifactId>maven-compiler-plugin</artifactId><version>3.6.1</version><configuration><source>1.8</source><target>1.8</target></configuration></plugin><plugin><artifactId>maven-assembly-plugin</artifactId><configuration><descriptorRefs><descriptorRef>jar-with-dependencies</descriptorRef></descriptorRefs></configuration><executions><execution><id>make-assembly</id><phase>package</phase><goals><goal>single</goal></goals></execution></executions></plugin></plugins></build>

1.3、日志配置

在项目的src/main/resources目录下,新建一个文件,命名为“log4j.properties”,在文件中填入以下内容

log4j.rootLogger=INFO, stdout  
log4j.appender.stdout=org.apache.log4j.ConsoleAppender  
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout  
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n  
log4j.appender.logfile=org.apache.log4j.FileAppender  
log4j.appender.logfile.File=target/spring.log  
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

1.4、代码实现

package com.song.hadoopdemo.hdfs;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;/*** HDFS客户端测试类* 官网地址: https://hadoop.apache.org/docs/r3.1.3/*/
public class HdfsClientTest {//文件系统private FileSystem fs;/*** 方法调用之前执行** @throws URISyntaxException* @throws IOException* @throws InterruptedException*/@Beforepublic void init() throws URISyntaxException, IOException, InterruptedException {// hdfs://hadoop102:8020 是nameNode的通信地址URI uri = new URI("hdfs://hadoop102:8020");//  获取文件系统配置Configuration configuration = new Configuration();// 定义具有操作权限的用户String user = "song";//  获取客户端对象fs = FileSystem.get(uri, configuration, user);}/*** 方法调用之后执行** @throws IOException*/@Afterpublic void close() throws IOException {// 关闭资源fs.close();}/*** 测试在HDFS上创建目录** @throws IOException*/@Testpublic void testMkdirs() throws IOException {// 在HDFS上创建目录
//        fs.mkdirs(new Path("/huaru"));
//        fs.mkdirs(new Path("/testRemove"));fs.mkdirs(new Path("/testMove"));}/*** 测试本地文件上传到HDFS*/@Testpublic void testPut() throws IOException {/* ** 第一个参数:是否删除源文件* 第二个参数:目标文件存在,是否覆盖* 第三个参数:源文件地址* 第四个参数:目标文件地址* 参数优先级  从左到右 从低到高* hdfs-default.xml  ==>  hdfs-site.xml ==> 在项目资源目录下的配置文件 ==> 代码里面的配置*/
//        fs.copyFromLocalFile(false, true, new Path("D:\\test_data\\input\\wordCount.txt"), new Path("/huaru/upload/wordCount.txt"));
//        fs.copyFromLocalFile(false, true, new Path("D:\\test_data\\input\\wordCount.txt"), new Path("/testRemove/wordCount.txt"));fs.copyFromLocalFile(false, true, new Path("D:\\test_data\\input\\资料.zip"), new Path("/testRemove/资料.zip"));}/*** 测试从HDFS下载到本地** @throws IOException*/@Testpublic void testGet() throws IOException {/* ** 第一个参数:是否删除源文件* 第二个参数:源文件地址* 第三个参数:目标文件地址* 第四个参数:是否开启文件校验* 参数优先级  从左到右 从低到高* hdfs-default.xml  ==>  hdfs-site.xml ==> 在项目资源目录下的配置文件 ==> 代码里面的配置*/fs.copyToLocalFile(false, new Path("/huaru/upload/wordCount.txt"), new Path("D:\\test_data\\download\\wordCount.txt"), false);}/*** 测试删除HDFS中的数据文件*/@Testpublic void testRemove() throws IOException {/* ** 第一个参数:是否删除源文件* 第二个参数:源文件地址*/fs.delete(new Path("/testRemove"), true);}/*** 测试HDFS 移动数据** @throws IOException*/@Testpublic void testMove() throws IOException {// 业务操作/* ** 第一个参数:源文件* 第二个参数:目标文件*/fs.rename(new Path("/huaru/upload/wordCount.txt"), new Path("/testMove"));
//        fs.rename(new Path("/testMove"), new Path("/huaru/upload/wordCount.txt"));}/*** 从HDFS中获取文件详情信息** @throws IOException*/@Testpublic void testListFiles() throws IOException {RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);while (listFiles.hasNext()) {LocatedFileStatus fileStatus = listFiles.next();System.out.println("========" + fileStatus.getPath() + "=========");// 文件的读写权限信息 rw-r--r--System.out.println(fileStatus.getPermission());// 文件的拥有者  songSystem.out.println(fileStatus.getOwner());// 文件的分组  supergroupSystem.out.println(fileStatus.getGroup());// 文件的长度 18System.out.println(fileStatus.getLen());// 文件的最后修改时间 1670483788325System.out.println(fileStatus.getModificationTime());// 文件的副本数 3System.out.println(fileStatus.getReplication());// 文件的块大小System.out.println(fileStatus.getBlockSize());// 文件所在快的大小 134217728/1024/1024=128MSystem.out.println(fileStatus.getPath().getName());// 获取块信息 [0,134217728,hadoop104,hadoop103,hadoop102]// 0:代表位置偏移量的起始位置 134217728 代表占用的字节数,hadoop104,hadoop103,hadoop102:代表备份数据的实例服务器BlockLocation[] blockLocations = fileStatus.getBlockLocations();System.out.println(Arrays.toString(blockLocations));}}
}

2、MapReduce提交Job的方式案例

2.1、需求

在给定的文本文件中统计输出每一个单词出现的总次数

2.2、文件格式

ss ss
zz zx
lh zx

2.3、输出格式

lh	1
ss	2
zx	2
zz	1

2.4、提交方式

2.4.1、将程序打成jar包,提交到集群环境上进行测试

2.4.1.1、pom依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><parent><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-parent</artifactId><version>2.5.1</version><relativePath/> <!-- lookup parent from repository --></parent><groupId>com.song</groupId><artifactId>hadoopdemo</artifactId><version>0.0.1-SNAPSHOT</version><name>hadoopdemo</name><description>Demo project for Spring Boot</description><properties><java.version>1.8</java.version></properties><dependencies><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter</artifactId><!--     排除的原因  避免出现下面的报错,虽然不影响使用,但是闹心org.apache.hadoop.fs.FileSystem - NativeIO.createDirectoryWithMode error, path = D:\test_data\download, mode = 755org.apache.hadoop.io.nativeio.NativeIOException: 当文件已存在时,无法创建该文件。--><!--            <exclusions>--><!--                <exclusion>--><!--                    <groupId>ch.qos.logback</groupId>--><!--                    <artifactId>logback-classic</artifactId>--><!--                </exclusion>--><!--                <exclusion>--><!--                    <groupId>ch.qos.logback</groupId>--><!--                    <artifactId>logback-core</artifactId>--><!--                </exclusion>--><!--            </exclusions>--></dependency><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-test</artifactId><scope>test</scope></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>3.1.3</version></dependency><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.12</version></dependency><dependency><groupId>org.slf4j</groupId><artifactId>slf4j-log4j12</artifactId><version>1.7.30</version></dependency></dependencies><!-- 打成两个包 一个带依赖  
一个不带依赖--><build><plugins><plugin><artifactId>maven-compiler-plugin</artifactId><version>3.6.1</version><configuration><source>1.8</source><target>1.8</target></configuration></plugin><plugin><artifactId>maven-assembly-plugin</artifactId><configuration><descriptorRefs><descriptorRef>jar-with-dependencies</descriptorRef></descriptorRefs></configuration><executions><execution><id>make-assembly</id><phase>package</phase><goals><goal>single</goal></goals></execution></executions></plugin></plugins></build>
</project>
2.4.1.2、日志配置

在项目的src/main/resources目录下,新建一个文件,命名为“log4j.properties”,在文件中填入以下内容

log4j.rootLogger=INFO, stdout  
log4j.appender.stdout=org.apache.log4j.ConsoleAppender  
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout  
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n  
log4j.appender.logfile=org.apache.log4j.FileAppender  
log4j.appender.logfile.File=target/spring.log  
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
2.4.1.3、代码实现

按照MapReduce编程规范,分别编写Mapper,Reducer,Driver

2.4.1.3.1、map阶段
package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;/*** map阶段*/
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {Text k = new Text();IntWritable v = new IntWritable(1);@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// 1 获取一行String line = value.toString();// 2 切割String[] words = line.split(" ");// 3 输出for (String word : words) {k.set(word);context.write(k, v);}}
}
2.4.1.3.2、reduce阶段
package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;/*** reduce阶段*/
public class WordCountReducer extends Reducer<Text, IntWritable,Text,IntWritable> {int sum;IntWritable v = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {// 1 累加求和sum = 0;for (IntWritable count : values) {sum += count.get();}// 2 输出v.set(sum);context.write(key,v);}}
2.4.1.3.3、driver阶段
package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;/*** Demo需求: 在给定的文本文件中统计输出每一个单词出现的总次数* 文本格式如下:* banzhang* xuexi* hadoop* hadoop* <p>* 预期输出格式如下:* banzhang	1* hadoop	2* xuexi	1*** 集群上打包测试命令行:* hadoop jar wc.jar com.song.hadoopdemo.mapreduce.WordCountDriver /testRemove/wordCount.txt /output** /testRemove/wordCount.txt  /output* HDFS的输入和输出路径***/
public class WordCountDriver {public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {// 1 获取配置信息以及获取job对象Configuration conf = new Configuration();Job job = Job.getInstance(conf);// 2 关联本Driver程序的jarjob.setJarByClass(WordCountDriver.class);// 3 关联Mapper和Reducer的jarjob.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReducer.class);// 4 设置Mapper输出的kv类型job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);// 5 设置最终输出kv类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);// 6 设置输入和输出路径  扔服务器上测试FileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));// 7 提交jobboolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}
}
2.4.1.3.4、放到集群上

2.4.1.3.5、执行任务
hadoop jar wc.jar com.song.hadoopdemo.mapreduce.WordCountDriver /testRemove/wordCount.txt /output

2.4.2、在Windows上向集群提交任务进行测试

2.4.2.1、pom依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><parent><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-parent</artifactId><version>2.5.1</version><relativePath/> <!-- lookup parent from repository --></parent><groupId>com.song</groupId><artifactId>hadoopdemo</artifactId><version>0.0.1-SNAPSHOT</version><name>hadoopdemo</name><description>Demo project for Spring Boot</description><properties><java.version>1.8</java.version></properties><dependencies><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter</artifactId><!--     排除的原因  避免出现下面的报错,虽然不影响使用,但是闹心org.apache.hadoop.fs.FileSystem - NativeIO.createDirectoryWithMode error, path = D:\test_data\download, mode = 755org.apache.hadoop.io.nativeio.NativeIOException: 当文件已存在时,无法创建该文件。--><!--            <exclusions>--><!--                <exclusion>--><!--                    <groupId>ch.qos.logback</groupId>--><!--                    <artifactId>logback-classic</artifactId>--><!--                </exclusion>--><!--                <exclusion>--><!--                    <groupId>ch.qos.logback</groupId>--><!--                    <artifactId>logback-core</artifactId>--><!--                </exclusion>--><!--            </exclusions>--></dependency><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-test</artifactId><scope>test</scope></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>3.1.3</version></dependency><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.12</version></dependency><dependency><groupId>org.slf4j</groupId><artifactId>slf4j-log4j12</artifactId><version>1.7.30</version></dependency></dependencies><!-- 打成两个包 一个带依赖  
一个不带依赖--><build><plugins><plugin><artifactId>maven-compiler-plugin</artifactId><version>3.6.1</version><configuration><source>1.8</source><target>1.8</target></configuration></plugin><plugin><artifactId>maven-assembly-plugin</artifactId><configuration><descriptorRefs><descriptorRef>jar-with-dependencies</descriptorRef></descriptorRefs></configuration><executions><execution><id>make-assembly</id><phase>package</phase><goals><goal>single</goal></goals></execution></executions></plugin></plugins></build>
</project>
2.4.2.2、日志配置

在项目的src/main/resources目录下,新建一个文件,命名为“log4j.properties”,在文件中填入以下内容

log4j.rootLogger=INFO, stdout  
log4j.appender.stdout=org.apache.log4j.ConsoleAppender  
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout  
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n  
log4j.appender.logfile=org.apache.log4j.FileAppender  
log4j.appender.logfile.File=target/spring.log  
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
2.4.2.3、代码实现
2.4.2.3.1、map阶段
package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;/*** map阶段*/
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {Text k = new Text();IntWritable v = new IntWritable(1);@Overrideprotected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {// 1 获取一行String line = value.toString();// 2 切割String[] words = line.split(" ");// 3 输出for (String word : words) {k.set(word);context.write(k, v);}}
}
2.4.2.3.2、reduce阶段
package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;/*** reduce阶段*/
public class WordCountReducer extends Reducer<Text, IntWritable,Text,IntWritable> {int sum;IntWritable v = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {// 1 累加求和sum = 0;for (IntWritable count : values) {sum += count.get();}// 2 输出v.set(sum);context.write(key,v);}}
2.4.2.3.3、driver阶段

(1)编写Driver代码

package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;/*** @ClassName WordCountByWindowDriver* @Description* @Author swq* @Date 2022/12/8 17:51* @Version 1.0*/
public class WordCountByWindowDriver {public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {// 1 获取配置信息以及封装任务Configuration conf = new Configuration();//设置在集群运行的相关参数-设置HDFS,NAMENODE的地址conf.set("fs.defaultFS", "hdfs://hadoop102:8020");//指定MR运行在Yarn上conf.set("mapreduce.framework.name", "yarn");//指定MR可以在远程集群运行conf.set("mapreduce.app-submission.cross-platform","true");//指定yarn resourcemanager的位置conf.set("yarn.resourcemanager.hostname","hadoop103");Job job = Job.getInstance(conf);// 2 设置jar加载路径job.setJarByClass(WordCountDriver.class);// 3 设置map和reduce类job.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReducer.class);// 4 设置map输出job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);// 5 设置最终输出kv类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);// 6 设置输入和输出路径
//        FileInputFormat.setInputPaths(job, new Path(args[0]));
//        FileOutputFormat.setOutputPath(job, new Path(args[1]));FileInputFormat.setInputPaths(job, new Path("/testRemove/wordCount.txt"));FileOutputFormat.setOutputPath(job, new Path("/out"));// 7 提交boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}
}

(2)将编写完的程序打成jar包,找一个非中文目录存放,并在Driver中重新设置jar的引用地址

package com.song.hadoopdemo.mapreduce;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;/*** @ClassName WordCountByWindowDriver* @Description* @Author swq* @Date 2022/12/8 17:51* @Version 1.0*/
public class WordCountByWindowDriver {public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {// 1 获取配置信息以及封装任务Configuration conf = new Configuration();//设置在集群运行的相关参数-设置HDFS,NAMENODE的地址conf.set("fs.defaultFS", "hdfs://hadoop102:8020");//指定MR运行在Yarn上conf.set("mapreduce.framework.name", "yarn");//指定MR可以在远程集群运行conf.set("mapreduce.app-submission.cross-platform","true");//指定yarn resourcemanager的位置conf.set("yarn.resourcemanager.hostname","hadoop103");Job job = Job.getInstance(conf);// 2 设置jar加载路径
//        job.setJarByClass(WordCountDriver.class);job.setJar("C:\\Users\\33229\\Desktop\\jar\\hadoopdemo-0.0.1-SNAPSHOT.jar");// 3 设置map和reduce类job.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReducer.class);// 4 设置map输出job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);// 5 设置最终输出kv类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);// 6 设置输入和输出路径
//        FileInputFormat.setInputPaths(job, new Path(args[0]));
//        FileOutputFormat.setOutputPath(job, new Path(args[1]));FileInputFormat.setInputPaths(job, new Path("/testRemove/wordCount.txt"));FileOutputFormat.setOutputPath(job, new Path("/out"));// 7 提交boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}
}

(3)设置参数


(4)如果使用的是args接收参数,则在原先的基础上添加Program arguments参数配置

 FileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));