批处理
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Batch word count built on Flink's DataSet API.
 *
 * <p>Reads {@code input/words.txt}, splits every line on single spaces, and prints
 * how many times each word occurs.
 *
 * @author 影子
 * @create 2022-04-10-15:47
 **/
public class BatchWordCount {
    /**
     * Builds and runs the batch word-count job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job fails while it is being executed
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // 2. Read the input file line by line.
        DataSource<String> lineDataSource = env.readTextFile("input/words.txt");

        // 3. Tokenize every line and emit one (word, 1) pair per word.
        FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple = lineDataSource
                .flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
                    for (String word : line.split(" ")) {
                        // split(" ") yields "" for blank lines and for leading or
                        // repeated spaces; those are not words and must not be counted.
                        if (!word.isEmpty()) {
                            out.collect(Tuple2.of(word, 1L));
                        }
                    }
                })
                // The lambda's generic types are erased, so the output type is declared explicitly.
                .returns(Types.TUPLE(Types.STRING, Types.LONG));

        // 4. Group the pairs by the word (tuple field 0).
        UnsortedGrouping<Tuple2<String, Long>> wordAndOneGroup = wordAndOneTuple.groupBy(0);

        // 5. Sum the counts (tuple field 1) inside every group.
        AggregateOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);

        // 6. Print the result. For a DataSet, print() also triggers the execution,
        //    which is why there is no explicit env.execute() call here.
        sum.print();
    }
}
![image-20220410165452008](https://img-blog.csdnimg.cn/3a3beb1a6d3043ba96064125738771c7.png)
有界流式处理
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Word count over a bounded stream, built on Flink's DataStream API.
 *
 * <p>Reads {@code input/words.txt} as a stream, splits every line on single spaces,
 * and prints the count of each word.
 *
 * @author 影子
 * @create 2022-04-10-16:16
 **/
public class BoundedStreamWordCount {
    /**
     * Builds and runs the bounded streaming word-count job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job fails while it is being executed
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Read the input file as a stream of lines.
        DataStreamSource<String> lineDataStreamSource = env.readTextFile("input/words.txt");

        // 3. Tokenize every line and emit one (word, 1) pair per word.
        SingleOutputStreamOperator<Tuple2<String, Long>> wordsAndOneTuple = lineDataStreamSource
                .flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
                    for (String word : line.split(" ")) {
                        // split(" ") yields "" for blank lines and for leading or
                        // repeated spaces; those are not words and must not be counted.
                        if (!word.isEmpty()) {
                            out.collect(Tuple2.of(word, 1L));
                        }
                    }
                })
                // The lambda's generic types are erased, so the output type is declared explicitly.
                .returns(Types.TUPLE(Types.STRING, Types.LONG));

        // 4. Key the stream by the word (tuple field f0).
        KeyedStream<Tuple2<String, Long>, String> wordAndOneKeyedStream =
                wordsAndOneTuple.keyBy(data -> data.f0);

        // 5. Keep a rolling sum of the counts (tuple field 1) per key.
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordAndOneKeyedStream.sum(1);

        // 6. Print the results.
        sum.print();

        // 7. Start the job; nothing above runs until execute() is called.
        env.execute();
    }
}

无界流式处理
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Word count over an unbounded stream, built on Flink's DataStream API.
 *
 * <p>Reads lines of text from a socket on {@code hadoop102:7777}, splits every line
 * on single spaces, and prints the count of each word.
 *
 * @author 影子
 * @create 2022-04-10-16:36
 **/
public class StreamWordCount {
    /**
     * Builds and runs the socket-based streaming word-count job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job fails while it is being executed
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the streaming execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Read the text stream from the socket.
        DataStreamSource<String> lineDataStreamSource = env.socketTextStream("hadoop102", 7777);

        // 3. Tokenize every line and emit one (word, 1) pair per word.
        SingleOutputStreamOperator<Tuple2<String, Long>> wordsAndOneTuple = lineDataStreamSource
                .flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
                    for (String word : line.split(" ")) {
                        // split(" ") yields "" for blank lines and for leading or
                        // repeated spaces; those are not words and must not be counted.
                        if (!word.isEmpty()) {
                            out.collect(Tuple2.of(word, 1L));
                        }
                    }
                })
                // The lambda's generic types are erased, so the output type is declared explicitly.
                .returns(Types.TUPLE(Types.STRING, Types.LONG));

        // 4. Key the stream by the word (tuple field f0).
        KeyedStream<Tuple2<String, Long>, String> wordAndOneKeyedStream =
                wordsAndOneTuple.keyBy(data -> data.f0);

        // 5. Keep a rolling sum of the counts (tuple field 1) per key.
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordAndOneKeyedStream.sum(1);

        // 6. Print the results.
        sum.print();

        // 7. Start the job; nothing above runs until execute() is called.
        env.execute();
    }
}

版权声明:本文为mynameisgt原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。