跳转至

KeyedStream

package com.atguigu.aggreagte;

import com.atguigu.bean.WaterSensor;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeybyDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        DataStreamSource<WaterSensor> sensorDS = env.fromElements(
                new WaterSensor("s1", 1L, 1),
                new WaterSensor("s1", 11L, 11),
                new WaterSensor("s2", 2L, 2),
                new WaterSensor("s3", 3L, 3)
        );

        // 按照 id 分组
        /**
         * TODO keyby: 按照id分组
         * 要点:
         *  1、返回的是 一个 KeyedStream,键控流
         *  2、keyby不是 转换算子, 只是对数据进行重分区, 不能设置并行度
         *  3、分组 与 分区 的关系:
         *    1) keyby是对数据分组,保证 相同key的数据 在同一个分区(子任务)
         *    2) 分区: 一个子任务可以理解为一个分区,一个分区(子任务)中可以存在多个分组(key)
         */
        KeyedStream<WaterSensor, String> sensorKS = sensorDS
                .keyBy(new KeySelector<WaterSensor, String>() {
                    @Override
                    public String getKey(WaterSensor value) throws Exception {
                        return value.getId();
                    }
                });

        sensorKS.print();

        env.execute();
    }
}