How to Pivot data and sum of data where column have alphabets

 How to Pivot data and sum of data where column have alphabets

Input :


Output Required :


Solution:

import org.apache.spark.SparkConf

import org.apache.spark.SparkContext

import org.apache.spark.sql.SparkSession

import org.apache.spark.sql.expressions._

import org.apache.spark.sql.functions._

import org.apache.spark.sql.types._

            // Load data

   val df = spark.read.format("csv").load("file:///C:/DB/data1.txt")

   df.show()

   //remove K from column _c1 

   val df2 = df.withColumn("_c1", regexp_replace(col("_c1"),"K",""))

   df2.show()

   //sum of _c1 data group by _c0 with convert sum into int

   val df3 = df2.groupBy("_c0").agg(sum("_c1").cast("Integer").alias("_c1"))

   df3.show()

   //add K in _c1 column

   val df4 = df3.withColumn("_c1",concat(col("_c1"),lit("K")))

   df4.show()


   //now pivot table 

   val df5 = df4.groupBy().pivot("_c0").agg(first("_c1"))

   df5.show()


Comments