How to Pivot data and sum of data where column have alphabets
Input :
Output Required :
Solution:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
// Load data
val df = spark.read.format("csv").load("file:///C:/DB/data1.txt")
df.show()
//remove K from column _c1
val df2 = df.withColumn("_c1", regexp_replace(col("_c1"),"K",""))
df2.show()
//sum of _c1 data group by _c0 with convert sum into int
val df3 = df2.groupBy("_c0").agg(sum("_c1").cast("Integer").alias("_c1"))
df3.show()
//add K in _c1 column
val df4 = df3.withColumn("_c1",concat(col("_c1"),lit("K")))
df4.show()
//now pivot table
val df5 = df4.groupBy().pivot("_c0").agg(first("_c1"))
df5.show()
Comments
Post a Comment