Arranging the columns in a DataFrame by iterating through the column values in Spark Scala
Input :
A DataFrame with two columns, market_Year and Quarter-Range, where the same market_Year value can appear on several rows and a Quarter-Range cell may hold a single range, a comma-separated list of ranges, or an empty string.

Output :
One row per market_Year, with all of its distinct Quarter-Range values collected into a single comma-separated string.

Solution :
import org.apache.spark.sql.functions._
import spark.implicits._   // assumes an active SparkSession named `spark`, as in spark-shell

// Sample input: the same market_Year appears on multiple rows, and a
// Quarter-Range cell may hold a comma-separated list of ranges or be empty.
val inputData = Seq(
  ("/equity/dividends[DIV|4Y]", ""),
  ("/equity/dividends[DIV|1Y]", "[0D-6M]"),
  ("/equity/dividends[DIV|1Y]", "[0D-6M],[6M-18M]"),
  ("/equity/dividends[DIV|2Y]", "[18M-2Y]"),
  ("/equity/dividends[DIV|2Y]", "[18M-2Y]"),
  ("/equity/dividends[DIV|3Y]", "[18M-2Y]"),
  ("/equity/dividends[DIV|2Y]", "[18M-2Y]"),
  ("/equity/dividends[DIV|1Y]", "[6M-18M]")
)

val df = inputData.toDF("market_Year", "Quarter-Range")
df.show(truncate = false)
df.printSchema()

// Split each comma-separated Quarter-Range value and explode it into one row per range.
val dfx = df.withColumn("Quarter-Range", explode(split($"Quarter-Range", ",")))

// Group by market_Year and collect the distinct ranges back into a single
// comma-separated string (collect_set drops duplicates and ignores nulls).
val dfg = dfx.groupBy("market_Year")
  .agg(concat_ws(",", collect_set(when($"Quarter-Range".isNotNull, $"Quarter-Range"))).as("Quarter-Range"))
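Note that collect_set gives no guarantee about the order in which the distinct ranges appear in the concatenated string. If a stable, sorted ordering is wanted, sort_array can be wrapped around collect_set before concat_ws — a minimal sketch reusing the dfx DataFrame above (dfgSorted is just an illustrative name):

// Sort the distinct ranges so the concatenated string is deterministic across runs.
val dfgSorted = dfx.groupBy("market_Year")
  .agg(concat_ws(",", sort_array(collect_set($"Quarter-Range"))).as("Quarter-Range"))

dfgSorted.show(truncate = false)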