How to join two table and find high and low in AQILevel
Input:
two table,
Output:
Solution:
df = spark.read.format("csv").option("header","True").load("file:///home/cloudera/typevec.txt")
df.show()
df2 = spark.read.format("csv").option("header","True").load("file:///home/cloudera/cityaqi.txt")
df2.show()
dfg = df.groupBy("city").agg(sum("noof vehcile").alias("total nof vehcile"))
dfg.show()
dfu = dfg.join(df2,dfg.city==df2.city,"inner").drop(df2['city']).withColumn("AQI",col("AQI").cast("integer"))
dfu.show()
dfu.createOrReplaceTempView("table")
dfv = dfu.withColumn("AQILevel",expr("case when AQI=(select max(AQI) from table) then 'High' when AQI=(select min(AQI) from table) then 'Low' end")).filter("AQILevel is not null ").drop("AQI")
dfv.show()
Comments
Post a Comment