Find the total working hours of each employee in a day using Spark and Scala

File name: epid.txt

1,"2020-12-30T09:00:00","IN"

1,"2020-12-30T13:00:00","OUT"

1,"2020-12-30T14:00:00","IN"

1,"2020-12-30T17:00:00","OUT"

1,"2020-12-30T17:30:00","IN"

1,"2020-12-30T20:00:00","OUT"

2,"2020-12-30T10:30:00","IN"

2,"2020-12-30T15:00:00","OUT"

2,"2020-12-30T16:30:00","IN"

2,"2020-12-30T21:30:00","IN"


Solution:

// Explicit schema for the punch-clock CSV: employee id, punch timestamp, IN/OUT flag.
// All three columns are nullable, matching the original definition.
val schemax = new StructType()
  .add("eid", IntegerType, nullable = true)   // employee id
  .add("ts", TimestampType, nullable = true)  // punch time
  .add("event", StringType, nullable = true)  // "IN" or "OUT"


// Load the punch file using the explicit schema (epid.txt has no header row).
// .csv(path) is the shorthand for .format("csv").load(path).
val df = spark.read
  .schema(schemax)
  .csv("file:///D:/epid.txt")

df.show()
df.printSchema()


// Seconds-since-epoch view of the punch timestamp.
val unixtime = unix_timestamp(col("ts"))

// Per-employee window ordered by punch time. Subtracting the previous event's
// epoch seconds from the current one yields the gap between consecutive punches.
// NOTE(review): this pairing assumes punches strictly alternate IN/OUT per
// employee — two consecutive OUT rows would be mis-paired; confirm with the feed.
val punchWindow = Window.partitionBy("eid").orderBy("ts")
val pro = (unixtime - lag(unixtime, 1).over(punchWindow)).alias("time_diff")


// Project eid plus the lag-based gap, then keep only OUT rows: for an OUT row,
// time_diff is the seconds elapsed since the preceding IN punch.
val outRows = df
  .select(col("eid"), pro)
  .filter(col("event") === "OUT")

// Convert seconds to minutes and discard the raw seconds column.
val dx = outRows
  .withColumn("time_diff_minutes", col("time_diff") / 60)
  .drop("time_diff")

dx.show()


// Total minutes worked per employee: sum every IN→OUT session length.
// The output column keeps the original name "time_diff".
val result = dx
  .groupBy("eid")
  .agg(sum(col("time_diff_minutes")).as("time_diff"))

result.show()



Comments