How to connect MySQL with PySpark and apply some filters

How to connect MySQL with PySpark and apply some filters

Open the MySQL client, then create a database and a table.

-- Create a demo database and a small employee table, then seed it with the
-- sample rows used by the PySpark filter examples below.
-- IF NOT EXISTS makes the setup script safe to re-run.
CREATE DATABASE IF NOT EXISTS empdet;

USE empdet;

-- NOTE(review): columns are left nullable to match the original tutorial
-- schema; in production, declare NOT NULL deliberately and add a key.
CREATE TABLE IF NOT EXISTS tb1 (
    name   VARCHAR(20),
    gender VARCHAR(20),
    age    INT,
    salary INT
);

-- Explicit column list so the INSERT does not break silently if the table
-- schema ever gains or reorders columns.
INSERT INTO tb1 (name, gender, age, salary) VALUES
    ('Akshay', 'male',   30, 32000),
    ('Aditi',  'female', 32, 35000),
    ('Shah',   'male',   29, 34000),
    ('Priya',  'female', 36, 40000),
    ('Feriha', 'female', 25, 35000);


Launch the PySpark shell with the MySQL JDBC connector on the classpath.

# Start the interactive PySpark shell, adding the MySQL JDBC driver jar so
# Spark can read from MySQL via the "jdbc" data source below.
pyspark --jars mysql-connector-java-5.1.44.jar

# The pyspark shell predefines `spark` (a SparkSession), but NOT `col` --
# the original snippet omitted this import, so the filter calls below
# would raise NameError. It must be imported explicitly:
from pyspark.sql.functions import col

# Read the whole MySQL table `empdet.tb1` over JDBC into a DataFrame.
# NOTE(review): credentials are hard-coded for the tutorial; use a secrets
# mechanism (or at least JDBC properties from config) in real code.
df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:mysql://localhost/empdet")
    .option("user", "root")
    .option("password", "cloudera")
    .option("dbtable", "tb1")
    .load()
)

df.show()

# Male employees older than 29 whose salary is in [30000, 35000]
# (between() is inclusive on both ends). One combined filter is
# equivalent to the original chain of three .filter() calls.
dfp = df.filter(
    (col("gender") == "male")
    & (col("age") > 29)
    & (col("salary").between(30000, 35000))
)

dfp.show()

# All female employees.
dff = df.filter(col("gender") == "female")

dff.show()

Comments

Post a Comment