How to connect MySQL with PySpark and apply some filters
Open the MySQL shell, then create a database and a table:
create database empdet;
use empdet;
create table tb1 (name varchar(20), gender varchar(20), age int, salary int);
insert into tb1 values
  ('Akshay','male',30,32000),
  ('Aditi','female',32,35000),
  ('Shah','male',29,34000),
  ('Priya','female',36,40000),
  ('Feriha','female',25,35000);
Start the PySpark shell with the MySQL JDBC connector jar on the classpath:
pyspark --jars mysql-connector-java-5.1.44.jar
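If you later run this as a standalone script with spark-submit instead of the interactive shell, the connector jar can be attached while building the SparkSession. This is a minimal sketch; the app name and the jar path (assumed to be in the working directory) are illustrative choices, not part of the original post:
from pyspark.sql import SparkSession

# Sketch for a standalone script: attach the MySQL connector jar to the session.
# The jar path and app name below are assumptions.
spark = SparkSession.builder \
    .appName("mysql-pyspark-demo") \
    .config("spark.jars", "mysql-connector-java-5.1.44.jar") \
    .getOrCreate()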
Read the tb1 table into a DataFrame over JDBC:
df = spark.read.format("jdbc") \
    .option("url", "jdbc:mysql://localhost/empdet") \
    .option("user", "root") \
    .option("password", "cloudera") \
    .option("dbtable", "tb1") \
    .load()
df.show()
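Before filtering, it can help to confirm that the JDBC read picked up the expected column types (the exact nullability shown may vary with the connector version):
df.printSchema()
# From the CREATE TABLE above: name and gender should come back as string, age and salary as integer.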
Filter male employees older than 29 whose salary is between 30000 and 35000 (the col function must be imported first):
from pyspark.sql.functions import col

dfp = df.filter(col("gender") == 'male').filter(col("age") > 29).filter(col("salary").between(30000, 35000))
dfp.show()
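The same result can also be written as a single filter by combining the conditions with the & operator; each condition must be parenthesized. The name dfp2 here is just illustrative:
dfp2 = df.filter((col("gender") == 'male') & (col("age") > 29) & (col("salary").between(30000, 35000)))
dfp2.show()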
Filter female employees:
dff = df.filter(col("gender") == 'female')
dff.show()
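If you prefer SQL, the same filters can be run by registering the DataFrame as a temporary view. A small sketch, assuming an illustrative view name emp:
# Register the DataFrame as a temporary view so it can be queried with SQL.
df.createOrReplaceTempView("emp")
spark.sql("select * from emp where gender = 'female'").show()
spark.sql("select * from emp where gender = 'male' and age > 29 and salary between 30000 and 35000").show()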