# Read CSV, Avro and Parquet file formats

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, when

def getsparkSession():
    """Build (or reuse) the SparkSession used by this script.

    The session is configured with the "yarn" master and the application
    name 'Learnomate Example'.

    Returns:
        SparkSession: the session returned by ``builder.getOrCreate()``.
    """
    # The original builder chain ended in a dangling line continuation and
    # never finalized the builder; getOrCreate() is what actually produces
    # the session object.
    spark = (
        SparkSession.builder.master("yarn")
        .appName('Learnomate Example')
        .getOrCreate()
    )
    return spark

# Session shared by every read below.
spark = getsparkSession()

# Pipe-delimited CSV whose first row is a header.
# NOTE(review): the original statement was truncated at both ends. The
# leading `spark.read.format(` is restored here, but the trailing
# `.load(<path>)` call is missing and the CSV path is not visible anywhere
# in this file. Until `.load(...)` is appended, origin_df is only a
# configured DataFrameReader, not a DataFrame.
# TODO: append .load(<path to the CSV file>) to this chain.
origin_df = spark.read.format('csv').option('header', 'True').option('delimiter', '|')

# Load the Parquet file into a DataFrame. The `spark.read.format(` prefix was
# missing from the original statement and is restored here.
df = spark.read.format('parquet').load(r"C:\Users\ankus\PycharmProjects\pythonProject2\venv\resources\Train.parquet")

# Load the Avro file into a DataFrame. The `spark.read.format(` prefix was
# missing from the original statement and is restored here.
# NOTE(review): the 'avro' format presumably requires the external spark-avro
# package on the session's classpath — confirm for this deployment.
df = spark.read.format('avro').load(r"C:\Users\ankus\PycharmProjects\pythonProject2\venv\resources\variants.avro")

# Read data from HDFS

