"""Read sample datasets (CSV, Parquet, Avro) into Spark DataFrames and show them.

The script creates a SparkSession, loads files from local paths and from HDFS,
and prints the first rows of each resulting DataFrame.
"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, when


def getsparkSession():
    """Return a SparkSession with master "yarn" and the example app name.

    ``getOrCreate()`` hands back an already-active session when one exists,
    so calling this function repeatedly does not start additional sessions.
    """
    spark = (
        SparkSession.builder.master("yarn")
        .appName('Learnomate Example')
        .getOrCreate()
    )
    return spark


spark = getsparkSession()

# Read Data from the local file system
# NOTE(review): the session master is "yarn" while these are local Windows
# paths -- confirm the files are reachable from wherever the job executes.

# Pipe-delimited CSV whose first line is the header row.
origin_df = (
    spark.read.format('csv')
    .option('header', 'True')
    .option('delimiter', '|')
    .load(r"C:\Users\ankus\PycharmProjects\pythonProject2\venv\resources\empdata.csv")
)
origin_df.show()

# Parquet files carry their own schema, so no reader options are set.
df = spark.read.format('parquet').load(r"C:\Users\ankus\PycharmProjects\pythonProject2\venv\resources\Train.parquet")
df.show()

# NOTE(review): the 'avro' source normally needs the external spark-avro
# module on the classpath -- confirm it is supplied at submit time.
df = spark.read.format('avro').load(r"C:\Users\ankus\PycharmProjects\pythonProject2\venv\resources\variants.avro")
df.show()

# Read Data from HDFS
# Same pipe-delimited CSV layout as above, addressed by a full HDFS URI.
origin_df = (
    spark.read.format('csv')
    .option('header', 'True')
    .option('delimiter', '|')
    .load("hdfs://sandbox-hdp.hortonworks.com:8020/input/empdata.csv")
)
origin_df.show()