Blog
Cache and Persist in PySpark
- March 12, 2023
- Posted by: sayali@learnomate.org
- Category: Hadoop
No Comments
# Example: how DataFrame.cache() and DataFrame.persist() affect the
# DataFrame's `is_cached` flag.
import pyspark
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder.master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()
)

try:
    df = spark.read.csv(
        r"C:\Users\ankus\PycharmProjects\DecHadoop\Resources\region_country.csv"
    )
    # No caching has been requested yet.
    print(df.is_cached)

    # cache() marks the DataFrame for caching with the default storage level.
    # It is lazy: data is only materialised by the first action (show() below).
    df.cache()
    print(df.is_cached)

    # Spark does not change the storage level of a DataFrame that is already
    # marked as cached: a second persist() is ignored (only a warning is
    # logged). Drop the existing cache entry first so the explicit level
    # requested below is actually applied.
    df.unpersist()
    df.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    print(df.is_cached)

    # First action: reads the CSV and populates the cache.
    df.show()
finally:
    # Always release the local Spark context, even if the read or show fails.
    spark.stop()
# Example: how DataFrame.cache() and DataFrame.persist() affect the
# DataFrame's `is_cached` flag.
import pyspark
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder.master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()
)

try:
    df = spark.read.csv(
        r"C:\Users\ankus\PycharmProjects\DecHadoop\Resources\region_country.csv"
    )
    # No caching has been requested yet.
    print(df.is_cached)

    # cache() marks the DataFrame for caching with the default storage level.
    # It is lazy: data is only materialised by the first action (show() below).
    df.cache()
    print(df.is_cached)

    # Spark does not change the storage level of a DataFrame that is already
    # marked as cached: a second persist() is ignored (only a warning is
    # logged). Drop the existing cache entry first so the explicit level
    # requested below is actually applied.
    df.unpersist()
    df.persist(pyspark.StorageLevel.MEMORY_AND_DISK)
    print(df.is_cached)

    # First action: reads the CSV and populates the cache.
    df.show()
finally:
    # Always release the local Spark context, even if the read or show fails.
    spark.stop()