In PySpark, the select() function is used to select a single column, multiple columns, columns by index, all columns from a list, or nested columns from a DataFrame. Because select() is a transformation, it returns a new DataFrame containing only the selected columns.
# Create DataFrame with nested columns: 'name' is a struct of
# (firstname, middlename, lastname); 'state' and 'gender' are flat strings.
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType

# The original snippet used `spark` without defining it; create the session here.
spark = SparkSession.builder.appName("SelectNestedColumns").getOrCreate()

# Each row: ((firstname, middlename, lastname), state, gender)
data = [
    (("James", None, "Smith"), "OH", "M"),
    (("Anna", "Rose", ""), "NY", "F"),
    (("Julia", "", "Williams"), "OH", "F"),
]

schema = StructType([
    # Nested struct column — enables selections like "name.firstname"
    StructField('name', StructType([
        StructField('firstname', StringType(), True),
        StructField('middlename', StringType(), True),
        StructField('lastname', StringType(), True),
    ])),
    StructField('state', StringType(), True),
    StructField('gender', StringType(), True),
])

df = spark.createDataFrame(data=data, schema=schema)
df.printSchema()
df.show(truncate=False)  # truncate=False prints full cell contents

# Select nested struct fields in several equivalent ways
df.select("name.firstname", "name.lastname").show()          # dotted string paths
df.select(df.name.firstname, df.name.lastname).show()        # attribute access
# Fixed: the original passed the whole DataFrame (df.select(df, df));
# use bracket indexing to reference the nested columns instead.
df.select(df["name.firstname"], df["name.lastname"]).show()
# By using the col() function
from pyspark.sql.functions import col
df.select(col("name.firstname"), col("name").lastname).show()


# Select columns whose name matches a regular expression
# (backticks are required around the pattern)
df.select(df.colRegex("`^.*name*`")).show()

# Select all columns of the DataFrame
df.select('*').show()

# Fixed: the original called df.select() with no arguments, which selects
# zero columns. The intended example expands every field of the nested
# 'name' struct into top-level columns.
df.select("name.*").show()
