import pandas as pd
# Sample data: each key becomes a column, each list holds that column's values.
person = {
    "first": ["Kushagra", "Jane", "John"],
    "last": ["Gupta", "Doe", "Doe"],
    "email": ["Kushagra225@gmail.com", "JaneDoe123@gmail.com", "John@gmail.com"],
}
df = pd.DataFrame(person)
df
df.columns

# --- Renaming columns ---
# Assigning to df.columns replaces every label at once, so all column names
# must be supplied.
df.columns = ["first_name", "last_name", "email"]
df

# A list comprehension can transform every label, here to upper case.
df.columns = [label.upper() for label in df.columns]
df

# The .str accessor also works on the column index: swap "_" for " ".
df.columns = df.columns.str.replace("_", " ")
df

# rename() changes only the listed columns, but it returns a new DataFrame:
# without inplace=True (or re-assignment) df itself is left untouched.
df.rename(columns={"FIRST NAME": "first", "LAST NAME": "last"})
df

# inplace=True saves the change in the original df.
df.rename(columns={"FIRST NAME": "first", "LAST NAME": "last"}, inplace=True)

# --- Updating rows ---
# Replace a whole row: one value per column, in column order.
df.loc[2] = ["John", "Smith", "John@gmail.com"]
df
df.rename(columns={"EMAIL": "email"}, inplace=True)

# Update only selected columns of a row.
df.loc[2, ["first", "email"]] = ["Jack", "Jack@gmail.com"]
df

# .at sets a single value.
df.at[2, "last"] = "Doe"
df

# --- Updating rows selected by a filter ---
filt = df["email"] == "Jack@gmail.com"
df[filt]

# Do NOT write `df[filt]['last'] = 'Smith'`: df[filt] produces a temporary
# object, so the value is set on that temporary, df is not changed, and pandas
# warns about chained assignment. Select the rows and the column in a single
# .loc call instead.
df.loc[filt, "last"] = "Smith"
df

# Change all emails to lower case.
df["email"] = df["email"].str.lower()
df

# --- Applying functions: overview ---
# apply    - on a Series the function is called with every value of the
#            column; on a DataFrame it is called with every Series
#            (column or row).
# applymap - DataFrame only; the function is called with each individual
#            value in the DataFrame.
# map      - Series.map substitutes values; a value that has no entry in the
#            mapping becomes NaN.
# replace  - substitutes values; a value that has no entry in the mapping
#            keeps its previous value.

# apply: length of each email address.
df["email"].apply(len)
def update_email(email: str) -> str:
    """Return *email* converted to upper case."""
    return email.upper()
# A custom function can be passed to apply ...
df['email'].apply(update_email)
# ... and so can a lambda function.
df['email'].apply(lambda x: x.upper())
print(df['email'].apply(len))  # on a Series: len() of each value
print(df.apply(len))  # on a DataFrame: len() of each column, i.e. the row count


def _apply_elementwise(frame, func):
    """Call *func* on every individual value of *frame*; return the new frame.

    DataFrame.applymap is deprecated in favour of DataFrame.map (pandas 2.1+),
    so prefer ``map`` when the installed pandas provides it and fall back to
    ``applymap`` on older versions. The check is made on the class because
    attribute access on an instance can also resolve to a column.
    """
    if hasattr(pd.DataFrame, "map"):
        return frame.map(func)
    return frame.applymap(func)


# applymap (element-wise): length of every value in the DataFrame
_apply_elementwise(df, len)
# lower-case every value and keep the result
df = _apply_elementwise(df, str.lower)

# map
# returns NaN for any value that has no entry in the mapping
df['first'].map({'kushagra': 'kush', 'jane': 'Jane'})

# replace
# keeps the previous value for any value that has no entry in the mapping
df['first'].replace({'kushagra': 'kush', 'jane': 'Jane'})