In [1]:
import pandas as pd
In [2]:
# Column-oriented sample data: each key is a column label and each list
# holds that column's values in row order.
person = dict(
    first=["Kushagra", "Jane", "John"],
    last=["Gupta", "Doe", "Doe"],
    email=["Kushagra225@gmail.com", "JaneDoe123@gmail.com", "John@gmail.com"],
)
In [3]:
# Build the DataFrame from the dict: keys become column labels, list items become rows.
df = pd.DataFrame(data=person)
In [4]:
df
Out[4]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [5]:
df.columns
Out[5]:
Index(['first', 'last', 'email'], dtype='object')
In [6]:
# Assigning to df.columns replaces every label at once,
# so the list has to name all of the columns, in order.
new_column_names = ['first_name', 'last_name', 'email']
df.columns = new_column_names
In [7]:
df
Out[7]:
first_name last_name email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [8]:
# A list comprehension can derive the new labels from the current ones;
# here every column name is upper-cased.
df.columns = [label.upper() for label in df.columns]
In [9]:
df
Out[9]:
FIRST_NAME LAST_NAME EMAIL
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [12]:
# Swap underscores for spaces in every column label via the vectorized .str accessor.
spaced_labels = df.columns.str.replace('_', ' ')
df.columns = spaced_labels
In [13]:
df
Out[13]:
FIRST NAME LAST NAME EMAIL
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [14]:
# rename changes only the listed columns and returns a new DataFrame;
# df itself is left untouched unless the result is assigned or inplace=True is used.
df.rename({'FIRST NAME': 'first', 'LAST NAME': 'last'}, axis='columns')
Out[14]:
first last EMAIL
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [15]:
df
Out[15]:
FIRST NAME LAST NAME EMAIL
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [16]:
# inplace=True applies the rename to df itself instead of returning a renamed copy.
# 'EMAIL' is renamed here as well so that every column label is lower-case from
# this point on, matching the output displayed right after the next cell.
df.rename(columns={'FIRST NAME': 'first', 'LAST NAME': 'last', 'EMAIL': 'email'}, inplace=True)
In [36]:
# Overwrite the whole row labelled 2; the list supplies one value per column, in column order.
updated_row = ['John', 'Smith', 'John@gmail.com']
df.loc[2] = updated_row
In [37]:
df
Out[37]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Smith John@gmail.com
In [38]:
# Lower-case the 'EMAIL' label in place so it matches the other column names.
df.rename({'EMAIL': 'email'}, axis='columns', inplace=True)
In [39]:
# Update only the 'first' and 'email' cells of row 2;
# the new values are matched to the listed columns by position.
target_columns = ['first', 'email']
df.loc[2, target_columns] = ['Jack', 'Jack@gmail.com']
In [40]:
df
Out[40]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 Jack Smith Jack@gmail.com
In [41]:
# .at is a label-based accessor for exactly one cell: (row label, column label).
row_label, column_label = 2, 'last'
df.at[row_label, column_label] = 'Doe'
In [42]:
df
Out[42]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 Jack Doe Jack@gmail.com
In [44]:
# Boolean mask: True only for rows whose email equals Jack's address.
filt = df['email'].eq('Jack@gmail.com')
# Boolean indexing keeps just the rows where the mask is True.
df.loc[filt]
Out[44]:
first last email
2 Jack Doe Jack@gmail.com
In [45]:
# Anti-pattern, shown on purpose: this is chained indexing. df[filt] first produces a
# temporary object, and the assignment is then made on that temporary object, so the
# 'last' value in df itself is not updated (pandas reports this with the warning below).
df[filt]['last'] = 'Smith'
/Users/kushagra/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
In [46]:
# Correct way to assign through a filter: pass the row mask and the column label
# to a single .loc call so the value is written into df itself.
filt = df['email'].eq('Jack@gmail.com')
df.loc[filt, 'last'] = 'Smith'
df
Out[46]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 Jack Smith Jack@gmail.com
In [48]:
# Normalise every email address to lower case with the vectorized .str accessor.
lowercased_emails = df['email'].str.lower()
df['email'] = lowercased_emails
In [49]:
df
Out[49]:
first last email
0 Kushagra Gupta kushagra225@gmail.com
1 Jane Doe janedoe123@gmail.com
2 Jack Smith jack@gmail.com
In [56]:
# apply - works on both Series and DataFrame. On a Series the function is called on every value of the column; on a DataFrame it is called once per column (or once per row with axis=1) and receives that whole Series
# applymap - DataFrame only; applies the function to each individual value in the DataFrame
# map - Series method; substitutes each value using the given mapping and returns NaN for any value that has no entry in the mapping
# replace - available on both Series and DataFrame; substitutes only the values that are listed and keeps the previous value for everything else
In [57]:
# apply on a Series: len is called on every email string,
# giving the length of each address.
email_column = df['email']
email_column.apply(len)
Out[57]:
0    21
1    20
2    14
Name: email, dtype: int64
In [51]:
def update_email(email):
    """Return the given email address converted to upper case."""
    shouted = email.upper()
    return shouted
In [52]:
# apply also accepts a user-defined function; update_email is called on every email value.
df['email'].apply(func=update_email)
Out[52]:
0    KUSHAGRA225@GMAIL.COM
1     JANEDOE123@GMAIL.COM
2           JACK@GMAIL.COM
Name: email, dtype: object
In [54]:
# Same result with an anonymous function instead of a named one.
df['email'].apply(lambda address: address.upper())
Out[54]:
0    KUSHAGRA225@GMAIL.COM
1     JANEDOE123@GMAIL.COM
2           JACK@GMAIL.COM
Name: email, dtype: object
In [55]:
# Series.apply: len is called on each email string, giving one length per row.
email_lengths = df['email'].apply(len)
print(email_lengths)
# DataFrame.apply: len is called once per column, so each entry is that column's
# number of rows (the 'email' entry equals len(df['email'])).
column_lengths = df.apply(len)
print(column_lengths)
0    21
1    20
2    14
Name: email, dtype: int64
first    3
last     3
email    3
dtype: int64
In [58]:
# applymap on a DataFrame: len is called on each individual cell, not on whole columns.
df.applymap(func=len)
Out[58]:
first last email
0 8 5 21
1 4 3 20
2 4 5 14
In [64]:
# Lower-case every cell so the map/replace lookups further down can use lower-case keys.
# str.lower is called on each value, so every cell is expected to be a str.
df = df.applymap(func=str.lower)
In [65]:
# Series.map with a dict: values found among the keys are substituted,
# values without a matching key come back as NaN.
short_names = {'kushagra': 'kush', 'jane': 'Jane'}
df['first'].map(short_names)
Out[65]:
0    kush
1    Jane
2     NaN
Name: first, dtype: object
In [66]:
# Series.replace with a dict: listed values are substituted,
# values that are not listed keep their previous content.
df['first'].replace(to_replace={'kushagra': 'kush', 'jane': 'Jane'})
Out[66]:
0    kush
1    Jane
2    jack
Name: first, dtype: object