In [2]:
import pandas as pd
In [7]:
# A single person record: every field maps to one scalar value.
person1 = dict(
    first="Kushagra",
    last="Gupta",
    email="Kushagra225@gmail.com",
)
In [8]:
# The same record, but with each value wrapped in a one-element list,
# so every key maps to a list of values rather than a scalar.
person2 = dict(
    first=["Kushagra"],
    last=["Gupta"],
    email=["Kushagra225@gmail.com"],
)
In [9]:
# Three people stored column-wise: each key holds a list, and position i
# across the lists describes the i-th person.
person3 = dict(
    first=["Kushagra", "Jane", "John"],
    last=["Gupta", "Doe", "Doe"],
    email=["Kushagra225@gmail.com", "JaneDoe123@gmail.com", "John@gmail.com"],
)
In [10]:
# Plain dict lookup: the list of values stored under the 'email' key
person3['email']
Out[10]:
['Kushagra225@gmail.com', 'JaneDoe123@gmail.com', 'John@gmail.com']
In [11]:
# Convert the dictionary to a DataFrame: each key becomes a column name and its
# list becomes that column's values. A DataFrame is a container of multiple Series objects.
df = pd.DataFrame(person3)
In [12]:
# Display the whole DataFrame (it has only three rows)
df
Out[12]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com
In [13]:
# Access a single column with bracket notation; the result is a Series
df['email']
Out[13]:
0    Kushagra225@gmail.com
1     JaneDoe123@gmail.com
2           John@gmail.com
Name: email, dtype: object
In [14]:
# A Series holds the values of a single column together with its row index
type(df['email'])
Out[14]:
pandas.core.series.Series
In [15]:
# Attribute (dot) access returns the same Series as df['email'], but it breaks when a
# column name is the same as a DataFrame method or attribute (e.g. a column named
# 'count'), so bracket notation is the safer choice.
df.email
Out[15]:
0    Kushagra225@gmail.com
1     JaneDoe123@gmail.com
2           John@gmail.com
Name: email, dtype: object
In [16]:
# Access multiple columns by passing a list of column names (hence the double
# brackets); the result is a DataFrame rather than a Series
df[['last', 'email']]
Out[16]:
last email
0 Gupta Kushagra225@gmail.com
1 Doe JaneDoe123@gmail.com
2 Doe John@gmail.com
In [17]:
# Confirm that selecting with a list of columns yields a DataFrame
print(type(df[['last', 'email']]))
<class 'pandas.core.frame.DataFrame'>
In [19]:
# List all the column names (returned as an Index object)
df.columns
Out[19]:
Index(['first', 'last', 'email'], dtype='object')
In [20]:
# Shape of df as a (number of rows, number of columns) tuple
df.shape
Out[20]:
(3, 3)
In [21]:
# Print a summary of df: the index range, the number of columns, each column's
# non-null count and dtype, and the memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
first    3 non-null object
last     3 non-null object
email    3 non-null object
dtypes: object(3)
memory usage: 200.0+ bytes
In [22]:
# Access the first row by integer position; the row comes back as a Series
# whose index is the column names
df.iloc[0]
Out[22]:
first                 Kushagra
last                     Gupta
email    Kushagra225@gmail.com
Name: 0, dtype: object
In [25]:
# Select specific rows and a column by integer position: the email
# (column position 2) of the first and second rows
df.iloc[[0,1], 2]
Out[25]:
0    Kushagra225@gmail.com
1     JaneDoe123@gmail.com
Name: email, dtype: object
In [27]:
# loc selects by label, so the column is given by name instead of by position.
# The row labels 0 and 1 coincide with the positions here because df uses the default RangeIndex.
df.loc[[0,1], 'email']
Out[27]:
0    Kushagra225@gmail.com
1     JaneDoe123@gmail.com
Name: email, dtype: object
In [28]:
# Pass a list of column labels to select several columns; they come back
# in the order requested ('email' before 'first'), not in df's original order
df.loc[[0,1], ['email', 'first']]
Out[28]:
email first
0 Kushagra225@gmail.com Kushagra
1 JaneDoe123@gmail.com Jane
In [31]:
# loc also accepts slices for rows and columns. Label-based slicing with loc includes
# BOTH endpoints: 0:2 returns the rows labelled 0, 1 and 2, and 'first':'email' returns
# every column from 'first' through 'email'. (Position-based slicing with iloc follows
# normal Python rules and excludes the stop value.)
df.loc[0:2, 'first':'email']
Out[31]:
first last email
0 Kushagra Gupta Kushagra225@gmail.com
1 Jane Doe JaneDoe123@gmail.com
2 John Doe John@gmail.com