In [1]:
# The itertools module is a collection of functions that allows us to work with iterators in an efficient way. Depending on your problem, this can save you a lot of memory and also a lot of work.
#iterator: it's basically a sequential data that we can iterate or loop over which hold one object in memory at a time.
In [2]:
import itertools
In [3]:
#by default it starts counting from 0
counter = itertools.count()

print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
0
1
2
3
In [4]:
#but you can start from any point
counter = itertools.count(start=7)

print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
7
8
9
10
In [5]:
#you can change the step, by default it's 1
counter = itertools.count(start=7, step=5)

print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
7
12
17
22
In [9]:
#can count backwards ana decimal
counter = itertools.count(start=7, step=-2.5)

print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
7
4.5
2.0
-0.5
In [12]:
#we can number list time
data = [100, 200, 300, 400]

num = list(zip(itertools.count(start=1),data))

print(num)
[(1, 100), (2, 200), (3, 300), (4, 400)]
In [14]:
#our zip function get stopped when our shortest list gets exhausted
data = [100, 200, 300, 400]

num = list(zip(range(10),data))

print(num)
#rest of the values of range is not used
[(0, 100), (1, 200), (2, 300), (3, 400)]
In [15]:
#we can use zip_longest
data = [100, 200, 300, 400]

num = list(itertools.zip_longest(range(10),data))

print(num)
#when values get exhauested from one object, it uses none.
[(0, 100), (1, 200), (2, 300), (3, 400), (4, None), (5, None), (6, None), (7, None), (8, None), (9, None)]
In [17]:
counter = itertools.cycle([1, 2, 3])

print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
#just repeats the cycle of values
1
2
3
1
2
3
1
In [18]:
counter = itertools.repeat(2)
# just repeats the value
print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))
print(next(counter))

#counter = itertools.repeat(2, times=3)
#we can set the times to repeat and after exhaustion it will throw StopIteration exception.
2
2
2
2
2
In [21]:
#useful example of repeat function.
#map takes values from iterator and pass it to function and returns iterable object.
squares = map(pow, range(10), itertools.repeat(2))
print(list(squares))
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
In [24]:
#starmap: takes values of pair of tuples and pass it to function
squares = itertools.starmap(pow,[(0,2),(1,2),(2,2),(3,2)])
print(list(squares))
[0, 1, 4, 9]
In [32]:
#combination: number of different ways by which we can group certain number of items, where order doesn't matter
#permutation: number of different ways by which we can group certain number of items, where order does matter

letters = ['a', 'b', 'c', 'd']
In [28]:
#combination [by default not allows repeat]
result = itertools.combinations(letters,2)

for item in result:
    print(item)
    
#for allowing repeatation we have to use combinations_with_replacement instead of combinations
('a', 'b')
('a', 'c')
('a', 'd')
('b', 'c')
('b', 'd')
('c', 'd')
In [31]:
#permutation [by default not allows repeat]
result = itertools.permutations(letters,2)

for item in result:
    print(item)
('a', 'b')
('a', 'c')
('a', 'd')
('b', 'a')
('b', 'c')
('b', 'd')
('c', 'a')
('c', 'b')
('c', 'd')
('d', 'a')
('d', 'b')
('d', 'c')
In [39]:
numbers = [0, 1, 2]
names = ['Corey', 'Nicole']
In [40]:
#product is use to get combination of repeat value [ cartesian product ]
result = itertools.product(numbers,repeat=2)

for item in result:
    print(item)
(0, 0)
(0, 1)
(0, 2)
(1, 0)
(1, 1)
(1, 2)
(2, 0)
(2, 1)
(2, 2)
In [41]:
letters = ['a', 'b', 'c', 'd']
numbers = [0, 1, 2, 3]
names = ['Corey', 'Nicole']
In [42]:
#chain loops over the chain of objects passed till each passed object is exahausted
combined = itertools.chain(letters, numbers, names)

for item in combined:
    print(item)
a
b
c
d
0
1
2
3
Corey
Nicole
In [43]:
#islice(iterable, start, stop, step) :- This iterator selectively prints the values mentioned in its iterable container passed as argument. This iterator takes 4 arguments, iterable container, starting pos., ending position and step
result = itertools.islice(range(10), 5)
#one argument is treated as stopping point
for item in result:
    print(item)
0
1
2
3
4
In [44]:
result = itertools.islice(range(10), 1,5)
#two argument is treated as starting and stopping point
for item in result:
    print(item)
1
2
3
4
In [47]:
result = itertools.islice(range(10), 1, 7, 2)
#three argument is treated as starting, stopping point and steps
for item in result:
    print(item)
1
3
5
In [50]:
#printing just header lines of a large log file in a memory efficient way, that's where you can use islice

with open('file.log') as f:
    header_lines = itertools.islice(f,3)
    
    for line in header_lines:
        print(line, end='')
Date: 2018-11-08
Author: Kushagra
Description: This is a sample log file
In [51]:
letters = ['a', 'b', 'c', 'd', 'e']
selectors = [True, False, False, True, True]
In [53]:
#the function itertools.compress() which filters elements from an iterable based on a list of selectors
result = itertools.compress(letters, selectors)

for item in result:
    print(item)

#similar to filter function but filter takes a function but compress take list
a
d
e
In [54]:
numbers= [1,2,3,4,5,6,7,8]

def even(n):
    if n%2==0:
        return True
    return False
In [56]:
#example of filter function
result = filter(even, numbers)

for item in result:
    print(item)
2
4
6
8
In [57]:
#itertools.filterfalse which is inverse of filter function 
result = itertools.filterfalse(even, numbers)

for item in result:
    print(item)
#now we get inverse result of filter function
1
3
5
7
In [58]:
numbers = [0, 1, 2, 3, 2, 1, 0]
In [60]:
#accumulate returns running total
result = itertools.accumulate(numbers)

for item in result:
    print(item)
#instead of addition we can use different operator like multiply for that we have to import opertor module
#and pass mul function in accumulate like:
#result = itertools.accumulate(numbers, operator.mul)
0
1
3
6
8
9
9
In [61]:
#Groupby
# groupby method actually just iterates through a list and whenever the key changes it creates a new group
# but keys should be sorted 
#suppose we have list of dictionary containing details of a person and we want to categories them by same state.
people = [
    {
        'name': 'John Doe',
        'city': 'Gotham',
        'state': 'NY'
    },
    {
        'name': 'Jane Doe',
        'city': 'Kings Landing',
        'state': 'NY'
    },
    {
        'name': 'Corey Schafer',
        'city': 'Boulder',
        'state': 'CO'
    },
    {
        'name': 'Al Einstein',
        'city': 'Denver',
        'state': 'CO'
    },
    {
        'name': 'John Henry',
        'city': 'Hinton',
        'state': 'WV'
    },
    {
        'name': 'Randy Moss',
        'city': 'Rand',
        'state': 'WV'
    },
    {
        'name': 'Nicole K',
        'city': 'Asheville',
        'state': 'NC'
    },
    {
        'name': 'Jim Doe',
        'city': 'Charlotte',
        'state': 'NC'
    },
    {
        'name': 'Jane Taylor',
        'city': 'Faketown',
        'state': 'NC'
    }
]
In [62]:
#function which will act as a key for our groupby function
def get_state(person):
    return person['state']
In [75]:
person_group = itertools.groupby(people,get_state)
#tee splits the container into a number of iterators mentioned in the argument. [default =2]
copy1, copy2 = itertools.tee(person_group)
In [76]:
for key, group in copy1:
    print(key)
    for person in group:
        print(person)
    print()
#categorised result on the basis of our passed key which is state.
NY
{'name': 'John Doe', 'city': 'Gotham', 'state': 'NY'}
{'name': 'Jane Doe', 'city': 'Kings Landing', 'state': 'NY'}

CO
{'name': 'Corey Schafer', 'city': 'Boulder', 'state': 'CO'}
{'name': 'Al Einstein', 'city': 'Denver', 'state': 'CO'}

WV
{'name': 'John Henry', 'city': 'Hinton', 'state': 'WV'}
{'name': 'Randy Moss', 'city': 'Rand', 'state': 'WV'}

NC
{'name': 'Nicole K', 'city': 'Asheville', 'state': 'NC'}
{'name': 'Jim Doe', 'city': 'Charlotte', 'state': 'NC'}
{'name': 'Jane Taylor', 'city': 'Faketown', 'state': 'NC'}

In [77]:
#we can also get the count for each state
for key, group in copy2:
    print(key,len(list(group)))
NY 0
CO 0
WV 0
NC 0