# The itertools module is a collection of functions that allow us to work with iterators efficiently. Depending on your problem, this can save you a lot of memory and also a lot of work.
# Iterator: an object that hands out a sequence of values one at a time as we loop over it, so only the current item has to be held in memory.

import itertools

# itertools.count() produces an endless stream of numbers; with no arguments it starts at 0.
counter = itertools.count()

# Every next() call pulls exactly one value, so this prints 0, 1, 2 and 3 on separate lines.
print(next(counter), next(counter), next(counter), next(counter), sep='\n')

# `start` picks the first value of the sequence.
counter = itertools.count(start=7)

print(next(counter), next(counter), next(counter), next(counter), sep='\n')

# `step` sets the distance between consecutive values (1 when omitted).
counter = itertools.count(start=7, step=5)

print(next(counter), next(counter), next(counter), next(counter), sep='\n')

7
12
17
22

# The step may also be negative and/or fractional, so the sequence can run downwards.
counter = itertools.count(start=7, step=-2.5)

# Take exactly four values from the endless counter and print one per line.
print(*itertools.islice(counter, 4), sep='\n')

7
4.5
2.0
-0.5

# Number the items of a list: pair every value with a running counter that starts at 1.
data = [100, 200, 300, 400]

# count() is endless, so zip() simply stops when `data` runs out.
num = [*zip(itertools.count(1), data)]

print(num)

[(1, 100), (2, 200), (3, 300), (4, 400)]

# zip() stops as soon as its shortest input is exhausted.
data = [100, 200, 300, 400]

num = [*zip(range(10), data)]

# Only the first four numbers of range(10) are paired up; the remaining ones are never used.
print(num)

[(0, 100), (1, 200), (2, 300), (3, 400)]

# itertools.zip_longest() keeps going until the longest input is exhausted.
data = [100, 200, 300, 400]

# Once the shorter input has run out, its slot is filled with `fillvalue` (None by default).
num = [*itertools.zip_longest(range(10), data, fillvalue=None)]

print(num)

[(0, 100), (1, 200), (2, 300), (3, 400), (4, None), (5, None), (6, None), (7, None), (8, None), (9, None)]

# itertools.cycle() replays the given values endlessly: 1, 2, 3, 1, 2, 3, ...
counter = itertools.cycle([1, 2, 3])

# Take seven values: two full rounds plus the first item of the third round.
print(*itertools.islice(counter, 7), sep='\n')

1
2
3
1
2
3
1

# itertools.repeat() hands back the same value on every request.
counter = itertools.repeat(2)

# Take five values from the endless repeater and print one per line.
print(*itertools.islice(counter, 5), sep='\n')

# A finite variant: itertools.repeat(2, times=3) yields the value three times
# and then raises StopIteration once it is exhausted.

2
2
2
2
2

# A practical use of repeat(): supplying a constant second argument to map().
# map() takes one value from each iterable per step and calls pow(base, 2) with them,
# returning a lazy iterator of the results.
squares = map(pow, range(10), itertools.repeat(2))
print([*squares])

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# starmap() unpacks each tuple into the function's arguments: pow(0, 2), pow(1, 2), ...
squares = itertools.starmap(pow, [(base, 2) for base in range(4)])
print([*squares])

[0, 1, 4, 9]

# Combinations: the ways to pick a given number of items when order does NOT matter.
# Permutations: the ways to pick a given number of items when order DOES matter.

letters = list('abcd')

# combinations() never reuses an element within a single result.
result = itertools.combinations(letters, 2)

for pair in result:
    print(pair)

# To allow the same element to be picked more than once, use
# itertools.combinations_with_replacement() instead of combinations().

('a', 'b')
('a', 'c')
('a', 'd')
('b', 'c')
('b', 'd')
('c', 'd')

# permutations() does not reuse an element either, but here ('a', 'b') and ('b', 'a')
# count as two different results.
result = itertools.permutations(letters, 2)

print(*result, sep='\n')

('a', 'b')
('a', 'c')
('a', 'd')
('b', 'a')
('b', 'c')
('b', 'd')
('c', 'a')
('c', 'b')
('c', 'd')
('d', 'a')
('d', 'b')
('d', 'c')

numbers = list(range(3))
names = ['Corey', 'Nicole']

# product() builds the cartesian product; repeat=2 pairs `numbers` with itself,
# so the same value may show up in both positions.
result = itertools.product(numbers, repeat=2)

for pair in result:
    print(pair)

(0, 0)
(0, 1)
(0, 2)
(1, 0)
(1, 1)
(1, 2)
(2, 0)
(2, 1)
(2, 2)

letters = list('abcd')
numbers = list(range(4))
names = ['Corey', 'Nicole']

# chain() walks through the given iterables one after another, moving on to the
# next one once the current one is exhausted.
combined = itertools.chain(letters, numbers, names)

print(*combined, sep='\n')

a
b
c
d
0
1
2
3
Corey
Nicole

# islice(iterable, stop) / islice(iterable, start, stop, step) lazily yields a slice of any
# iterable. It takes the iterable plus a stop position and, optionally, a start position and a step.

# A single number is treated as the stop position.
result = itertools.islice(range(10), 5)
print(*result, sep='\n')

# Two numbers are treated as the start and stop positions.
result = itertools.islice(range(10), 1, 5)
print(*result, sep='\n')

# Three numbers are treated as start, stop and step.
result = itertools.islice(range(10), 1, 7, 2)
print(*result, sep='\n')

1
3
5

# islice() is a memory-efficient way to print just the header lines of a large log file:
# the file object is iterated line by line and only the first three lines are requested.

with open('file.log') as f:
    header_lines = itertools.islice(f, 3)

    # Each line still carries its own newline, hence the empty separator and end.
    print(*header_lines, sep='', end='')

Date: 2018-11-08
Author: Kushagra
Description: This is a sample log file

letters = list('abcde')
selectors = [True, False, False, True, True]

# compress() keeps the elements whose matching selector is true.
result = itertools.compress(letters, selectors)

print(*result, sep='\n')

# This resembles filter(), except that filter() decides with a function whereas
# compress() takes a ready-made list of selectors.

a
d
e

numbers = list(range(1, 9))


def even(n):
    """Return True when n is divisible by 2, otherwise False."""
    return n % 2 == 0


# filter() keeps the values for which the function returns True.
result = filter(even, numbers)

print(*result, sep='\n')

# itertools.filterfalse() is the inverse of filter(): it keeps the values
# for which the function returns False.
result = itertools.filterfalse(even, numbers)

print(*result, sep='\n')

numbers = [0, 1, 2, 3, 2, 1, 0]

# accumulate() yields the running total of the values seen so far.
result = itertools.accumulate(numbers)

print(*result, sep='\n')

# A different two-argument function can replace addition, e.g. multiplication for a
# running product. That needs the operator module to be imported:
# result = itertools.accumulate(numbers, operator.mul)

# Groupby
# groupby() walks through an iterable and starts a new group every time the key changes,
# so records sharing a key must already sit next to each other (e.g. sorted by that key).
# Here we have a list of dictionaries describing people and want to categorise them by state.
people = [
    {'name': 'John Doe', 'city': 'Gotham', 'state': 'NY'},
    {'name': 'Jane Doe', 'city': 'Kings Landing', 'state': 'NY'},
    {'name': 'Corey Schafer', 'city': 'Boulder', 'state': 'CO'},
    {'name': 'Al Einstein', 'city': 'Denver', 'state': 'CO'},
    {'name': 'John Henry', 'city': 'Hinton', 'state': 'WV'},
    {'name': 'Randy Moss', 'city': 'Rand', 'state': 'WV'},
    {'name': 'Nicole K', 'city': 'Asheville', 'state': 'NC'},
    {'name': 'Jim Doe', 'city': 'Charlotte', 'state': 'NC'},
    {'name': 'Jane Taylor', 'city': 'Faketown', 'state': 'NC'},
]


# Function that acts as the key for groupby().
def get_state(person):
    """Return the 'state' value of a person record."""
    return person['state']


# Every group produced by groupby() is a lazy iterator that shares the underlying data with
# groupby() itself, and it is no longer readable once groupby() has moved on to the next key.
# tee() would only duplicate the outer (key, group) pairs, not the contents of the groups,
# so a second copy would see nothing but empty groups. Storing each group as a list first
# lets every copy read the full groups.
person_group = [
    (state, list(members))
    for state, members in itertools.groupby(people, get_state)
]
# tee() splits one iterable into the requested number of independent iterators (default: 2).
copy1, copy2 = itertools.tee(person_group)

for key, group in copy1:
    print(key)
    for person in group:
        print(person)
    print()
# The result is categorised by the key we passed, which is the state.

NY
{'name': 'John Doe', 'city': 'Gotham', 'state': 'NY'}
{'name': 'Jane Doe', 'city': 'Kings Landing', 'state': 'NY'}

CO
{'name': 'Corey Schafer', 'city': 'Boulder', 'state': 'CO'}
{'name': 'Al Einstein', 'city': 'Denver', 'state': 'CO'}

WV
{'name': 'John Henry', 'city': 'Hinton', 'state': 'WV'}
{'name': 'Randy Moss', 'city': 'Rand', 'state': 'WV'}

NC
{'name': 'Nicole K', 'city': 'Asheville', 'state': 'NC'}
{'name': 'Jim Doe', 'city': 'Charlotte', 'state': 'NC'}
{'name': 'Jane Taylor', 'city': 'Faketown', 'state': 'NC'}

# The second copy can be used to report how many people belong to each state.
for state, members in copy2:
    print(state, sum(1 for _ in members))

NY 0
CO 0
WV 0
NC 0