In [1]:
#Web Scraping
#To install this module run "pip install requests-html"
#Using this module you can also scrape dynamically generated data
In [4]:
from requests_html import HTML
In [22]:
# Read the local sample page; the file only needs to stay open for the read.
with open('simple2.html') as html_file:
    source = html_file.read()

# Wrap the markup in a requests_html HTML object and echo it back as a check.
html = HTML(html=source)
print(html.html)
<!doctype html>
<html class="no-js" lang="">
    <head>
        <title>Test - A Sample Website</title>
        <meta charset="utf-8">
        <link rel="stylesheet" href="css/normalize.css">
        <link rel="stylesheet" href="css/main.css">
    </head>
    <body>
        <h1 id='site_title'>Test Website</h1>
        <hr></hr>
        <div class="article">
            <h2><a href="article_1.html">Article 1 Headline</a></h2>
            <p>This is a summary of article 1</p>
        </div>
        <hr></hr>
        <div class="article">
            <h2><a href="article_2.html">Article 2 Headline</a></h2>
            <p>This is a summary of article 2</p>
        </div>
        <hr></hr>
        <div id='footer'>
            <p>Footer Information</p>
        </div>
        <script>
        var para = document.createElement("p");
        var node = document.createTextNode("This is text generated by JavaScript.");
        para.appendChild(node);
        var element = document.getElementById("footer");
        element.appendChild(para);
        </script>
    </body>
</html>

In [23]:
# find() returns a list of the elements matching the CSS selector.
match = html.find('title')
# Index 0 takes the first element out of that list.
first_title = match[0]
print(first_title)
print(first_title.text)
print(first_title.html)
<Element 'title' >
Test - A Sample Website
<title>Test - A Sample Website</title>
In [24]:
# When only the first match is needed, first=True makes find() hand back that
# element directly (no list, so no [0] indexing is required).
match = html.find('title', first=True)
for shown in (match, match.text, match.html):
    print(shown)
<Element 'title' >
Test - A Sample Website
<title>Test - A Sample Website</title>
In [25]:
# A selector starting with '#' looks an element up by its id attribute.
match = html.find('#footer', first=True)
footer_text = match.text
print(footer_text)
Footer Information
In [29]:
# Take the first article <div>, then search inside it (not the whole page)
# for its headline and summary elements.
article = html.find('div.article', first=True)
headline, summary = (article.find(tag, first=True) for tag in ('h2', 'p'))
In [32]:
# Show the text of the headline and the summary, one per line.
print(headline.text, summary.text, sep='\n')
Article 1 Headline
This is a summary of article 1
In [44]:
# Print the headline and summary of every article on the local page.
articles = html.find('div.article')

for article in articles:
    headline = article.find('h2', first=True)
    summary = article.find('p', first=True)
    # The trailing empty string yields the blank line that separates articles.
    print(headline.text, summary.text, '', sep='\n')
Article 1 Headline
This is a summary of article 1

Article 2 Headline
This is a summary of article 2

In [45]:
#Webscraping
from requests_html import HTML, HTMLSession
In [53]:
session = HTMLSession()
# session.get() returns a requests response object that also carries the
# parsed page as r.html.
# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() surfaces 4xx/5xx replies instead of silently parsing
# an error page as if it were the real content.
r = session.get('https://coreyms.com', timeout=30)
r.raise_for_status()
print(r.html)
<HTML url='https://coreyms.com/'>
In [83]:
# Inspect the first <article> element on the fetched page.
article = r.html.find('article', first=True)
headline = article.find('.entry-title-link', first=True)
paragraph = article.find('.entry-content p', first=True)
video_src = article.find('iframe', first=True)  # the <iframe> element itself, not just its URL

# Items are separated by one blank line; .attrs exposes the element's
# attributes as a dict.
print(headline.text, paragraph.text, video_src, video_src.attrs, sep='\n\n')
Visual Studio Code (Windows) – Setting up a Python Development Environment and Complete Overview

In this Python Programming Tutorial, we will be learning how to set up a Python development environment in VSCode on Windows. VSCode is a very nice free editor for writing Python applications and many developers are now switching over to this editor. In this video, we will learn how to install VSCode, get the Python extension installed, how to change Python interpreters, create virtual environments, format/lint our code, how to use Git within VSCode, how to debug our programs, how unit testing works, and more. We have a lot to cover, so let’s go ahead and get started…

<Element 'iframe' allowfullscreen='true' class=('youtube-player',) height='360' src='https://www.youtube.com/embed/-nh9rCzPJ20?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent' style='border:0;' type='text/html' width='640'>

{'allowfullscreen': 'true', 'class': ('youtube-player',), 'height': '360', 'src': 'https://www.youtube.com/embed/-nh9rCzPJ20?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent', 'style': 'border:0;', 'type': 'text/html', 'width': '640'}
In [90]:
# Derive the YouTube video id from the iframe's embed URL, printing each step.
embed_url = video_src.attrs['src']
print(embed_url)
# Splitting on '/' leaves "<id>?<query string>" at index 4 of the embed URL.
id_with_query = embed_url.split('/')[4]
print(id_with_query)
# Everything before the '?' is the bare video id.
vid_id = id_with_query.split('?')[0]
print(vid_id)
https://www.youtube.com/embed/-nh9rCzPJ20?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent
-nh9rCzPJ20?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent
-nh9rCzPJ20
In [91]:
# Build a regular watch-page URL around the video id extracted from the embed URL.
video_link = f'https://youtube.com/watch?v={vid_id}'
print(video_link)
https://youtube.com/watch?v=-nh9rCzPJ20
In [98]:
# Print the title, summary and video link of every article on the page.
articles = r.html.find('article')
for article in articles:
    headline = article.find('.entry-title-link', first=True)
    paragraph = article.find('.entry-content p', first=True)

    # find(..., first=True) returns None when nothing matches, so a post
    # without an embedded video must not abort the whole loop. In that case
    # (or if the src does not have the expected shape) the link stays None.
    video_link = None
    iframe = article.find('iframe', first=True)
    if iframe is not None:
        # Embed URLs look like https://www.youtube.com/embed/<id>?<query>,
        # which puts "<id>?<query>" at index 4 after splitting on '/'.
        src_parts = iframe.attrs.get('src', '').split('/')
        if len(src_parts) > 4:
            vid_id = src_parts[4].split('?')[0]
            video_link = f'https://youtube.com/watch?v={vid_id}'

    print(headline.text)
    print(paragraph.text)
    print(video_link)
    print()
    
Visual Studio Code (Windows) – Setting up a Python Development Environment and Complete Overview
In this Python Programming Tutorial, we will be learning how to set up a Python development environment in VSCode on Windows. VSCode is a very nice free editor for writing Python applications and many developers are now switching over to this editor. In this video, we will learn how to install VSCode, get the Python extension installed, how to change Python interpreters, create virtual environments, format/lint our code, how to use Git within VSCode, how to debug our programs, how unit testing works, and more. We have a lot to cover, so let’s go ahead and get started…
https://youtube.com/watch?v=-nh9rCzPJ20

Visual Studio Code (Mac) – Setting up a Python Development Environment and Complete Overview
In this Python Programming Tutorial, we will be learning how to set up a Python development environment in VSCode on MacOS. VSCode is a very nice free editor for writing Python applications and many developers are now switching over to this editor. In this video, we will learn how to install VSCode, get the Python extension installed, how to change Python interpreters, create virtual environments, format/lint our code, how to use Git within VSCode, how to debug our programs, how unit testing works, and more. We have a lot to cover, so let’s go ahead and get started…
https://youtube.com/watch?v=06I63_p-2A4

Clarifying the Issues with Mutable Default Arguments
In this Python Programming Tutorial, we will be clarifying the issues with mutable default arguments. We discussed this in my last video titled “5 Common Python Mistakes and How to Fix Them”, but I received many comments from people who were still confused. So we will be doing a deeper dive to explain exactly what is going on here. Let’s get started…
https://youtube.com/watch?v=_JGmemuINww

5 Common Python Mistakes and How to Fix Them
In this Python Programming Tutorial, we will be going over some of the most common mistakes. I get a lot of questions from people every day, and I have seen a lot of people making these same mistakes in their code. So we will investigate each of these common mistakes and also look at the fixes for each other these as well. Here are the timestamps for each topic we will cover…
1) Indentation and Spaces – 0:45
2) Naming Conflicts – 4:12
3) Mutable Default Args – 10:05
4) Exhausting Iterators – 16:35
5) Importing with * – 22:13
https://youtube.com/watch?v=zdJEYhA2AZQ

How I Setup a New Development Machine – Using Scripts to Automate Installs and Save Time
In this video, I’ll be showing how I set up a new development machine. I recently got a new MacBook and wanted to show how I use scripts to automate a lot of this process. It used to take me a lot of time to install all of my software and get everything set up the way I like it. Now I use these automated scripts to do this in minutes. Let’s get started…
https://youtube.com/watch?v=kIdiWut8eD8

How to Write Python Scripts to Analyze JSON APIs and Sort Results
In this Python Programming Tutorial, we will be learning how to grab data from a JSON API, parse out the information we want, and then sort the data using a custom key. The API we will be using is a JSON API for Homebrew Packages and we will be sorting the packages by their popularity. We cover a lot of topics in this tutorial. We will be using the Requests Library, converting to/from JSON, reading and writing to files, writing our own sorting function, and more. Let’s get started…
https://youtube.com/watch?v=1lxrb_ezP-g

Homebrew Tutorial: Simplify Software Installation on Mac Using This Package Manager
In this video, we’ll be learning how to use the Homebrew Package Manager on MacOS. Brew allows us to easily install command-line tools with a simple command. We can also install native applications for Mac using Brew Cask. I often use these commands in scripts to install a lot of new software quickly and easily on new machines. Let’s get started…
https://youtube.com/watch?v=SELYgZvAZbU

Python Tutorial: VENV (Windows) – How to Use Virtual Environments with the Built-In venv Module
In this Python Programming Tutorial, we will be learning how to use virtual environments on the Windows operating systems with the built-in
venv
module. We will learn how to create them, activate them, remove them, and much more. Let’s get started…
https://youtube.com/watch?v=APOPm01BVrk

Python Tutorial: VENV (Mac & Linux) – How to Use Virtual Environments with the Built-In venv Module
In this Python Programming Tutorial, we will be learning how to use virtual environments on the Mac and Linux operating systems with the built-in
venv
module. We will learn how to create them, activate them, remove them, and much more. Let’s get started…
https://youtube.com/watch?v=Kg1Yvry_Ydk

10 Python Tips and Tricks For Writing Better Code
In this Python Programming video, we will be going over 10 tips and tricks for writing Pythonic code. Here are the timestamps for each topic we will cover…
1) Ternary Conditionals – 0:34
2) Underscore Placeholders – 2:13
3) Context Managers – 4:25
4) Enumerate – 6:50
5) Zip – 8:52
6) Unpacking – 13:02
7) Setattr/Getattr – 19:08
8) GetPass – 26:24
9) Python dash m – 29:18
10) Help/Dir – 33:17
https://youtube.com/watch?v=C-gEQdGVXbk

In [99]:
# Write the headline, summary and video link of every article to site.csv.
import csv

articles = r.html.find('article')

# newline='' lets the csv module control line endings (without it, blank rows
# appear between records on Windows). utf-8 keeps the curly quotes and dashes
# found in the summaries intact regardless of the platform's default encoding.
with open('site.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['headline','summary','video'])
    for article in articles:
        headline = article.find('.entry-title-link', first=True)
        paragraph = article.find('.entry-content p', first=True)

        # find(..., first=True) returns None when nothing matches, so a post
        # without an embedded video gets an empty video cell instead of
        # crashing the export halfway through the file.
        video_link = None
        iframe = article.find('iframe', first=True)
        if iframe is not None:
            # "<id>?<query>" sits at index 4 of the embed URL split on '/'.
            src_parts = iframe.attrs.get('src', '').split('/')
            if len(src_parts) > 4:
                vid_id = src_parts[4].split('?')[0]
                video_link = f'https://youtube.com/watch?v={vid_id}'

        csv_writer.writerow([headline.text, paragraph.text, video_link])
In [101]:
#To get all the links from a website, requests_html has a very simple method
session = HTMLSession()
# Bound the wait and fail loudly on an HTTP error rather than hanging or
# collecting links from an error page.
r = session.get('https://coreyms.com', timeout=30)
r.raise_for_status()
# r.html.links holds the links in the form they appear in the page.
#print(r.html.links)
# r.html.absolute_links holds the same links as absolute URLs.
#print(r.html.absolute_links)
{'https://plus.google.com/+CoreySchafer44/posts', 'https://coreyms.com/tag/virtual-environment', 'http://coreyms.com/portfolio', 'https://coreyms.com/tag/api', 'https://www.patreon.com/coreyms', 'https://www.amazon.com/gp/product/1491946008/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=1491946008&linkCode=as2&tag=coreyms-20&linkId=39335cdc340fb7ce5bd51d59c57e7e54', 'https://coreyms.com/tag/dev-machine', 'https://coreyms.com/tag/macbook-pro', 'https://coreyms.com/page/16', 'http://www.se-radio.net/', 'https://coreyms.com/development/python/visual-studio-code-windows-setting-up-a-python-development-environment-and-complete-overview', 'http://www.samharris.org/podcast', 'https://coreyms.com/tag/visual-studios', 'https://coreyms.com/development/python/5-common-python-mistakes-and-how-to-fix-them-2#respond', 'http://coreyms.com/feed/', 'https://coreyms.com/tag/context-manager', 'https://coreyms.com/tag/dir-function', 'https://coreyms.com/category/development/terminal', 'https://coreyms.com/contributors', 'https://coreyms.com/tag/tips-and-tricks', 'https://www.amazon.com/gp/product/020161622X/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=020161622X&linkCode=as2&tag=coreyms-20&linkId=a2699f6b6cb5814da54f71140c52f2ca', 'https://coreyms.com/category/web-design', 'https://coreyms.com/tag/indentationerror', 'https://coreyms.com/tag/underscores', 'https://coreyms.com/development/how-i-setup-a-new-development-machine-using-scripts-to-automate-installs-and-save-time', 'http://www.billburr.com/podcast', 'https://blog.codepen.io/radio/', 'https://coreyms.com/development/terminal/homebrew-tutorial-simplify-software-installation-on-mac-using-this-package-manager', 'http://www.dancarlin.com/hardcore-history-series/', 'https://coreyms.com/development/python/how-to-write-python-scripts-to-analyze-json-apis-and-sort-results', 
'https://www.amazon.com/gp/product/1593276036/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=1593276036&linkCode=as2&tag=coreyms-20&linkId=75ff844a147bc8cb5fb325608b286158', 'https://coreyms.com/tag/functions', 'https://coreyms.com/development/python/visual-studio-code-mac-setting-up-a-python-development-environment-and-complete-overview', 'https://www.amazon.com/gp/product/0984782850/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=0984782850&linkCode=as2&tag=coreyms-20&linkId=e2f7c21906426f17958a1d04718e7d02', 'https://coreyms.com/page/2', 'https://coreyms.com/development/python/python-tutorial-venv-windows-how-to-use-virtual-environments-with-the-built-in-venv-module#respond', 'https://coreyms.com/development/how-i-setup-a-new-development-machine-using-scripts-to-automate-installs-and-save-time#respond', 'https://coreyms.com/category/diy/home-improvement', 'https://coreyms.com/contact', 'https://coreyms.com/development/python/clarifying-the-issues-with-mutable-default-arguments#respond', 'https://coreyms.com/tag/virtualenv', 'https://coreyms.com/category/development', 'https://coreyms.com/tag/help-function', 'https://github.com/CoreyMSchafer', 'https://coreyms.com/giveaway', 'https://coreyms.com/tag/unpacking', 'https://coreyms.com/tag/zip', 'https://coreyms.com/category/web-design/css', 'https://coreyms.com/tag/package-manager', 'https://coreyms.com/', 'https://coreyms.com/author/coreymschafer', 'https://coreyms.com/category/development/javascript', 'https://coreyms.com/tag/brew', 'http://hanselminutes.com/', 'https://coreyms.com/development/python/python-tutorial-venv-mac-linux-how-to-use-virtual-environments-with-the-built-in-venv-module#respond', 'https://coreyms.com/tag/mutable-default-arguments', 'https://www.youtube.com/user/schafer5', 'https://coreyms.com/tag/macos', 'https://coreyms.com/category/diy', 'https://coreyms.com/tag/vs-code', 'https://coreyms.com/tag/data', 'http://talkpython.fm/', 
'https://coreyms.com/category/development/python', 'https://coreyms.com/category/diy/woodworking', 'https://coreyms.com/tag/ternary-operator', 'https://twitter.com/CoreyMSchafer', 'https://coreyms.com/tag/setattr', 'https://coreyms.com/tag/getattr', 'https://coreyms.com/tag/json', 'https://coreyms.com/development/python/10-python-tips-and-tricks-for-writing-better-code#respond', 'https://coreyms.com/tag/common-errors', 'https://coreyms.com/tag/macbook', 'https://coreyms.com/development/terminal/homebrew-tutorial-simplify-software-installation-on-mac-using-this-package-manager#respond', 'http://carasantamaria.com/podcast/', 'https://www.amazon.com/gp/product/1449355730/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=1449355730&linkCode=as2&tag=coreyms-20&linkId=2f9ceaf471d7d35f2c2657051780fc6f', 'https://coreyms.com/development/python/python-tutorial-venv-windows-how-to-use-virtual-environments-with-the-built-in-venv-module', 'https://www.instagram.com/coreymschafer/', 'https://coreyms.com/category/development/git', 'https://coreyms.com/development/python/python-tutorial-venv-mac-linux-how-to-use-virtual-environments-with-the-built-in-venv-module', 'https://coreyms.com/tag/mac', 'https://coreyms.com/tag/development-machine', 'https://coreyms.com/development/python/visual-studio-code-mac-setting-up-a-python-development-environment-and-complete-overview#respond', 'https://coreyms.com/tag/venv', 'https://coreyms.com/tag/vscode', 'http://shoptalkshow.com/', 'https://coreyms.com/tag/getpass', 'https://coreyms.com/development/python/10-python-tips-and-tricks-for-writing-better-code', 'https://coreyms.com/tag/python-gotchas', 'https://coreyms.com/tag/sorting', 'https://coreyms.com/development/python/5-common-python-mistakes-and-how-to-fix-them-2', 'http://coreyms.com', 'https://coreyms.com/tag/common-mistakes', 'https://coreyms.com/tag/data-science', 'https://coreyms.com/support', 'https://coreyms.com/category/development/wordpress', 
'https://coreyms.com/development/python/clarifying-the-issues-with-mutable-default-arguments', 'http://www.startalkradio.net/shows-archive/', 'https://coreyms.com/tag/enumerate', 'https://coreyms.com/development/python/visual-studio-code-windows-setting-up-a-python-development-environment-and-complete-overview#respond', 'https://coreyms.com/tag/visual-studio-code', 'https://coreyms.com/tag/development-environment', 'https://coreyms.com/page/3', 'https://www.amazon.com/gp/product/0201835959/ref=as_li_tl?ie=UTF8&camp=1789&creative=9325&creativeASIN=0201835959&linkCode=as2&tag=coreyms-20&linkId=c3de80ab4a4761f7634751cf323af13f', 'https://coreyms.com/tag/homebrew', 'https://coreyms.com/development/python/how-to-write-python-scripts-to-analyze-json-apis-and-sort-results#respond'}
In [105]:
#running javascript
from requests_html import HTML, HTMLSession
# Reload the sample page from disk before running its JavaScript.
with open('simple2.html') as html_file:
    source = html_file.read()

html = HTML(html=source)
# render() is the step that runs the page's JavaScript; the script in
# simple2.html appends an extra paragraph to the #footer element.
# NOTE(review): inside an already-running event loop (as in Jupyter) render()
# may need the async variant instead - confirm against the requests-html docs.
html.render()

# Show the footer markup after rendering.
match = html.find('#footer', first=True)
print(match.html)