In [1]:
# Use a scatter plot when you want to compare two sets of values
# and see how they correlate.
In [3]:
import pandas as pd
from matplotlib import pyplot as plt
In [4]:
# Sample data for the scatter-plot demos: 20 paired (x, y) values.
x = [
    5, 7, 8, 5, 6, 7, 9, 2, 3, 4,
    4, 4, 2, 6, 3, 6, 8, 6, 4, 1,
]
y = [
    7, 4, 3, 9, 1, 3, 2, 5, 2, 4,
    8, 7, 1, 6, 4, 9, 7, 7, 5, 1,
]
In [11]:
# Plot style. Matplotlib 3.6 renamed its bundled seaborn styles to
# 'seaborn-v0_8*' and 3.8 removed the old names, so use whichever name this
# installation actually provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# Scatter plot: s sets the marker size, c sets the marker color.
plt.scatter(x, y, s=100, c='green', marker='X', edgecolor='black', linewidth=1, alpha=0.75)
Out[11]:
<matplotlib.collections.PathCollection at 0x7f6cd31f9a90>
In [18]:
# Per-point values for the second demo: `colors` is mapped through a colormap
# and `sizes` gives each marker's area; both have one entry per (x, y) pair.
colors = [
    7, 5, 9, 7, 5, 7, 2, 5, 3, 7,
    1, 2, 8, 1, 9, 2, 5, 6, 7, 5,
]
sizes = [
    209, 486, 381, 255, 191, 315, 185, 228, 174, 538,
    239, 394, 399, 153, 273, 293, 436, 501, 397, 539,
]
In [19]:
# Plot style. Matplotlib 3.6 renamed its bundled seaborn styles to
# 'seaborn-v0_8*' and 3.8 removed the old names, so use whichever name this
# installation actually provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# Scatter plot with per-point styling: s takes one size per marker and
# c takes one number per marker, which cmap turns into a shade of green.
plt.scatter(x, y, s=sizes, c=colors, cmap='Greens',
            edgecolor='black', linewidth=1, alpha=0.75)

# Colorbar showing how the numbers in `colors` map to shades.
cbar = plt.colorbar()
cbar.set_label("Satisfaction")
In [20]:
# Real-world data. Per the original note, the CSV holds the top 200 trending
# YouTube videos with total views, likes, and the like-to-dislike ratio.
data = pd.read_csv('2019-05-31-data.csv')

# Pull out the three columns that the plot below uses.
view_count, likes, ratio = (
    data[column] for column in ('view_count', 'likes', 'ratio')
)
In [26]:
# Do videos with more views also collect more likes?
# Each point is one video, shaded by its like/dislike ratio.
marker_style = dict(edgecolor='black', linewidth=1, alpha=0.75)
points = plt.scatter(view_count, likes, c=ratio, cmap='summer', **marker_style)

# Put both axes on a logarithmic scale.
for set_scale in (plt.xscale, plt.yscale):
    set_scale('log')

# Title and axis labels.
plt.title('Trending YouTube Videos')
plt.xlabel('View Count')
plt.ylabel('Total Likes')

# Colorbar for the scatter's color mapping.
cbar = plt.colorbar(points)
cbar.set_label("Like/Dislike Ratio")

plt.tight_layout()
plt.show()