In [3]:
import numpy as np
from matplotlib import pyplot as plt
from collections import Counter
import csv
In [7]:
#Loading and Analysing data
with open("data.csv") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    
    row = next(csv_reader)
    print(row)
    #we got list of languages for id 1
    print(row['LanguagesWorkedWith'].split(";"))
OrderedDict([('Responder_id', '1'), ('LanguagesWorkedWith', 'HTML/CSS;Java;JavaScript;Python')])
['HTML/CSS', 'Java', 'JavaScript', 'Python']
In [8]:
#Now let's count all the languages for all the data using Counter
with open("data.csv") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    languages_counter = Counter()
    
    for row in csv_reader:
        languages_counter.update(row['LanguagesWorkedWith'].split(";"))
        
print(languages_counter)
Counter({'JavaScript': 59219, 'HTML/CSS': 55466, 'SQL': 47544, 'Python': 36443, 'Java': 35917, 'Bash/Shell/PowerShell': 31991, 'C#': 27097, 'PHP': 23030, 'C++': 20524, 'TypeScript': 18523, 'C': 18017, 'Other(s):': 7920, 'Ruby': 7331, 'Go': 7201, 'Assembly': 5833, 'Swift': 5744, 'Kotlin': 5620, 'R': 5048, 'VBA': 4781, 'Objective-C': 4191, 'Scala': 3309, 'Rust': 2794, 'Dart': 1683, 'Elixir': 1260, 'Clojure': 1254, 'WebAssembly': 1015, 'F#': 973, 'Erlang': 777})
In [9]:
#we can print the most common 15 using most_common method of Counter
print(languages_counter.most_common(15))
[('JavaScript', 59219), ('HTML/CSS', 55466), ('SQL', 47544), ('Python', 36443), ('Java', 35917), ('Bash/Shell/PowerShell', 31991), ('C#', 27097), ('PHP', 23030), ('C++', 20524), ('TypeScript', 18523), ('C', 18017), ('Other(s):', 7920), ('Ruby', 7331), ('Go', 7201), ('Assembly', 5833)]
In [10]:
languages=[]
popularity=[]

for item in languages_counter.most_common(15):
    languages.append(item[0])
    popularity.append(item[1])
In [11]:
print(languages)
print(popularity)
['JavaScript', 'HTML/CSS', 'SQL', 'Python', 'Java', 'Bash/Shell/PowerShell', 'C#', 'PHP', 'C++', 'TypeScript', 'C', 'Other(s):', 'Ruby', 'Go', 'Assembly']
[59219, 55466, 47544, 36443, 35917, 31991, 27097, 23030, 20524, 18523, 18017, 7920, 7331, 7201, 5833]
In [15]:
#plotting Horizontal bar Chart
plt.barh(languages,popularity)

plt.title("Most Popular Languages")
#plt.ylabel("Programming Languages")
plt.xlabel("Number of People Who Use")

plt.tight_layout()

plt.show()
In [16]:
#Better to show Most Popular Language First
languages.reverse()
popularity.reverse()

#plotting Horizontal bar Chart
plt.barh(languages,popularity)

plt.title("Most Popular Languages")
#plt.ylabel("Programming Languages")
plt.xlabel("Number of People Who Use")

plt.tight_layout()

plt.show()
In [18]:
#Better Way 
import pandas as pd
In [19]:
data = pd.read_csv('data.csv')
ids= data['Responder_id']
lang_responses= data['LanguagesWorkedWith']
In [20]:
languages_counter = Counter()
In [25]:
for response in lang_responses:
    languages_counter.update(response.split(";"))
In [26]:
languages=[]
popularity=[]
In [27]:
for item in languages_counter.most_common(15):
    languages.append(item[0])
    popularity.append(item[1])
In [28]:
print(languages)
print(popularity)
['JavaScript', 'HTML/CSS', 'SQL', 'Java', 'Bash/Shell/PowerShell', 'PHP', 'Swift', 'Python', 'C#', 'C++', 'TypeScript', 'C', 'Other(s):', 'Ruby', 'Go']
[146788, 143035, 135113, 123486, 119560, 110599, 93313, 36443, 27097, 20524, 18523, 18017, 7920, 7331, 7201]
In [29]:
#Better to show Most Popular Language First
languages.reverse()
popularity.reverse()

#plotting Horizontal bar Chart
plt.barh(languages,popularity)

plt.title("Most Popular Languages")
#plt.ylabel("Programming Languages")
plt.xlabel("Number of People Who Use")

plt.tight_layout()

plt.show()