Showing posts with label histogram. Show all posts
Showing posts with label histogram. Show all posts

Matplotlib - plotting a dictionary using matplotlib



 # Dictionary - Histograms and matplotlib


# Create a histogram based on the frequency of characters in given string


# Histogram - String - Character Frequency


import matplotlib.pyplot as plt


import operator


# Sample text, folded to lower case so 'A' and 'a' are counted together.
string1 = "An algorithm is a step-by-step procedure to solve a given problem. It is a well-defined computational procedure that takes some values as input, manipulates them using a set of instructions and produces some values as output and terminates in a finite amount of time. An algorithm, when formally written in a programming language is called a program, or code. Derivation of an algorithm that solves the problem and conversion of the algorithm into code, together,  is known as Algorithmic Problem Solving.".lower()

# Distinct characters in the text (letters, spaces and punctuation alike).
ch = set(string1)

# Keep the alphabetic ones, in the set's iteration order, and pair each
# with the number of times it occurs in the text.
letters = [c for c in ch if c.isalpha()]
D = dict(zip(letters, map(string1.count, letters)))

print("Histogram")
print("Length of dictionary = ", len(D))
print(D)

# One bar per letter: bar i is centred on x = i and labelled with the
# i-th key, so heights and tick labels stay aligned.
positions = range(len(D))
heights = list(D.values())
labels = list(D.keys())

plt.bar(positions, heights, align='center')
plt.xticks(positions, labels)

plt.xlabel('Character')
plt.ylabel('Frequency')
plt.title('Character Frequency in given string.')

plt.show()


"""

Output

>python matplot.py

Histogram

Length of dictionary =  22

{'k': 2, 'e': 40, 'n': 30, 'g': 14, 's': 26, 'u': 14, 'o': 38, 'i': 32, 't': 37, 'r': 25, 'l': 22, 'b': 4, 'w': 4, 'f': 7, 'p': 14, 'v': 8, 'a': 39, 'y': 2, 'm': 20, 'd': 12, 'c': 10, 'h': 12}


"""

Dictionary - Histogram - character frequency of a string

 # Dictionary - Histograms

# Create a histogram based on the frequency of characters in given string

# Histogram - String - Character Frequency


import operator


# Sample text, folded to lower case so 'A' and 'a' are counted together.
string1 = "An algorithm is a step-by-step procedure to solve a given problem. It is a well-defined computational procedure that takes some values as input, manipulates them using a set of instructions and produces some values as output and terminates in a finite amount of time. An algorithm, when formally written in a programming language is called a program, or code. Derivation of an algorithm that solves the problem and conversion of the algorithm into code, together,  is known as Algorithmic Problem Solving.".lower()

# Distinct characters in the text (letters, spaces and punctuation alike).
ch = set(string1)

# Keep the alphabetic ones, in the set's iteration order, and pair each
# with the number of times it occurs in the text.
letters = [c for c in ch if c.isalpha()]
D = dict(zip(letters, map(string1.count, letters)))

print("Histogram")
print("Length of dictionary = ", len(D))
print(D)

# Order the (character, count) pairs from most to least frequent.
DSorted = list(D.items())
DSorted.sort(key=operator.itemgetter(1), reverse=True)

# The first three pairs are the three most frequent characters.
D_max3 = dict(DSorted[:3])

print("Most frequently used 3 characters:")
print(D_max3)


"""

Sample output

>python Dict_Hist.py

Histogram

Length of dictionary =  22

{'t': 37, 'a': 39, 'w': 4, 'l': 22, 'g': 14, 'h': 12, 'o': 38, 'u': 14, 's': 26, 'i': 32, 'm': 20, 'e': 40, 'd': 12, 'c': 10, 'r': 25, 'v': 8, 'n': 30, 'b': 4, 'y': 2, 'p': 14, 'k': 2, 'f': 7}

Most frequently used 3 characters:

{'e': 40, 'a': 39, 'o': 38}

"""

PYTHON ASSIGNMENT 7: MISCELLANEOUS PROGRAMS

Operators and Control Structures

Swap two numbers

Circulate the values of n variables

Find GCD of two numbers

Distance between two points

Square root of a number using Newton’s method 

Exponentiation of a number 

First N prime numbers



List

Maximum in a given list of numbers

Sum of elements in a list

Matrix multiplication


Search

Linear search

Binary search


Sort

Insertion sort

Selection sort

Merge sort


Dictionaries : Histograms

Read a string and find the 5 most frequent characters

Read a string and find the 3 most frequent words


File operations:

File copy

Merge two files

Read a file name from command line and count the number of lines, words and characters in the file.

Read a file and create a dictionary for the frequency of words in the file

Read a file and create a dictionary for the frequency of characters in the file.

File handling 18: Files and dictionary, Histogram, character frequency

# Write a program that creates a dictionary
# of the frequency of all characters in a file.
# Display the five most frequently occurring characters

# Files, Dictionary and Command line arguments
# Histogram - Character frequency

import sys
import operator

def count_char_frequency(lines):
    """Return a dict mapping each alphabetic character to its count.

    Args:
        lines: any iterable of strings, e.g. an open text file.

    Returns:
        A dict whose keys are lower-cased alphabetic characters and whose
        values are the number of times each occurs across all lines.
        Digits, whitespace and punctuation are ignored.
    """
    char_freq = {}
    # Iterate over every line: a blank line in the middle of the input
    # must not be mistaken for the end of the input.
    for line in lines:
        for ch in line.lower():
            if ch.isalpha():
                char_freq[ch] = char_freq.get(ch, 0) + 1
    return char_freq


def report_char_frequency(fname):
    """Print the character histogram of file *fname* and its top five.

    Args:
        fname: path of the text file to analyse.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # 'with' closes the file even if reading raises.
    with open(fname, "r") as fs:
        Char_freq = count_char_frequency(fs)

    for ch in Char_freq:
        print(ch, Char_freq[ch])

    # Most frequent first; ties keep first-seen order (stable sort).
    List1 = sorted(Char_freq.items(), key=operator.itemgetter(1), reverse=True)

    print("Five most frequently occuring characters :")
    for k, v in List1[0:5]:
        print(k, v)


if __name__ == "__main__":
    # The file name is the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("Usage: python <script> <file name>")
    report_char_frequency(sys.argv[1])

File Handling 17: Files and Dictionary: Histogram, Word Frequency, remove noise words

# Write a program that creates a dictionary
# of the frequency of all words in a file.

# Files, Dictionary and Command line arguments
# Histogram - Word frequency

import sys

def count_word_frequency(lines):
    """Return a dict mapping each word to the number of times it occurs.

    Each line is lower-cased, every character that is neither a letter
    nor whitespace is removed (so "step-by-step" becomes "stepbystep"),
    and the remainder is split into words on runs of whitespace.

    Args:
        lines: any iterable of strings, e.g. an open text file.

    Returns:
        A dict of word -> occurrence count. The empty string is never
        counted as a word.
    """
    word_freq = {}
    # Iterate over every line: a blank line in the middle of the input
    # must not be mistaken for the end of the input.
    for line in lines:
        new_line = "".join(
            ch for ch in line.lower() if ch.isalpha() or ch.isspace()
        )
        # split() with no argument splits on any run of whitespace, so
        # repeated spaces and tabs do not produce empty "words".
        for wd in new_line.split():
            word_freq[wd] = word_freq.get(wd, 0) + 1
    return word_freq


def report_word_frequency(fname):
    """Print the word histogram of file *fname* and the unique-word count.

    Args:
        fname: path of the text file to analyse.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # 'with' closes the file even if reading raises.
    with open(fname, "r") as fs:
        Word_freq = count_word_frequency(fs)

    for ele in Word_freq:
        print(ele.ljust(15), Word_freq[ele])

    print("Number of unique words in file = ", len(Word_freq))


if __name__ == "__main__":
    # The file name is the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("Usage: python <script> <file name>")
    report_word_frequency(sys.argv[1])




# Write a program that creates a dictionary
# of the frequency of all words in a file.
# Remove noise words in file like a, an, the etc

# Files, Dictionary and Command line arguments
# Histogram - Word frequency

import sys

# Common words that carry no meaning for the histogram. A frozenset gives
# constant-time membership tests and cannot be modified by accident.
Noise_words = frozenset([
    'an', 'as', 'of', 'it', 'by', 'to', 'so', 'do', 'be', 'up', 'on', 'ie',
    'its', 'are', 'all', 'has', 'can', 'how', 'end', 'any', 'may', 'for',
    'will', 'use', 'one', 'two', 'the', 'also', 'have', 'this', 'that',
    'what', 'where', 'when', 'then', 'those', 'from', 'once', 'more', 'most',
])


def count_significant_words(lines, noise_words=Noise_words):
    """Return a dict of word -> count, skipping noise words.

    Each line is lower-cased, every character that is neither a letter
    nor whitespace is removed (so "step-by-step" becomes "stepbystep"),
    and the remainder is split into words on runs of whitespace. Words
    of one character or less, and words in *noise_words*, are ignored.

    Args:
        lines: any iterable of strings, e.g. an open text file.
        noise_words: container of lower-case words to leave out.

    Returns:
        A dict of word -> occurrence count for the remaining words.
    """
    word_freq = {}
    # Iterate over every line: a blank line in the middle of the input
    # must not be mistaken for the end of the input.
    for line in lines:
        new_line = "".join(
            ch for ch in line.lower() if ch.isalpha() or ch.isspace()
        )
        # split() with no argument splits on any run of whitespace,
        # including tabs and repeated spaces.
        for wd in new_line.split():
            if len(wd) <= 1 or wd in noise_words:
                continue
            word_freq[wd] = word_freq.get(wd, 0) + 1
    return word_freq


def report_significant_words(fname):
    """Print the noise-filtered word histogram of file *fname*.

    Args:
        fname: path of the text file to analyse.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # 'with' closes the file even if reading raises.
    with open(fname, "r") as fs:
        Word_freq = count_significant_words(fs)

    for ele in Word_freq:
        print(ele.ljust(15), Word_freq[ele])

    print("Number of unique words in file = ", len(Word_freq))


if __name__ == "__main__":
    # The file name is the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit("Usage: python <script> <file name>")
    report_significant_words(sys.argv[1])

Histogram - String - Word frequency

# Histogram - String - Word Frequency

import operator

# Sample text, folded to lower case so "An" and "an" are the same word.
string1 = "An algorithm is a step-by-step procedure to solve a given problem. It is a well-defined computational procedure that takes some values as input, manipulates them using a set of instructions and produces some values as output and terminates in a finite amount of time. An algorithm, when formally written in a programming language is called a program, or code. Derivation of an algorithm that solves the problem and conversion of the algorithm into code, together,  is known as Algorithmic Problem Solving."
string1 = string1.lower()

# Drop sentence punctuation; a hyphen separates words, so it becomes a space.
string1 = string1.replace(".", "")
string1 = string1.replace(",", "")
string1 = string1.replace("-", " ")

# split() with no argument splits on runs of whitespace. Splitting on a
# single " " would turn the double space in the text into an empty-string
# "word" that ends up in the histogram and inflates its length.
List1 = string1.split()

# Word -> number of occurrences, in order of first appearance.
dict1 = {}
for ele in List1:
    dict1[ele] = dict1.get(ele, 0) + 1

print("Histogram")
print("Length of dictionary = ", len(dict1))
print(dict1)

# Most frequent first; the sort is stable, so equally frequent words keep
# their order of first appearance.
List2 = sorted(dict1.items(), key=operator.itemgetter(1), reverse=True)

dict2 = dict(List2[:3])
print("Most frequently used 3 words:")
print(dict2)

Histogram - String - Character frequency

# Histogram - Character frequency

import operator

# Sample text, folded to lower case so 'A' and 'a' are counted together.
string1 = "An algorithm is a step-by-step procedure to solve a given problem. It is a well-defined computational procedure that takes some values as input, manipulates them using a set of instructions and produces some values as output and terminates in a finite amount of time. An algorithm, when formally written in a programming language is called a program, or code. Derivation of an algorithm that solves the problem and conversion of the algorithm into code, together,  is known as Algorithmic Problem Solving.".lower()

# Walk the letters 'a'..'z' (code points 97..122) in order and keep only
# those that actually occur, so the histogram is in alphabetical order.
alphabet = map(chr, range(ord("a"), ord("z") + 1))
tallies = ((letter, string1.count(letter)) for letter in alphabet)
dict1 = {letter: total for letter, total in tallies if total}

print("Histogram")
print(dict1)

# Order the (character, count) pairs from most to least frequent and
# keep the first five.
List1 = list(dict1.items())
List1.sort(key=operator.itemgetter(1), reverse=True)
dict2 = dict(List1[:5])

print("Most frequent 5 characters")
print(dict2)