Skip to content

Commit

Permalink
Create heaps_law.py
Browse files Browse the repository at this point in the history
  • Loading branch information
wunter committed Mar 3, 2016
1 parent fc52dc6 commit 2bd2da8
Showing 1 changed file with 57 additions and 0 deletions.
57 changes: 57 additions & 0 deletions heaps_law.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

import re
import matplotlib
from matplotlib import pyplot
import pylab


def heaps_law(texto, scale=False, k=10, b=0.5):
    '''
    Plot the vocabulary growth of a text against the ideal Heaps' law curve.

    Heaps' law estimates vocabulary size as V = k * (n ** b), where
    V -> number of unique words, n -> total number of words read so far.

    texto -> iterable of hashable tokens (words); each element counts as
             one word.
    scale -> False keeps the default axes; any truthy value (e.g. 'log')
             is passed to both pyplot.xscale and pyplot.yscale.
    k     -> Heaps constant, usually between 10 and 100.
    b     -> Heaps exponent, usually between 0.4 and 0.6.

    k and b have to be tuned for each corpus or text.

    Returns whatever pyplot.show() returns.
    Raises ValueError when texto contains no tokens.
    '''
    # Materialize once: accepts any iterable (not only sequences) and gives
    # a reliable token count for the ideal curve.
    tokens = list(texto)
    if not tokens:
        # Fail early with a clear message instead of an opaque unpacking
        # error from zip(); nothing has been drawn at this point.
        raise ValueError("texto must contain at least one token")

    # Observed distribution: vocabulary size after each token is read.
    seen_words = set()
    vocabulary_sizes = []
    for token in tokens:
        seen_words.add(token)
        vocabulary_sizes.append(len(seen_words))

    # n runs from 1 to len(tokens) inclusive, so both curves share the same
    # x values, the last token is included, and no x == 0 point ends up on
    # a logarithmic axis.
    word_counts = list(range(1, len(tokens) + 1))

    # Ideal Heaps distribution --> V = k * (n ** b)
    ideal_sizes = [k * (n ** b) for n in word_counts]

    # Plotting
    if scale:
        pyplot.xscale(scale)
        pyplot.yscale(scale)

    pyplot.plot(word_counts, vocabulary_sizes, 'r-',
                label="Distribución del texto")
    pyplot.plot(word_counts, ideal_sizes, "b-", label="Distribución Ideal")

    pyplot.legend(loc='upper left')
    pyplot.title('Ley de Heaps')
    pyplot.xlabel('Numero de Palabras')
    pyplot.ylabel('Palabras Unicas')

    return pyplot.show()

0 comments on commit 2bd2da8

Please sign in to comment.