# Word Vectors Assignment Solution 2020

## PPMI

Given data

In [24]:
import numpy as np
from math import log

# Context labels (one per column of `counts`) and target-word labels (one per row)
contexts = ["global_JJ", "classic_JJ", "ancient_JJ", "liberal_JJ"]
words = ["politician_NN", "agenda_NN", "conservative_NN", "liberal_NN"]

# Co-occurrence counts: counts[i, j] pairs words[i] with contexts[j]
counts = np.array([
    [0, 5, 0, 3],
    [1, 1, 1, 4],
    [0, 4, 0, 1],
    [1, 6, 0, 1],
])

# Table dimensions, reused by the loops in later cells
nrows, ncols = counts.shape

Some printing code

In [25]:
# Width used to centre table banners and to draw the '=' rule:
# the longest word label plus 4 characters
centering_constant = 4 + max(map(len, words))

def print_count_table (banner, words, word_counts):
    """Print a two-column table of labels and their counts.

    The banner is centred in a field `centering_constant` characters wide
    and followed by a rule of '=' of the same width.  Each subsequent line
    shows one label left-justified in 16 columns, then its count formatted
    in a field of width 2.

    banner      -- title printed above the table
    words       -- iterable of row labels
    word_counts -- indexable collection; word_counts[i] is the count
                   printed next to the i-th label
    """
    title = f'{banner:^{centering_constant}}'
    rule = '=' * centering_constant
    print(title)
    print(rule)
    for row, label in enumerate(words):
        tally = word_counts[row]
        print(f'{label:<16} {tally:2}')

Adding up the counts

In [26]:
# Total number of context events (sum over the whole table)
N = np.sum(counts)
# Counts for all the words (row totals)
word_counts = np.sum(counts, axis=1)
# Counts for all the contexts (column totals)
context_counts = np.sum(counts, axis=0)

Now compute the **joint probs**, dividing by N:

In [27]:
# Joint probabilities: every cell of the count table divided by the total N
jps = np.divide(counts, N)

Print out event counts and joint probs.

In [28]:
print('N: ',N)
# Print each table under its heading, one labelled row per word
for heading, table in (('Counts', counts), ("Joint probs", jps)):
    print()
    print(heading)
    for row in range(nrows):
        print(f'{words[row]:<16} {table[row,:]}')
# Sanity check: the joint probabilities should sum to (approximately) 1
print(jps.sum())

N:  28

Counts
politician_NN    [0 5 0 3]
agenda_NN        [1 1 1 4]
conservative_NN  [0 4 0 1]
liberal_NN       [1 6 0 1]

Joint probs
politician_NN    [0.         0.17857143 0.         0.10714286]
agenda_NN        [0.03571429 0.03571429 0.03571429 0.14285714]
conservative_NN  [0.         0.14285714 0.         0.03571429]
liberal_NN       [0.03571429 0.21428571 0.         0.03571429]
0.9999999999999998


So in particular, p(w = politician_NN, c = liberal_JJ) = 3/28 ≈ .107.

Print out the word/context counts  (sum rows and columns)

In [29]:

# Row totals per target word, then column totals per context
print_count_table(banner='Words', words=words, word_counts=word_counts)
print()
print_count_table(banner='Contexts', words=contexts, word_counts=context_counts)

       Words       
politician_NN     8
agenda_NN         7
conservative_NN   5
liberal_NN        8

     Contexts      
global_JJ         2
classic_JJ       16
ancient_JJ        1
liberal_JJ        9


### Compute Word, Context Probs, and PPMI

In [30]:
def word_prob(i):
    """Return the marginal probability of the word at row i (word_counts[i] / N)."""
    row_total = word_counts[i]
    return row_total / N


def ctxt_prob(j):
    """Return the marginal probability of the context at column j (context_counts[j] / N)."""
    column_total = context_counts[j]
    return column_total / N



So in particular:

In [31]:
# Marginal probability of the target word politician_NN
politician_row = words.index('politician_NN')
word_prob(politician_row)

0.2857142857142857

In [32]:
# Marginal probability of the context liberal_JJ
liberal_column = contexts.index('liberal_JJ')
ctxt_prob(liberal_column)

0.32142857142857145

PMI values

In [33]:
def pmi (i,j):
    """Return the pointwise mutual information (log base 2) of word i and context j.

    The value is log2 of the observed joint probability jps[i, j] divided by
    the probability expected under independence, word_prob(i) * ctxt_prob(j).
    When the joint probability is not positive the PMI is undefined, and
    0.0 is returned instead.
    """
    observed = jps[i,j]
    if not observed > 0:
        # Returning 0 for undefined values
        return 0.0
    # Probability the pair would have if word and context were independent
    expected = word_prob(i) * ctxt_prob(j)
    return log(observed / expected, 2)

Print out PMI values, just to show the negative values.

In [34]:
# One line per (word, context) pair, with a blank line after each word's group
for row in range(nrows):
    for col in range(ncols):
        word, context, score = words[row], contexts[col], pmi(row, col)
        print(f'{word:<18} {context:<12} {score:> 5.3f}')
    print()

politician_NN      global_JJ     0.000
politician_NN      classic_JJ    0.129
politician_NN      ancient_JJ    0.000
politician_NN      liberal_JJ    0.222

agenda_NN          global_JJ     1.000
agenda_NN          classic_JJ   -2.000
agenda_NN          ancient_JJ    2.000
agenda_NN          liberal_JJ    0.830

conservative_NN    global_JJ     0.000
conservative_NN    classic_JJ    0.485
conservative_NN    ancient_JJ    0.000
conservative_NN    liberal_JJ   -0.684

liberal_NN         global_JJ     0.807
liberal_NN         classic_JJ    0.392
liberal_NN         ancient_JJ    0.000
liberal_NN         liberal_JJ   -1.363



Compute the PPMIs, saving them in the array named `ppmis` for the next part of the homework.

In [17]:
def ppmi (i,j):
    """Return the positive PMI of word i and context j: pmi(i, j) with negative values replaced by 0."""
    score = pmi(i,j)
    # Clip at zero; a non-negative score is returned unchanged
    return 0 if 0 > score else score

# PPMI table with the same layout as `counts`: ppmis[i, j] = ppmi(i, j)
ppmis = np.array(
    [[ppmi(row, col) for col in range(ncols)] for row in range(nrows)],
    dtype=float,
).reshape(nrows, ncols)

Print out the PPMI values.

In [21]:
# The word label is printed once per group; the remaining lines of the
# group are indented by the label's width so the columns line up
for row in range(nrows):
    label = f'{words[row]:<16}'
    indent = ' ' * len(label)
    print(label, end = '')
    for col in range(ncols):
        lead = indent if col != 0 else ''
        print(f"{lead} {contexts[col]:<12} {ppmis[row,col]:> 5.3f}")
    print()

politician_NN    global_JJ     0.000
                 classic_JJ    0.556
                 ancient_JJ    0.000
                 liberal_JJ    0.447

agenda_NN        global_JJ     0.000
                 classic_JJ    0.000
                 ancient_JJ    0.193
                 liberal_JJ    1.055

conservative_NN  global_JJ     0.000
                 classic_JJ    0.913
                 ancient_JJ    0.000
                 liberal_JJ    0.000

liberal_NN       global_JJ     0.000
                 classic_JJ    0.819
                 ancient_JJ    0.000
                 liberal_JJ    0.000

cabal_NN         global_JJ     1.252
                 classic_JJ    0.000
                 ancient_JJ    1.415
                 liberal_JJ    0.000



So in particular, the PPMI value asked for in the problem is:

```
agenda_NN      global_JJ    1.0
```

## Cosine

In [36]:
def vec_length (wvec):
    """Return the Euclidean length of a word vector: the square root of the sum of its squared components."""
    squared = wvec * wvec
    return squared.sum() ** 0.5

def normalized_vec (wvec):
    """Return wvec divided by its Euclidean length, i.e. rescaled to length 1."""
    norm = vec_length(wvec)
    return wvec / norm

def cosine (vec1, vec2):
    """Return the cosine of the angle between two vectors: the dot product of their normalized forms."""
    unit1 = normalized_vec(vec1)
    unit2 = normalized_vec(vec2)
    return unit1.dot(unit2)

Cosine using count vectors

In [37]:
# Rows of the raw count table for the two target words
liberal_vec = counts[words.index('liberal_NN')]
agenda_vec = counts[words.index('agenda_NN')]
count_cosine = cosine(liberal_vec, agenda_vec)
print('Cos from Counts', count_cosine)

Cos from Counts 0.4093776101606328


In [39]:
# Rows of the PPMI table for the same two target words
liberal_ppmi_vec = ppmis[words.index('liberal_NN')]
agenda_ppmi_vec = ppmis[words.index('agenda_NN')]
ppmi_cosine = cosine(liberal_ppmi_vec, agenda_ppmi_vec)
print('Cos from PPMis', ppmi_cosine)

Cos from PPMis 0.0


In [40]:
# Display the row of `ppmis` selected for liberal_NN
liberal_ppmi_vec 

array([0.        , 0.81942775, 0.        , 0.        ])

In [41]:
# Display the row of `ppmis` selected for agenda_NN
agenda_ppmi_vec 

array([0.        , 0.        , 0.19264508, 1.05514155])

## Additional questions

1.  If you really wanted to know whether the probability of word `i`
    was independent of the presence of word `j`, which would be better to use, PMI or PPMI?  Why?
2.  Suppose `P(w = i | c = j)` = 2 * `P(w = i)` and suppose neither `P(w = i)` nor `P(c = j)` is equal to 0. Can the PPMI value of target word `i` and context word `j` be 0?  Explain.  Use an example with made-up counts, if it helps.