----------------------
from pattern.vector import Document, Model,  HIERARCHICAL


# Compare two text files with pattern.vector: build a two-document model,
# reduce it with m.reduce(2), print each document's weighted features, then
# print the result of a hierarchical clustering of the documents.
#
# The print calls below are written so that they parse on both Python 2 and
# Python 3; on Python 2 they emit exactly what the statement form emitted.


def _read_text(path):
    """Return the full contents of the file at *path*.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as the text has been read, instead of being left to the garbage
    collector.
    """
    with open(path) as handle:
        return handle.read()


maker = _read_text('cqrr/maker1')
orel = _read_text('cqrr/oreally')
#theo = _read_text('cqrr/theogony')

d1 = Document(maker, name='1')
d2 = Document(orel, name='2')
# NOTE(review): the alternate document below reuses name '2'; give it a
# distinct name before enabling it alongside d2, or the printed headers
# will be indistinguishable.
#d3 = Document(theo, name='2')

m = Model([d1, d2])
#m = Model([d2, d3])
m.reduce(2)

for d in m.documents:
    print("")  # blank separator line before each document's section
    print(d.name)
    for concept, w1 in m.lsa.vectors[d.id].items():
        # A zero concept weight makes every product below zero-filtered
        # anyway, so skip the whole concept instead of re-testing w1 for
        # each feature.
        if w1 == 0:
            continue
        for feature, w2 in m.lsa.concepts[concept].items():
            if w2 != 0:
                # Doubled parentheses: print the (feature, weight) pair as
                # a single tuple, matching the original output format.
                print((feature, w1 * w2))

print(m.cluster(method=HIERARCHICAL, k=2))
 
-------------------------------------------------------





1
(u'consider', 0.112508790092602)
(u'random', 0.112508790092602)
(u'colors', 0.112508790092602)
(u'assembled', 0.112508790092602)
(u'lining', 0.112508790092602)
(u'program', 0.112508790092602)
(u'choose', 0.112508790092602)
(u'samuel', 0.112508790092602)
(u'lady', 0.112508790092602)
(u'leds', 0.112508790092602)
(u'disco', 0.112508790092602)
(u'clay', 0.112508790092602)
(u'futuristic', 0.112508790092602)
(u'led', 0.112508790092602)
(u'ribbon', 0.112508790092602)
(u'wire', 0.112508790092602)
(u'stitch', 0.112508790092602)
(u'pattern', 0.225017580185205)
(u'emotions', 0.112508790092602)
(u'hula', 0.112508790092602)
(u'silicone', 0.112508790092602)
(u'attention', 0.112508790092602)
(u'marilyn', 0.112508790092602)
(u'wait', 0.112508790092602)
(u'favorite', 0.112508790092602)
(u'maker', 0.112508790092602)
(u'hoop', 0.112508790092602)
(u'sparkles', 0.112508790092602)
(u'sheer', 0.112508790092602)
(u'create', 0.112508790092602)
(u'spiral', 0.112508790092602)
(u'time', 0.112508790092602)
(u'flexible', 0.112508790092602)
(u'glam', 0.112508790092602)
(u'flora', 0.112508790092602)
(u'look', 0.112508790092602)
(u'project', 0.112508790092602)
(u'hacking', 0.112508790092602)
(u'guide', 0.112508790092602)
(u'modern', 0.112508790092602)
(u'comfortable', 0.112508790092602)
(u'casings', 0.112508790092602)
(u'dress', 0.45003516037041)
(u'monroe', 0.112508790092602)
(u'dancing', 0.225017580185205)
(u'six', 0.112508790092602)
(u'wiring', 0.112508790092602)
(u'40', 0.112508790092602)
(u'incredibly', 0.112508790092602)
(u'strips', 0.225017580185205)
(u'raindrop', 0.112508790092602)
(u'sewing', 0.112508790092602)
(u'light', 0.112508790092602)
(u'fashions', 0.112508790092602)
(u'crafty', 0.112508790092602)

2
(u'spreads', 0.156173761888606)
(u'galvanizing', 0.156173761888606)
(u'amplifying', 0.156173761888606)
(u'knowledge', 0.156173761888606)
(u'signals', 0.156173761888606)
(u'adoption', 0.156173761888606)
(u'homing', 0.156173761888606)
(u'trends', 0.156173761888606)
(u'books', 0.156173761888606)
(u'research', 0.156173761888606)
(u'creating', 0.156173761888606)
(u'active', 0.156173761888606)
(u'community', 0.156173761888606)
(u'magazines', 0.156173761888606)
(u'faint', 0.156173761888606)
(u'innovators', 0.156173761888606)
(u'catalyst', 0.156173761888606)
(u'advocacy', 0.156173761888606)
(u'matter', 0.156173761888606)
(u'future', 0.156173761888606)
(u'history', 0.156173761888606)
(u'conferences', 0.156173761888606)
(u'geeks', 0.156173761888606)
(u'participant', 0.156173761888606)
(u'technology', 0.468521285665818)
(u'online', 0.156173761888606)
(u'development', 0.156173761888606)
(u'services', 0.156173761888606)
(u'alpha', 0.156173761888606)
(u'evangelism', 0.156173761888606)
(u'1978', 0.156173761888606)
(u'tech', 0.156173761888606)
(u'chronicler', 0.156173761888606)
Cluster([Document(id='P21N9t2-2', name='2'), Document(id='P21N9t2-1', name='1')])