word2vec embeddings.xml (1704B)
1 <!-- 2 import gensim.downloader 3 import sklearn.decomposition 4 from xml.etree import ElementTree 5 TARGET_WORDS = ["Paris","France","Madrid","Spain","Italy","Rome","Germany","Berlin"] 6 data = gensim.downloader.load('word2vec-google-news-300') 7 source = data[TARGET_WORDS] 8 pca = sklearn.decomposition.PCA(n_components=2, svd_solver='full') 9 target = pca.fit_transform(source) 10 root = ElementTree.Element("embeddings") 11 for word, vector in zip(TARGET_WORDS, target): 12 embedding = ElementTree.SubElement(root, "embedding") 13 ElementTree.SubElement(embedding, "x").text = str(vector[0]) 14 ElementTree.SubElement(embedding, "y").text = str(vector[1]) 15 ElementTree.SubElement(embedding, "label").text = str(word) 16 explained = ElementTree.SubElement(root, "explained") 17 ElementTree.SubElement(explained, "x").text = str(pca.explained_variance_ratio_[0]) 18 ElementTree.SubElement(explained, "y").text = str(pca.explained_variance_ratio_[1]) 19 20 tree = ElementTree.ElementTree(root) 21 tree.write("word2vec embeddings.xml") 22 --> 23 <embeddings><embedding><x>1.0263773</x><y>0.23883666</y><label>Paris</label></embedding><embedding><x>-0.947096</x><y>0.05958702</y><label>France</label></embedding><embedding><x>0.93604654</x><y>-1.3953391</y><label>Madrid</label></embedding><embedding><x>-0.873475</x><y>-1.1731068</y><label>Spain</label></embedding><embedding><x>-1.005623</x><y>-0.33860308</y><label>Italy</label></embedding><embedding><x>1.3307737</x><y>-0.19644451</y><label>Rome</label></embedding><embedding><x>-1.2115865</x><y>1.060732</y><label>Germany</label></embedding><embedding><x>0.7445842</x><y>1.7443377</y><label>Berlin</label></embedding><explained><x>0.27626586</x><y>0.25357923</y></explained></embeddings>