Before starting work, install the required packages:
pip install numpy scipy matplotlib ipython scikit-learn pandas wordcloud nltk gensim bokeh
Then download the NLTK stopwords corpus from a Python shell:
$ python
>>> import nltk
>>> nltk.download('stopwords')
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib
from matplotlib import pyplot as plt
import os
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
data = pd.read_csv("./songs.csv")
data
def formatted_text(text):
    '''remove punctuation marks and lowercase the text'''
    import string
    # replacing the punctuation with no space,
    # which in effect deletes the punctuation marks
    translator = str.maketrans('', '', string.punctuation)
    # return the text stripped of punctuation marks, lowercased
    result = text.translate(translator)
    #result = result.replace("\n","")
    return result.lower()
data['lyrics'] = data['lyrics'].apply(formatted_text)
data['lyrics']
def length(text):
    '''return the length of the text'''
    return len(str(text))
data['length'] = data['lyrics'].apply(length)
data
# matplotlib.rcParams['figure.figsize'] = (12.0, 6.0)
bins = 500
plt.hist(list(data['length']), bins=bins, alpha=1, label='Songs')
plt.xlabel('lyrics length (characters)')
plt.ylabel('number of songs')
plt.grid()
plt.legend(loc='upper right')
plt.show()
# extracting the stopwords from nltk library
sw = stopwords.words('english')
# displaying the stopwords
np.array(sw)
print("Number of stopwords: ", len(sw))
def remove_stopwords(text):
    '''remove stopwords from the text (named so it does not shadow nltk.corpus.stopwords)'''
    # removing the stop words and lowercasing the selected words
    text = [word.lower() for word in text.split() if word.lower() not in sw]
    # joining the list of words with a space separator
    return " ".join(text)
data['lyrics'] = data['lyrics'].apply(remove_stopwords)
# create a count vectorizer object
count_vectorizer = CountVectorizer()
# fit the count vectorizer using the text data
count_vectorizer.fit(data['lyrics'])
# count the occurrences of each term across the whole corpus
# (note: vocabulary_ maps terms to column indices, not counts, so we sum the document-term matrix instead)
term_counts = count_vectorizer.transform(data['lyrics']).sum(axis=0)
# store the counts in a pandas Series with the vocabulary as index
vocab_bef_stem = pd.Series(np.asarray(term_counts).ravel(), index=count_vectorizer.get_feature_names())
# sort the series in descending order
vocab_bef_stem = vocab_bef_stem.sort_values(ascending=False)
top_vocab = vocab_bef_stem.head(10)
top_vocab
# create an object of stemming function
stemmer = SnowballStemmer("english")
def stemming(text):
    '''stem each word in the given text'''
    text = [stemmer.stem(word) for word in text.split()]
    return " ".join(text)
data['lyrics'] = data['lyrics'].apply(stemming)
top_vocab
%%time
text = " ".join(e for e in data['lyrics'])
wordcloud = WordCloud(max_font_size=50, max_words=100, background_color="white", width=700).generate(text)
# Save image file
wordcloud.to_file("./bag_of_words.png")
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
# re-fit a count vectorizer on the stemmed lyrics and count each term
count_vectorizer = CountVectorizer()
term_counts = count_vectorizer.fit_transform(data['lyrics']).sum(axis=0)
# store the counts in a pandas Series with the stemmed vocabulary as index
vocab_after_stem = pd.Series(np.asarray(term_counts).ravel(), index=count_vectorizer.get_feature_names())
# sort the series in descending order
vocab_after_stem = vocab_after_stem.sort_values(ascending=False)
print("total vocab after stemming:", len(vocab_after_stem))
count = CountVectorizer()
bag_of_words = count.fit_transform(np.array(data['lyrics']))
# Show feature matrix
bag_of_words.toarray()
# Get feature names
feature_names = count.get_feature_names()
print("feature_names", len(feature_names))
bag_of_words_dataFrame = pd.DataFrame(bag_of_words.toarray(), columns=feature_names)
pd.concat([data.year, bag_of_words_dataFrame], axis=1, sort=False)
most_used_words = bag_of_words_dataFrame.sum(axis=0).sort_values(ascending=False)
most_used_words_dataFrame = pd.DataFrame(data=most_used_words, columns=["count"])
most_used_words_dataFrame.head(20)
# Singapore, Together, One, Dream, Home
pd.concat([data.year, bag_of_words_dataFrame.singapor, bag_of_words_dataFrame.togeth, bag_of_words_dataFrame.one, bag_of_words_dataFrame.dream, bag_of_words_dataFrame.home], axis=1, sort=False)
# create the TF-IDF vectorizer object
tfidf_vectorizer = TfidfVectorizer(analyzer='word', min_df=0, stop_words='english')
# fit the vectorizer using the text data
tfidf_vectorizer.fit(data['lyrics'])
# collect the vocabulary items used in the vectorizer
dictionary = tfidf_vectorizer.vocabulary_.items()
# extract the TF-IDF representation matrix of the text data
tfidf_matrix = tfidf_vectorizer.transform(data['lyrics'])
# collect the TF-IDF matrix in a numpy array
array = tfidf_matrix.todense()
# store the TF-IDF array in a pandas dataframe
tfidf_df = pd.DataFrame(array, columns=tfidf_vectorizer.get_feature_names())
pd.concat([data.year, tfidf_df], axis=1, sort=False)
Use scikit-learn's cosine similarity function to compare one song's TF-IDF vector against every other song in the corpus. Here we look for songs similar to "Because It's Singapore" (song number 24 in the dataset).
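As a quick illustration of what the cosine score means, here is a minimal sketch on two hand-made toy vectors (the numbers are illustrative only and not taken from the dataset): vectors pointing in the same direction score 1.0, orthogonal vectors score 0.0.
# illustrative toy example of cosine similarity (not part of the song analysis)
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
a = np.array([[1.0, 0.0, 2.0]])
b = np.array([[2.0, 0.0, 4.0]])   # same direction as a
c = np.array([[0.0, 3.0, 0.0]])   # orthogonal to a
print(cosine_similarity(a, b))    # [[1.]]
print(cosine_similarity(a, c))    # [[0.]]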
song_number = 24
current_song = tfidf_matrix[song_number-1:song_number]
# cosine_similarity accepts sparse matrices directly
similarity = cosine_similarity(current_song, tfidf_matrix)
print("Song: ", data['title'][song_number-1])
# collect the similarity scores together with each song's title and year, then sort
similarity_dataFrame = pd.DataFrame(data=similarity.flatten(), columns=["cosine_similarity"])
similarity_dataFrame['title'] = data['title']
similarity_dataFrame['year'] = data['year']
similarity_dataFrame.sort_values(by=['cosine_similarity'], ascending=False)
current_directory = os.getcwd()
final_directory = os.path.join(current_directory, r'word2vec')
if not os.path.exists(final_directory):
    os.makedirs(final_directory)
import gensim
from gensim.models import Phrases
from gensim.models.word2vec import LineSentence
from gensim import corpora, models
from gensim.models import LdaMulticore
from gensim.models import Word2Vec
from gensim.corpora import Dictionary, MmCorpus
data = pd.read_csv("./songs.csv")
all_sentences_normalized_filepath = 'word2vec/all_lyrics_text.txt'
# write one normalized lyric per line (skip rows with missing lyrics)
with open(all_sentences_normalized_filepath, 'w', encoding='utf-8') as f:
    for lyrics in data.lyrics.values:
        if pd.isnull(lyrics):  # if the lyrics are missing, go to the next song
            continue
        f.write(formatted_text(lyrics) + '\n')
# count the lines just written
with open(all_sentences_normalized_filepath, encoding='utf-8') as f:
    lines = len(f.readlines())
print("total lines: ", lines)
%%time
USE_PREMADE_BIGRAM_MODEL = False
all_bigram_model_filepath = 'word2vec/all_bigram_model'
all_unigram_sentences = LineSentence(all_sentences_normalized_filepath)
if not USE_PREMADE_BIGRAM_MODEL:
    # learn which word pairs occur together often enough to be treated as phrases
    all_bigram_model = Phrases(all_unigram_sentences)
    all_bigram_model.save(all_bigram_model_filepath)
else:
    all_bigram_model = Phrases.load(all_bigram_model_filepath)
print(all_sentences_normalized_filepath)
%%time
USE_PREMADE_BIGRAM_SENTENCES = False
all_bigram_sentences_filepath = 'word2vec/all_sentences_for_word2vec.txt'
if not USE_PREMADE_BIGRAM_SENTENCES:
    with open(all_bigram_sentences_filepath, 'w', encoding='utf-8') as f:
        for unigram_sentence in all_unigram_sentences:
            # apply the phrase model so frequent word pairs become single bigram tokens
            all_bigram_sentence = all_bigram_model[unigram_sentence]
            f.write(' '.join(all_bigram_sentence) + '\n')
else:
    assert os.path.exists(all_bigram_sentences_filepath)
%%time
USE_PREMADE_WORD2VEC = False
all2vec_filepath = 'word2vec/all_word2vec_model'
if not USE_PREMADE_WORD2VEC:
    lyrics_for_word2vec = LineSentence(all_bigram_sentences_filepath)
    # skip-gram model with 100-dimensional vectors
    all2vec = Word2Vec(lyrics_for_word2vec, size=100, window=5, min_count=1, sg=1)
    # keep training for additional passes over the corpus
    for _ in range(25):
        all2vec.train(lyrics_for_word2vec, total_examples=all2vec.corpus_count, epochs=30)
    all2vec.save(all2vec_filepath)
else:
    all2vec = Word2Vec.load(all2vec_filepath)
    all2vec.init_sims()
all2vec_filepath = 'word2vec/all_word2vec_model'
all2vec = Word2Vec.load(all2vec_filepath)
wv_dataFrame = pd.DataFrame(all2vec.wv.index2word)
wv_dataFrame.head()
all2vec.wv.most_similar(positive=['singapore'], topn=20)
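As a further usage sketch, you can also score the similarity between two specific words; this assumes both words actually occur in the training lyrics (they should, given min_count=1, but swap in other words if not).
# illustrative only: cosine similarity between two individual words from the lyrics
print(all2vec.wv.similarity('singapore', 'home'))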
# build a dict mapping each vocabulary word to its embedding vector
all2vec_dic = {}
for key in all2vec.wv.vocab:
    all2vec_dic[key] = all2vec.wv[key].tolist()
all2vec_dataFrame = pd.DataFrame.from_dict(all2vec_dic).T
all2vec_dataFrame.head()
%%time
from sklearn.manifold import TSNE
import pickle
USE_PREMADE_TSNE = False
tsne_filepath = 'word2vec/tsne.pkl'
if not USE_PREMADE_TSNE:
    tsne = TSNE(random_state=0)
    # project the 100-dimensional word vectors down to two dimensions
    tsne_points = tsne.fit_transform(all2vec_dataFrame)
    with open(tsne_filepath, 'wb') as f:
        pickle.dump(tsne_points, f)
else:
    with open(tsne_filepath, 'rb') as f:
        tsne_points = pickle.load(f)
tsne_df = pd.DataFrame(tsne_points, index=all2vec_dataFrame.index, columns=['x_coord', 'y_coord'])
tsne_df['word'] = tsne_df.index
tsne_df.head()
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, LabelSet, value
# prepare the data in a form suitable for bokeh.
plot_data = ColumnDataSource(data=tsne_df)
# create the plot and configure it
tsne_plot = figure(title='t-SNE Word Embeddings',
                   plot_width=800,
                   plot_height=800,
                   active_scroll='wheel_zoom')
tsne_plot.circle('x_coord', 'y_coord', source=plot_data,
                 color='red', line_alpha=0.2, fill_alpha=0.1,
                 size=10, hover_line_color='orange')
labels = LabelSet(x='x_coord', y='y_coord', text='word', level='glyph',
                  x_offset=5, y_offset=5, source=plot_data, render_mode='canvas')
# adjust visual elements of the plot
tsne_plot.title.text_font_size = value('16pt')
tsne_plot.xaxis.visible = False
tsne_plot.yaxis.visible = False
tsne_plot.grid.grid_line_color = None
tsne_plot.outline_line_color = None
tsne_plot.add_layout(labels)
# show time
output_notebook()
show(tsne_plot);