X Tutup
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Topic modelling with LDA (gensim)\n",
    "\n",
    "Fit a small Latent Dirichlet Allocation model on five example sentences and list the top words of each topic.\n",
    "\n",
    "Steps: clean the text (lower-case, drop stop words and punctuation, lemmatize), build a bag-of-words corpus, fit the model, inspect the topics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "\n",
    "import gensim\n",
    "import nltk\n",
    "from gensim import corpora\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem.wordnet import WordNetLemmatizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_TOPICS = 3  # number of latent topics to fit\n",
    "PASSES = 50     # training passes over the corpus\n",
    "TOP_WORDS = 4   # words listed per topic\n",
    "SEED = 42       # fixed seed so repeated runs fit the same topics\n",
    "\n",
    "# Fetch the NLTK data used below. The call does not raise when a package cannot\n",
    "# be fetched; some NLTK releases also need 'omw-1.4' for the WordNet lemmatizer.\n",
    "for resource in ('stopwords', 'wordnet', 'omw-1.4'):\n",
    "    nltk.download(resource, quiet=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "doc1 = \"Sugar is bad to consume. My sister likes to have sugar, but not my father.\"\n",
    "doc2 = \"My father spends a lot of time driving my sister around to dance practice.\"\n",
    "doc3 = \"Doctors suggest that driving may cause increased stress and blood pressure.\"\n",
    "doc4 = \"Sometimes I feel pressure to perform well at school, but my father never seems to drive my sister to do better.\"\n",
    "doc5 = \"Health experts say that Sugar is not good for your lifestyle.\"\n",
    "\n",
    "doc_complete = [doc1, doc2, doc3, doc4, doc5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stop = set(stopwords.words('english'))\n",
    "exclude = set(string.punctuation)\n",
    "lemma = WordNetLemmatizer()\n",
    "\n",
    "\n",
    "def clean(doc: str) -> str:\n",
    "    \"\"\"Return `doc` lower-cased, without stop words or punctuation, and lemmatized.\n",
    "\n",
    "    Leading and trailing punctuation is stripped from every whitespace-separated\n",
    "    token *before* the stop-word check, so a stop word followed by punctuation\n",
    "    (for example \"it.\") is dropped instead of slipping through as \"it\".\n",
    "    Punctuation inside a token is removed afterwards, and the surviving tokens\n",
    "    are lemmatized and joined with single spaces.\n",
    "    \"\"\"\n",
    "    kept = []\n",
    "    for raw_token in doc.lower().split():\n",
    "        token = raw_token.strip(string.punctuation)\n",
    "        if not token or token in stop:\n",
    "            continue\n",
    "        # Drop punctuation that sits inside the token (e.g. hyphens).\n",
    "        token = ''.join(ch for ch in token if ch not in exclude)\n",
    "        if token:\n",
    "            kept.append(lemma.lemmatize(token))\n",
    "    return ' '.join(kept)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "doc_clean = [clean(doc).split() for doc in doc_complete]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dictionary and document-term matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dictionary = corpora.Dictionary(doc_clean)\n",
    "doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit the LDA model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Lda = gensim.models.ldamodel.LdaModel\n",
    "ldamodel = Lda(\n",
    "    doc_term_matrix,\n",
    "    num_topics=NUM_TOPICS,\n",
    "    id2word=dictionary,\n",
    "    passes=PASSES,\n",
    "    random_state=SEED,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Topics\n",
    "\n",
    "Each entry is `(topic_id, weighted top words)`. In an earlier, unseeded run the strongest topic was led by \"sugar\" (weight 0.135), followed by \"like\", \"consume\" and \"bad\"; the exact weights depend on the seed and the gensim version."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ldamodel.print_topics(num_topics=NUM_TOPICS, num_words=TOP_WORDS)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
X Tutup