123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
def wordcloud(
    s: TextSeries,
    font_path: str = None,
    width: int = 400,
    height: int = 200,
    max_words: int = 200,
    mask=None,
    contour_width: float = 0,
    contour_color: str = "PAPAYAWHIP",
    min_font_size: int = 4,
    background_color: str = "PAPAYAWHIP",
    max_font_size=None,
    relative_scaling="auto",
    colormap=None,
    return_figure: bool = False,
):
    """
    Plot wordcloud image using WordCloud from word_cloud package.

    Most of the arguments are very similar if not equal to the mother
    function. In contrast, all words are taken into account when computing
    the wordcloud, including stopwords. They can be easily removed with
    preprocessing.remove_stopwords.

    Words are computed using generate_from_frequencies: the Series is
    joined into one string, split on whitespace, and the resulting tokens
    are counted.

    To reduce blur in the wordcloud image, `width` and `height` should be at
    least 400.

    Parameters
    ----------
    s : :class:`texthero._types.TextSeries`

    font_path : str, optional, default=None
        Font path to the font that will be used (OTF or TTF). Defaults to
        DroidSansMono path on a Linux machine. If you are on another OS or
        don't have this font, you need to adjust this path.

    width : int, optional, default=400
        Width of the canvas.

    height : int, optional, default=200
        Height of the canvas.

    max_words : int, optional, default=200
        The maximum number of words.

    mask : nd-array or None, optional, default=None
        When set, gives a binary mask on where to draw words. When set, width
        and height will be ignored and the shape of mask will be used instead.
        All white (#FF or #FFFFFF) entries will be considered "masked out"
        while other entries will be free to draw on.

    contour_width : float, optional, default=0
        If mask is not None and contour_width > 0, draw the mask contour.

    contour_color : str, optional, default="PAPAYAWHIP"
        Mask contour color.

    min_font_size : int, optional, default=4
        Smallest font size to use. Will stop when there is no more room in
        this size.

    background_color : str, optional, default="PAPAYAWHIP"
        Background color for the word cloud image.

    max_font_size : int or None, optional, default=None
        Maximum font size for the largest word. If None, height of the image
        is used.

    relative_scaling : float or "auto", optional, default="auto"
        Importance of relative word frequencies for font-size. With
        relative_scaling=0, only word-ranks are considered. With
        relative_scaling=1, a word that is twice as frequent will have twice
        the size. If you want to consider the word frequencies and not only
        their rank, relative_scaling around .5 often looks good.
        If 'auto' it will be set to 0.5 unless repeat is true, in which
        case it will be set to 0.

    colormap : string or matplotlib colormap, optional, default=None
        Matplotlib colormap to randomly draw colors from for each word.
        If None, a custom "texthero" palette is used.

    return_figure : bool, optional, default=False
        If True, return the matplotlib figure the wordcloud was drawn on.

    Returns
    -------
    The matplotlib figure if `return_figure` is True, otherwise None.
    """
    if colormap is None:
        # Custom palette.
        # TODO move it under tools.
        # NOTE(review): channels are scaled by 256 (not 255); kept as-is so
        # the existing palette does not change.
        corn = (255.0 / 256, 242.0 / 256, 117.0 / 256)
        mango_tango = (255.0 / 256, 140.0 / 256, 66.0 / 256)
        crayola = (63.0 / 256, 136.0 / 256, 197.0 / 256)
        crimson = (215.0 / 256, 38.0 / 256, 61.0 / 256)
        oxford_blue = (2.0 / 256, 24.0 / 256, 43.0 / 256)
        colormap = lsg.from_list(
            "texthero", [corn, mango_tango, crayola, crimson, oxford_blue]
        )

    # Join the whole Series once and tokenize on whitespace; the counts are
    # handed to WordCloud directly so every token (stopwords included) is kept.
    words = s.str.cat(sep=" ").split()
    frequencies = dict(Counter(words))

    cloud = WordCloud(
        font_path=font_path,
        width=width,
        height=height,
        max_words=max_words,
        mask=mask,
        contour_width=contour_width,
        contour_color=contour_color,
        min_font_size=min_font_size,
        background_color=background_color,
        max_font_size=max_font_size,
        relative_scaling=relative_scaling,
        colormap=colormap,
    ).generate_from_frequencies(frequencies)

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")

    if return_figure:
        return fig
|