I got the automatic summarization API "summpy" running on Python 3.

Recently I needed to summarize speech that had been converted to text and import the result as data. While looking into that, I got the automatic text summarization API published by Recruit Technologies running on Python 3. It was originally written for Python 2.7, and since I suspect many people would like to run it on Python 3, I'm sharing the changes here. (Original repository: GitHub of Recruit Technologies)

This time I converted only the lexrank algorithm and only confirmed access through the web interface. The original code comments have been removed.

## Environment

- OS: Windows 10 Home
- Python: WinPython64-3.7.7.1.exe
- MeCab: MeCab 0.996 64-bit version https://github.com/ikegami-yukino/mecab/releases/tag/v0.996
- Additional Python modules: CherryPy==18.6.0, mecab==0.996.2
## Placement
summpy-master
  ├─server.bat
  └─summpy
    ├─lexrank.py
    ├─server.py
    ├─tools.py
    ├─misc
    │  ├─divrank.py
    │  └─mecab_segmenter.py
    └─server_data
       └─test.html

## Source

server.py


#!/usr/bin/env python
# coding: utf-8

import sys
import os
import re
import getopt
import cherrypy
import json

path = os.getcwd()         #Win compatible
sys.path.append(path)      #Win compatible
from summpy import tools   #Win compatible

class Summarizer(object):
    def __init__(self):
        self.summarizers = {}

    def get_summarizer(self, name):
        if name in self.summarizers:
            pass
        elif name == 'lexrank':
            from summpy import lexrank
            self.summarizers[name] = lexrank.summarize
        elif name == 'mcp':
            from summpy import mcp_summ
            self.summarizers[name] = mcp_summ.summarize

        return self.summarizers[name]

    @cherrypy.expose
    def summarize(self, text=None, algo='lexrank', **summarizer_params):
        try:  # TODO: generate more useful error message
            # fix parameter type
            for param, value in list(summarizer_params.items()):
                if value == '':
                    del summarizer_params[param]
                    continue
                elif re.match(r'^\d*\.\d+$', value):  # escape the dot so only real decimals match as float
                    value = float(value)
                elif re.match(r'^\d+$', value):
                    value = int(value)
                elif value == 'true':
                    value = True
                elif value == 'false':
                    value = False
                summarizer_params[param] = value

            if algo in ('lexrank', 'clexrank', 'divrank'):
                summarizer = self.get_summarizer('lexrank')
                if algo == 'clexrank':
                    summarizer_params['continuous'] = True
                if algo == 'divrank':
                    summarizer_params['use_divrank'] = True
            elif algo == 'mcp':
                summarizer = self.get_summarizer('mcp')
                
            summary, debug_info = summarizer(text, **summarizer_params)  # remaining keyword arguments are passed through as a dict via **summarizer_params

        except Exception as e:
            print(str(e))           
            return json.dumps({'error': str(e)}, ensure_ascii=False, indent=2)
        else:
            res = json.dumps(
                tools.tree_encode({
                    'summary': summary, 'debug_info': debug_info
                }),
                ensure_ascii=False, indent=2
            )

            return res.encode('utf8')  #Fix https://stackoverflow.com/questions/20215147/python-cherrypy-500-valueerror-page-handlers-must-return-bytes

if __name__ == '__main__':
    options, args = getopt.getopt(sys.argv[1:], 'h:p:')
    options = dict(options)
    host, port = options['-h'], int(options['-p'])

    cherrypy.config.update({
        'server.socket_host': host,
        'server.socket_port': port
    })

    conf = {
        '/': {
            'tools.staticdir.root': path
        },
        '/summarize': {
            'tools.response_headers.on': True,
            'tools.response_headers.headers': [
                ('Content-type', 'application/json')
            ]
        },
        '/static': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': 'summpy\\server_data',     #Win compatible
            'tools.response_headers.headers': [
                ('Content-type', 'application/json')
            ]
        }
    }
    cherrypy.quickstart(Summarizer(), '/', conf)

lexrank.py


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys,os
import getopt
import codecs
import collections
import numpy
import networkx
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import pairwise_distances

path = os.getcwd()                                        #Win compatible
sys.path.append(path)                                     #Win compatible

from summpy import tools                                  #Win compatible
from summpy.misc.divrank import divrank, divrank_scipy    #Win compatible

def lexrank(sentences, continuous=False, sim_threshold=0.1, alpha=0.9,
            use_divrank=False, divrank_alpha=0.25):

    # configure ranker
    ranker_params = {'max_iter': 1000}
    if use_divrank:
        ranker = divrank_scipy
        ranker_params['alpha'] = divrank_alpha
        ranker_params['d'] = alpha
    else:
        ranker = networkx.pagerank_scipy
        ranker_params['alpha'] = alpha

    graph = networkx.DiGraph()

    # sentence -> tf
    sent_tf_list = []
    for sent in sentences:

        words = tools.word_segmenter_ja(sent)
        tf = collections.Counter(words)
        sent_tf_list.append(tf)
    sent_vectorizer = DictVectorizer(sparse=True)
    sent_vecs = sent_vectorizer.fit_transform(sent_tf_list)

    # compute similarities between sentences
    sim_mat = 1 - pairwise_distances(sent_vecs, sent_vecs, metric='cosine')

    if continuous:
        linked_rows, linked_cols = numpy.where(sim_mat > 0)
    else:
        linked_rows, linked_cols = numpy.where(sim_mat >= sim_threshold)

    # create similarity graph
    graph.add_nodes_from(list(range(sent_vecs.shape[0])))
    for i, j in zip(linked_rows, linked_cols):
        if i == j:
            continue
        weight = sim_mat[i,j] if continuous else 1.0
        #  graph.add_edge(i, j, {'weight': weight})    # old networkx 1.x call style
        graph.add_edge(i, j, weight=weight)            # networkx 2.x keyword form keeps the edge weight

    scores = ranker(graph, **ranker_params)
    return scores, sim_mat

def summarize(text, sent_limit=None, char_limit=None, imp_require=None,
              debug=False, **lexrank_params):
    debug_info = {}
    sentences = list(tools.sent_splitter_ja(text))
    scores, sim_mat = lexrank(sentences, **lexrank_params)
    sum_scores = sum(scores.values())
    acc_scores = 0.0
    indexes = set()
    num_sent, num_char = 0, 0
    for i in sorted(scores, key=lambda i: scores[i], reverse=True):
        num_sent += 1
        num_char += len(sentences[i])
        if sent_limit is not None and num_sent > sent_limit:
            break
        if char_limit is not None and num_char > char_limit:
            break
        if imp_require is not None and acc_scores / sum_scores >= imp_require:
            break
        indexes.add(i)
        acc_scores += scores[i]

    if len(indexes) > 0:
        summary_sents = [sentences[i] for i in sorted(indexes)]
    else:
        summary_sents = sentences

    if debug:
        debug_info.update({
            'sentences': sentences, 'scores': scores
        })

    return summary_sents, debug_info


if __name__ == '__main__':

    _usage = '''
Usage:
  python lexrank.py -f <file_name> [-e <encoding> ]
                  [ -v lexrank | clexrank | divrank ]
                  [ -s <sent_limit> | -c <char_limit> | -i <imp_required> ]
  Args:
    -f: plain text file to be summarized
    -e: input and output encoding (default: utf-8)
    -v: variant of LexRank (default is 'lexrank')
    -s: summary length (the number of sentences)
    -c: summary length (the number of characters)
    -i: cumulative LexRank score [0.0-1.0]
    '''.strip()

    options, args = getopt.getopt(sys.argv[1:], 'f:e:v:s:c:i:')
    options = dict(options)

    if len(options) < 2:
        print(_usage)
        sys.exit(0)

    fname = options['-f']
    encoding = options['-e'] if '-e' in options else 'utf-8'
    variant = options['-v'] if '-v' in options else 'lexrank'
    sent_limit = int(options['-s']) if '-s' in options else None
    char_limit = int(options['-c']) if '-c' in options else None
    imp_require = float(options['-i']) if '-i' in options else None

    if fname == 'stdin':
        text = '\n'.join(
            line for line in sys.stdin.readlines()
        )  # sys.stdin already yields str in Python 3, so no decode is needed
    else:
        text = codecs.open(fname, encoding=encoding).read()

    lexrank_params = {}
    if variant == 'clexrank':
        lexrank_params['continuous'] = True
    if variant == 'divrank':
        lexrank_params['use_divrank'] = True

    sentences, debug_info = summarize(
        text, sent_limit=sent_limit, char_limit=char_limit,
        imp_require=imp_require, **lexrank_params
    )
    for sent in sentences:
        print(sent.strip())  # printing bytes would show b'...' in Python 3
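
For reference, once the changes above are in place, summarize() can also be called directly from Python without going through CherryPy. The following is a minimal sketch, assuming it is run from the summpy-master directory with MeCab and the libraries imported above installed; the sample text and the sent_limit value are arbitrary.

# Minimal sketch: call the summarizer directly (run from summpy-master).
from summpy.lexrank import summarize

text = 'ここに要約したい文章を入れます。文がいくつかあるほうが結果が分かりやすいです。要約では上位の文だけが返ります。'
summary_sents, debug_info = summarize(text, sent_limit=1)
for sent in summary_sents:
    print(sent.strip())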

tools.py


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os,sys
import re
import json
path = os.getcwd()       #Win compatible
sys.path.append(path)    #Win compatible

def tree_encode(obj, encoding='utf-8'):
    type_ = type(obj)
    if type_ == list or type_ == tuple:
        return [tree_encode(e, encoding) for e in obj]
    elif type_ == dict:
        new_obj = dict(
            (tree_encode(k, encoding), tree_encode(v, encoding))
            for k, v in obj.items()
        )
        return new_obj
    elif type_ == str:    # was unicode in Python 2; str is already unicode in Python 3
        #  return obj.encode(encoding)  # removed: no encode needed
        return obj
    else:
        return obj


def sent_splitter_ja(text, delimiters=set('。.?!\n\r'),
                     parenthesis='()「」『』“”'):
    paren_chars = set(parenthesis)
    close2open = dict(list(zip(parenthesis[1::2], parenthesis[0::2])))
    pstack = []
    buff = []

    for i, c in enumerate(text):
        c_next = text[i+1] if i+1 < len(text) else None
        # check correspondence of parenthesis
        if c in paren_chars:
            if c in close2open:  # close
                if len(pstack) > 0 and pstack[-1] == close2open[c]:
                    pstack.pop()
            else:  # open
                pstack.append(c)

        buff.append(c)
        if c in delimiters:
            if len(pstack) == 0 and c_next not in delimiters:
                yield ''.join(buff)
                buff = []

    if len(buff) > 0:
        yield ''.join(buff)

if os.environ.get('SUMMPY_USE_JANOME') is not None:
    from summpy.misc.janome_segmenter import word_segmenter_ja
else:
    try:
        from summpy.misc.mecab_segmenter import word_segmenter_ja
    except ImportError:
        from summpy.misc.janome_segmenter import word_segmenter_ja


if __name__ == '__main__':
    pass
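
One point worth noting in tools.py: sent_splitter_ja splits on the delimiters only while no bracket pair is open, so a 。 inside 「」 does not end the sentence. A minimal sketch of that behaviour (the sample text is arbitrary):

# Minimal sketch: the 。 inside 「」 does not split the sentence.
from summpy.tools import sent_splitter_ja

text = '彼は「今日は晴れ。明日は雨。」と言った。だから傘を持っていく。'
for s in sent_splitter_ja(text):
    print(s)
# Expected output: two sentences; the quoted part stays inside the first one.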

divrank.py (no changes)


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import networkx as nx
from networkx.exception import NetworkXError
from networkx.utils import not_implemented_for


@not_implemented_for('multigraph')
def divrank(G, alpha=0.25, d=0.85, personalization=None,
            max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
            dangling=None):
    '''
    Returns the DivRank (Diverse Rank) of the nodes in the graph.
    This code is based on networkx.pagerank.

    Args: (diff from pagerank)
      alpha: controls strength of self-link [0.0-1.0]
      d: the damping factor

    Reference:
      Qiaozhu Mei and Jian Guo and Dragomir Radev,
      DivRank: the Interplay of Prestige and Diversity in Information Networks,
      http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.174.7982
    '''

    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # self-link (DivRank)
    for n in W.nodes_iter():
        for n_ in W.nodes_iter():
            if n != n_ :
                if n_ in W[n]:
                    W[n][n_][weight] *= alpha
            else:
                if n_ not in W[n]:
                    W.add_edge(n, n_)
                W[n][n_][weight] = 1.0 - alpha

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in list(nstart.items()))

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in list(personalization.items()))

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v/s) for k, v in list(dangling.items()))
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(list(xlast.keys()), 0)
        danglesum = d * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            D_t = sum(W[n][nbr][weight] * xlast[nbr] for nbr in W[n])
            for nbr in W[n]:
                #x[nbr] += d * xlast[n] * W[n][nbr][weight]
                x[nbr] += (
                    d * (W[n][nbr][weight] * xlast[nbr] / D_t) * xlast[n]
                )
            x[n] += danglesum * dangling_weights[n] + (1.0 - d) * p[n]

        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    raise NetworkXError('divrank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)


def divrank_scipy(G, alpha=0.25, d=0.85, personalization=None,
                  max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
                  dangling=None):
    '''
    Returns the DivRank (Diverse Rank) of the nodes in the graph.
    This code is based on networkx.pagerank_scipy
    '''
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = G.nodes()
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)
    S = scipy.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    # self-link (DivRank)
    M = scipy.sparse.lil_matrix(M)
    M.setdiag(0.0)
    M = alpha * M
    M.setdiag(1.0 - alpha)
    #print M.sum(axis=1)

    # initial vector
    x = scipy.repeat(1.0 / N, N)

    # Personalization vector
    if personalization is None:
        p = scipy.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise NetworkXError('Personalization vector dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        p = scipy.array([personalization[n] for n in nodelist],
                        dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)


        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = scipy.array([dangling[n] for n in nodelist],
                                       dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = scipy.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        D_t =  M * x
        x = (
            d * (x / D_t * M * x + sum(x[is_dangling]) * dangling_weights)
            + (1.0 - d) * p
        )
        # check convergence, l1 norm
        err = scipy.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(list(zip(nodelist, list(map(float, x)))))

    raise NetworkXError('divrank_scipy: power iteration failed to converge '
                        'in %d iterations.' % max_iter)


if __name__ == '__main__':

    g = nx.Graph()

    # this network appears in the reference.
    edges = {
        1: [2, 3, 6, 7, 8, 9],
        2: [1, 3, 10, 11, 12],
        3: [1, 2, 15, 16, 17],
        4: [11, 13, 14],
        5: [17, 18, 19, 20],
        6: [1],
        7: [1],
        8: [1],
        9: [1],
        10: [2],
        11: [4],
        12: [2],
        13: [4],
        14: [4],
        15: [3],
        16: [3],
        17: [3, 5],
        18: [5],
        19: [5],
        20: [5]
    }

    for u, vs in edges.items():
        for v in vs:
            g.add_edge(u, v)

    scores = nx.pagerank(g)
    print('# PageRank')
    print('# rank: node score')
    #print sum(scores.values())
    for i, n in enumerate(sorted(scores, key=lambda n: scores[n], reverse=True)):
        print('# {}: {} {}'.format(i+1, n, scores[n]))

    scores = divrank(g)
    print('\n# DivRank')
    #print sum(scores.values())
    print('# rank: node score')
    for i, n in enumerate(sorted(scores, key=lambda n: scores[n], reverse=True)):
        print('# {}: {} {}'.format(i+1, n, scores[n]))

    scores = divrank_scipy(g)
    print('\n# DivRank (scipy)')
    #print sum(scores.values())
    print('# rank: node score')
    for i, n in enumerate(sorted(scores, key=lambda n: scores[n], reverse=True)):
        print('# {}: {} {}'.format(i+1, n, scores[n]))

mecab_segmenter.py


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import MeCab


_mecab = MeCab.Tagger()
# part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3, inflection form, inflection type, base form, reading, pronunciation
_mecab_feat_labels = 'pos cat1 cat2 cat3 conj conj_t orig read pron'.split(' ')


def _mecab_parse_feat(feat):
    return dict(list(zip(_mecab_feat_labels, feat.split(','))))


def _mecab_node2seq(node, decode_surface=True, feat_dict=True,
                    mecab_encoding='utf-8'):
    # MeCab.Node cannot change attribute.
    while node:
        if decode_surface:
            #  node._surface = node.surface.decode(mecab_encoding)  # fixed: decode is not needed in Python 3
            node._surface = node.surface
            
        if feat_dict:  # save the part-of-speech information as a dict
            node.feat_dict = _mecab_parse_feat(
                node.feature  # fixed: node.feature.decode(mecab_encoding) is not needed in Python 3
            )

        yield node
        node = node.next


def is_stopword(n):  # <- mecab node
    if len(n._surface) == 0:
        return True
    elif re.search(r'^[\s!-@\[-`\{-~ 、-〜!-@[-`]+$', n._surface):  # changed ur'...' to r'...' (this pattern and the ones below)
        return True
    elif re.search(r'^(接尾|非自立)', n.feat_dict['cat1']):  # restored the original Japanese MeCab feature values
        return True
    elif 'サ変・スル' == n.feat_dict['conj'] or 'ある' == n.feat_dict['orig']:
        return True
    elif re.search(r'^(名詞|動詞|形容詞)', n.feat_dict['pos']):
        return False
    else:
        return True


def not_stopword(n):  # <- mecab node
    return not is_stopword(n)


def node2word(n):  # <- mecab node
    return n._surface


def node2norm_word(n):  # mecab node
    if n.feat_dict['orig'] != '*':
        return n.feat_dict['orig']
    else:
        return n._surface


def word_segmenter_ja(sent, node_filter=not_stopword,
                      node2word=node2norm_word, mecab_encoding='utf-8'):

    #if type(sent) == str:Delete
    #    sent = sent.encode(mecab_encoding)Delete
    
    nodes = list(
        _mecab_node2seq(_mecab.parseToNode(sent))
    )
    if node_filter:
        nodes = [n for n in nodes if node_filter(n)]
        
    words = [node2word(n) for n in nodes]

    return words


if __name__ == '__main__':
    text = '今日はいい天気ですね。'  # Japanese sample text (the segmenter expects Japanese input)
    print('|'.join(word_segmenter_ja(text)))  # .encode('utf-8') removed for Python 3

test.html (basically unchanged; only the textarea size and an Ajax error handler were added)


<html>
  <head>
    <meta charset="UTF-8">
  </head>
  <body>
    <textarea type="text" name="text" rows="20" cols="70"></textarea>
    <br>
    algorithm (lexrank|clexrank|divrank|mcp): <input type="text" value="lexrank" name="algo" /><br>
    length (the number of sentences): <input type="text" value="3" name="sent_limit" /><br>
    length (the number of chars): <input type="text" value="" name="char_limit" /><br>
    cumulative LexRank score: <input type="text" value="" name="imp_require" /><br>
    <button id="summarize">summarize</button>
    <br>
    <div id="out"></div>

    <script src="http://code.jquery.com/jquery-2.0.3.min.js"></script>
    <script type="text/javascript">
      $(document).ready(function () {
        $("#summarize").click(function (e) {
          var text = $("textarea[name='text']").val();
          var params = {
            text: text,
            algo: $("input[name='algo']").val(),
            sent_limit: $("input[name='sent_limit']").val(),
            char_limit: $("input[name='char_limit']").val(),
            imp_require: $("input[name='imp_require']").val(),
            debug: true
          };
          $.post("/summarize", params)
            .done(function (res) {
              var sentences = res.summary;
              var debug_info = res.debug_info;
              var out = $("#out");
              var summ_length = 0;
              out.empty();
              sentences.forEach(function (s) {
                summ_length += s.length;
                out.append("<p>" + s + "</p>");
              });
              var summ_rate = summ_length / text.length;
              out.prepend(
                '<p style="color:blue">'
                + 'Summary rate: ' + summ_rate
                + ' (' + summ_length + '/' + text.length + ' characters)'
                + '</p>'
              );
            }).fail((jqXHR, textStatus, errorThrown) => {
              alert("error" + jqXHR + "/" + textStatus + "/" + errorThrown)
            })
        });
      });
    </script>

  </body>
</html>

## Other


[server.bat]
python -m summpy.server -h 127.0.0.1 -p 8000

[URL when accessing from a browser]
http://127.0.0.1:8000/static/test.html
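
Once server.bat is running, the endpoint can also be called without a browser. The following is a minimal sketch using only the standard library; the host, port, and parameter names match the files above, and the sample text is arbitrary.

# Minimal sketch: POST to the running server and print the summary.
import json
import urllib.parse
import urllib.request

params = {
    'text': '要約したい文章をここに入れます。文はいくつかあったほうが結果が分かりやすいです。',
    'algo': 'lexrank',
    'sent_limit': 1,
}
data = urllib.parse.urlencode(params).encode('utf-8')
with urllib.request.urlopen('http://127.0.0.1:8000/summarize', data=data) as res:
    result = json.loads(res.read().decode('utf-8'))
for sent in result['summary']:
    print(sent)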

I should probably put this up on GitHub ... Anyway, this was my first post in a while.
