[PYTHON] Try refactoring step by step

I wanted to try something like this once

Premise

First edition

code

f.py


def write(lines):
    print 'write:', lines

csv.py


# -*- coding: utf-8 -*-

import f


def as_kvs(rows):
    keys = rows[0]
    value_rows = rows[1:]

    return [dict(zip(keys, value_row)) for value_row in value_rows]


def write_first_one_or_empty(rows, sort_key, filter_key, filter_value, write_key):
    #Make multiple kv
    kvs = as_kvs(rows)

    #Sort by specified key
    _sorted = sorted(kvs, key=lambda row: row[sort_key])

    #Filters only where the specified key has the specified value
    _filtered = filter(lambda row: row[filter_key] == filter_value, _sorted)

    if 0 < len(_filtered):
        #Extract the first line with the specified key
        _mapped_one = map(lambda row: row[write_key], _filtered)[0]
        #Write
        f.write([_mapped_one])
    else:
        #Create empty file
        f.write([])


def write_all_or_empty(rows, filter_key, filter_value, write_key):
    #Make multiple kv
    kvs = as_kvs(rows)

    #Filters only where the specified key has the specified value
    _filtered = filter(lambda row: row[filter_key] == filter_value, kvs)

    if _filtered:
        #Extract all lines with the specified key
        _mapped = map(lambda row: row[write_key], _filtered)
        #Write
        f.write(_mapped)
    else:
        #Create empty file
        f.write([])


def write_all_or_error(rows, filter_key, filter_value, write_key):
    #Make multiple kv
    kvs = as_kvs(rows)

    #Filters only where the specified key has the specified value
    _filtered = filter(lambda row: row[filter_key] == filter_value, kvs)

    if _filtered:
        #Extract all lines with the specified key
        _mapped = map(lambda row: row[write_key], _filtered)
        #Write
        f.write(_mapped)
    else:
        #error
        raise Exception("no result")

Caller

main.py


# status,Give csv consisting of code
#Sort by code
#The first case where status is active
#Write out code
#If not, just create a file

csv.write_first_one_or_empty(
    [['status', 'code'], ['dead', '001'], ['active', '003'], ['active', '002']],
    'code', 'status', 'active', 'code'
)

#result
# write: ['002']

main.py


# name,Give a csv consisting of gender
#All cases where gender is male
#Write out name
#If not, just create a file

csv.write_all_or_empty(
    [['name', 'gender'], ['Eva', 'female'], ['Sunny', 'female']],
    'gender', 'male', 'name'
)

#result
# write: []

main.py


# status,Give csv consisting of tel
#All cases with dead status
#Export tel
#Otherwise an error

csv.write_all_or_error(
    [['status', 'tel'], ['dead', '090-1111-1111'], ['active', '090-9999-9999']],
    'status', 'dead', 'tel'
)

#result
# write: ['090-1111-1111']

problem

step-1 I will write a comment in English for the time being

Difference

 def write_first_one_or_empty(rows, sort_key, filter_key, filter_value, write_key):
-    #Make multiple kv
+    # as kvs
     kvs = as_kvs(rows)

-    #Sort by specified key
+    # sort by specified key
     _sorted = sorted(kvs, key=lambda row: row[sort_key])

-    #Filters only where the specified key has the specified value
+    # filter by specified key and value
     _filtered = filter(lambda row: row[filter_key] == filter_value, _sorted)

     if 0 < len(_filtered):
-        #Extract the first line with the specified key
+        # extract by specified key and first one
         _mapped_one = map(lambda row: row[write_key], _filtered)[0]
-        #Write
+        # write
         f.write([_mapped_one])
     else:
-        #Create empty file
+        # write empty
         f.write([])
 def write_all_or_empty(rows, filter_key, filter_value, write_key):
-    #Make multiple kv
+    # as kvs
     kvs = as_kvs(rows)

-    #Filters only where the specified key has the specified value
+    # filter by specified key and value
     _filtered = filter(lambda row: row[filter_key] == filter_value, kvs)

     if _filtered:
-        #Extract all lines with the specified key
+        # extract by specified key
         _mapped = map(lambda row: row[write_key], _filtered)
-        #Write
+        # write
         f.write(_mapped)
     else:
-        #Create empty file
+        # write empty
         f.write([])
 def write_all_or_error(rows, filter_key, filter_value, write_key):
-    #Make multiple kv
+    # as kvs
     kvs = as_kvs(rows)

-    #Filters only where the specified key has the specified value
+    # filter by specified key and value
     _filtered = filter(lambda row: row[filter_key] == filter_value, kvs)

     if _filtered:
-        #Extract all lines with the specified key
+        # extract by specified key
         _mapped = map(lambda row: row[write_key], _filtered)
-        #Write
+        # write
         f.write(_mapped)
     else:
-        #error
+        # error
         raise Exception("no result")

Next problem

step-2 Organize comments and cut out methods based on English comments

policy

Difference

The method that was cut out is like this

+def sort_by_specified_key(kvs, key):
+    return sorted(kvs, key=lambda row: row[key])
+def filter_by_specified_key_and_value(kvs, key, value):
+    return filter(lambda row: row[key] == value, kvs)
+def extract_by_specified_key_and_first_one(kvs, key):
+    return kvs[0][key]

+def extract_by_specified_key(kvs, key):
+    return map(lambda row: row[key], kvs)
+def error():
+    raise Exception("no result")

The main body is like this

-def write_first_one_or_empty(rows, sort_key, filter_key, filter_value, write_key):
+def write_first_one_or_empty(rows, sort_key, filter_key, filter_value, extraction_key):
-    # as kvs
     kvs = as_kvs(rows)

-    # sort by specified key
-    _sorted = sorted(kvs, key=lambda row: row[sort_key])
+    _sorted = sort_by_specified_key(kvs, sort_key)

-    # filter by specified key and value
-    _filtered = filter(lambda row: row[filter_key] == filter_value, _sorted)
+    _filtered = filter_by_specified_key_and_value(_sorted, filter_key, filter_value)

     if 0 < len(_filtered):
-        # extract by specified key and first one
-        _mapped_one = map(lambda row: row[write_key], _filtered)[0]
-        # write
-        f.write([_mapped_one])
+        extracted_one = extract_by_specified_key_and_first_one(_filtered, extraction_key)
+        f.write([extracted_one])
     else:
-        # write empty
         f.write([])
-def write_all_or_empty(rows, filter_key, filter_value, write_key):
+def write_all_or_empty(rows, filter_key, filter_value, extraction_key):
-    # as kvs
     kvs = as_kvs(rows)

-    # filter by specified key and value
-    _filtered = filter(lambda row: row[filter_key] == filter_value, kvs)
+    _filtered = filter_by_specified_key_and_value(kvs, filter_key, filter_value)

     if _filtered:
-        # extract by specified key
-        _mapped = map(lambda row: row[write_key], _filtered)
-        # write
-        f.write(_mapped)
+        extracted = extract_by_specified_key(_filtered, extraction_key)
+        f.write(extracted)
     else:
-        # write empty
         f.write([])
-def write_all_or_error(rows, filter_key, filter_value, write_key):
+def write_all_or_error(rows, filter_key, filter_value, extraction_key):
-    # as kvs
     kvs = as_kvs(rows)

-    # filter by specified key and value
-    _filtered = filter(lambda row: row[filter_key] == filter_value, kvs)
+    _filtered = filter_by_specified_key_and_value(kvs, filter_key, filter_value)

     if _filtered:
-        # extract by specified key
-        _mapped = map(lambda row: row[write_key], _filtered)
-        # write
-        f.write(_mapped)
+        extracted = extract_by_specified_key(_filtered, extraction_key)
+        f.write(extracted)
     else:
-        # error
-        raise Exception("no result")
+        error()

Next problem

step-3 Do a little local refactoring

Difference

I prepared head and tail because index access is a little unfriendly.

+def head(xs):
+    return xs[0]
+def tail(xs):
+    return xs[1:]

Where to use

 def as_kvs(rows):
-    keys = rows[0]
-    value_rows = rows[1:]
+    keys = head(rows)
+    value_rows = tail(rows)
 def extract_by_specified_key_and_first_one(kvs, key):
-    return kvs[0][key]
+    return head(kvs)[key]

Changed filter_by_specified_key_and_value to receive predicate instead of key, value to be a little more flexible

-def filter_by_specified_key_and_value(kvs, key, value):
-    return filter(lambda row: row[key] == value, kvs)
+def filter_by_predicate(kvs, predicate):
+    return filter(predicate, kvs)

Then filter_by_predicate does nothing more than filter and discards it.

-def filter_by_predicate(kvs, predicate):
-    return filter(predicate, kvs)

Then I made head, so I may not have to prepare ʻextract_by_specified_key_and_first_one`. If the names of the methods divided into small pieces are correct, it is difficult for the process to become unclear even if they are used in combination.

-def extract_by_specified_key_and_first_one(kvs, key):
-    return head(kvs)[key]

Here is the main body that reflects the above

-def write_first_one_or_empty(rows, sort_key, filter_key, filter_value, extraction_key):
+def write_first_one_or_empty(rows, sort_key, predicate, extraction_key):
     kvs = as_kvs(rows)

     _sorted = sort_by_specified_key(kvs, sort_key)

-    _filtered = filter_by_specified_key_and_value(_sorted, filter_key, filter_value)
+    _filtered = filter(predicate, _sorted)

     if 0 < len(_filtered):
-        extracted_one = extract_by_specified_key_and_first_one(_filtered, extraction_key)
+        extracted_one = head(_filtered)[extraction_key]
         f.write([extracted_one])
     else:
         f.write([])
-def write_all_or_empty(rows, filter_key, filter_value, extraction_key):
+def write_all_or_empty(rows, predicate, extraction_key):
     kvs = as_kvs(rows)

-    _filtered = filter_by_specified_key_and_value(kvs, filter_key, filter_value)
+    _filtered = filter(predicate, kvs)

     if _filtered:
         extracted = extract_by_specified_key(_filtered, extraction_key)
         f.write(extracted)
     else:
         f.write([])
-def write_all_or_error(rows, filter_key, filter_value, extraction_key):
+def write_all_or_error(rows, predicate, extraction_key):
     kvs = as_kvs(rows)

-    _filtered = filter_by_specified_key_and_value(kvs, filter_key, filter_value)
+    _filtered = filter(predicate, kvs)

     if _filtered:
         extracted = extract_by_specified_key(_filtered, extraction_key)
         f.write(extracted)
     else:
         error()

Next problem

step-4 Make common parts of the code that look similar (bad policy)

policy

Difference

+def filter_and_extract_and_write_if_not_empty(kvs, predicate, extraction_key):
+    _filtered = filter(predicate, kvs)
+
+    if _filtered:
+        extracted = extract_by_specified_key(_filtered, extraction_key)
+        f.write(extracted)

Since it was cut out, the number of lines in the main body decreased

 def write_all_or_empty(rows, predicate, extraction_key):
     kvs = as_kvs(rows)

-    _filtered = filter(predicate, kvs)
+    filter_and_extract_and_write_if_not_empty(kvs, predicate, extraction_key)

-    if _filtered:
-        extracted = extract_by_specified_key(_filtered, extraction_key)
-        f.write(extracted)
-    else:
+    if not kvs:
         f.write([])
 def write_all_or_error(rows, predicate, extraction_key):
     kvs = as_kvs(rows)

-    _filtered = filter(predicate, kvs)
+    filter_and_extract_and_write_if_not_empty(kvs, predicate, extraction_key)

-    if _filtered:
-        extracted = extract_by_specified_key(_filtered, extraction_key)
-        f.write(extracted)
-    else:
+    if not kvs:
         error()

Problems with this policy

What was wrong

step-4 Organize the processing flow (good policy)

The above-mentioned bad policy is cut back and redone

policy

Difference

Pay attention to the second body method

This time, there is no need to explicitly separate the processing for the case with the contents of the list and the case with the empty list. Basically, if the processing to which the list is passed is the same, there is no need to be aware of the length of the list (it should not be).

It's better to pass this to f.write without knowing if it's a content or an empty list

 def write_all_or_empty(rows, predicate, extraction_key):
     kvs = as_kvs(rows)

     _filtered = filter(predicate, kvs)

-    if _filtered:
-        extracted = extract_by_specified_key(_filtered, extraction_key)
-        f.write(extracted)
-    else:
-        f.write([])
+    extracted = extract_by_specified_key(_filtered, extraction_key)
+    f.write(extracted)

Next, pay attention to the first body method.

If this is not an empty list, the first element is taken out, processed, and listed again.

this

The flow

I will change the idea to the flow

First, prepare a method to get the matching first element from the list with a maximum length of 1.

+def find_first(kvs, predicate):
+    for kv in kvs:
+        if predicate(kv):
+            return [kv]
+    else:
+        return []

Then you don't have to worry about the contents like the second method of the main body method. Moreover, since it is a list conversion, you can use ʻextract_by_specified_key instead of headandkey` access.

 def write_first_one_or_empty(rows, sort_key, predicate, extraction_key):
     kvs = as_kvs(rows)

     _sorted = sort_by_specified_key(kvs, sort_key)
+    first = find_first(_sorted, predicate)

-    _filtered = filter(predicate, _sorted)
-
-    if 0 < len(_filtered):
-        extracted_one = head(extract_by_specified_key(_filtered, extraction_key))
-        f.write([extracted_one])
-    else:
-        f.write([])
+    extracted = extract_by_specified_key(first, extraction_key)
+    f.write(extracted)

Both the ʻif clause and the ʻelse clause of the main body methods 1 and 2 fit into the particle size of" writing process without being aware of the list length " In a bad example, ʻif and ʻelse were a group of export processes, but it was bad that only ʻif` was cut out.

Next problem

step-5 Organize the cut out methods

policy

Difference

l.py


+# -*- coding: utf-8 -*-
+
+
+def head(xs):
+    return xs[0]
+
+
+def tail(xs):
+    return xs[1:]
+
+
+def find_first(xs, predicate):
+    for x in xs:
+        if predicate(x):
+            return [x]
+    else:
+        return []

d.py


+# -*- coding: utf-8 -*-
+
+
+def extract_by_specified_key(xs, key):
+    return map(lambda x: x[key], xs)
+
+
+def sort_by_specified_key(xs, key):
+    return sorted(xs, key=lambda x: x[key])
-def as_kvs(rows):
+def __as_kvs(rows):
-def error():
+def __error():

Why not cut out the seemingly convenient ʻas_kvs`

kvs is just a temporary data structure prepared by converting the data structure expected by csv.py only to make it easier to process internally.

kvs is actually dict, but the dict type does not appear in the public arguments or return value of csv.py. In other words, there is no dict type as a function provided by the module, so it should not be exposed.

In addition, the argument type of ʻas_kvs is list, but there are restrictions such as header + [body]configuration and that all columns match, sol.py and I thought that it was preferable to make csv.py private processing rather than d.py`

Next problem

step-6 Refactor the private method that is used only once (maybe pros and cons here)

When it comes to slightly larger modules, it can be extremely difficult to know where and how many privates are used.

If you do so, it may be difficult to refactor the private method or understand the range of influence, which can be quite difficult.

There are many privates, but each one is used only once.

public 1 -> private 1 -> private 2 -> private 3
public a -> private a -> private b
public x -> private x

It is much easier to grasp the whole picture if you think that there are actually three publics. You can refactor private with peace of mind.

An example of a mixture of private, which is used only once, and private, which depends on various parts.

public 1 -> private 1
            |
            V
public a -> private a -> private b
                         ^
                         |
public x -> private x -> private y

private 1 and private x can be changed to some extent easily, but if you fix private b lightly, it will affect all public.

policy

Try to define private, which is used only from one place, in the method

This is the method I tried to do without permission, but there is no proper exhibition or discussion, but I think it is a good move and I use it a lot in disposable cords and solo projects

Difference

Since __error is used only in one place, I prepared it with def in def

By the way, I deleted the annoying __ because it became invisible from the outside anyway.

 def write_all_or_error(rows, predicate, extraction_key):
+    def write_or_error(kvs):
+        if kvs:
+            f.write(kvs)
+        else:
+            raise Exception("no result")
+
     kvs = __as_kvs(rows)

     _filtered = filter(predicate, kvs)

     extracted = d.extract_by_specified_key(_filtered, extraction_key)
-    if extracted:
-        f.write(extracted)
-    else:
-        __error()
+    write_or_error(extracted)

Of course, __as_kvs is used from many places, so don't change it.

The final problem

step-7 Write the test separately for conversion and export

policy

So the responsibility of the module is limited to "conversion" and the test is implemented.

Difference

It is decided to end with return without" output " Along with that, the method name was changed from write_ to ʻextract_`.

-def write_first_one_or_empty(rows, sort_key, predicate, extraction_key):
+def extract_first_one_or_empty(rows, sort_key, predicate, extraction_key):
     kvs = __as_kvs(rows)

     _sorted = d.sort_by_specified_key(kvs, sort_key)
     first = l.find_first(_sorted, predicate)

-    extracted = d.extract_by_specified_key(first, extraction_key)
-    f.write(extracted)
+    return d.extract_by_specified_key(first, extraction_key)
-def write_all_or_empty(rows, predicate, extraction_key):
+def extract_all_or_empty(rows, predicate, extraction_key):
     kvs = __as_kvs(rows)

     _filtered = filter(predicate, kvs)

-    extracted = d.extract_by_specified_key(_filtered, extraction_key)
-    f.write(extracted)
+    return d.extract_by_specified_key(_filtered, extraction_key)
-def write_all_or_error(rows, predicate, extraction_key):
-    def write_or_error(kvs):
-        if kvs:
-            f.write(kvs)
+def extract_all_or_error(rows, predicate, extraction_key):
+    def it_or_error(xs):
+        if xs:
+            return xs
         else:
             raise Exception("no result")

     kvs = __as_kvs(rows)

     _filtered = filter(predicate, kvs)

     extracted = d.extract_by_specified_key(_filtered, extraction_key)
-    write_or_error(extracted)
+    return it_or_error(extracted)

As a result, ʻimport disappears, so it can be confirmed that csv.py` no longer processes file-io.

-from private import f

The test looks like this

csv_test.py


# -*- coding: utf-8 -*-

import csv

assert csv.extract_first_one_or_empty(
    [['status', 'code'], ['dead', '001'], ['active', '003'], ['active', '002']],
    'code', lambda kvs: kvs['status'] == 'active', 'code'
) == ['002']

assert csv.extract_first_one_or_empty(
    [['status', 'code'], ['dead', '001']],
    'code', lambda kvs: kvs['status'] == 'active', 'code'
) == []

The end

Completed form

csv.py


# -*- coding: utf-8 -*-

import l
import d


def __as_kvs(rows):
    keys = l.head(rows)
    value_rows = l.tail(rows)

    return [dict(zip(keys, value_row)) for value_row in value_rows]


def extract_first_one_or_empty(rows, sort_key, predicate, extraction_key):
    kvs = __as_kvs(rows)

    _sorted = d.sort_by_specified_key(kvs, sort_key)
    first = l.find_first(_sorted, predicate)

    return d.extract_by_specified_key(first, extraction_key)


def extract_all_or_empty(rows, predicate, extraction_key):
    kvs = __as_kvs(rows)

    _filtered = filter(predicate, kvs)

    return d.extract_by_specified_key(_filtered, extraction_key)


def extract_all_or_error(rows, predicate, extraction_key):
    def it_or_error(xs):
        if xs:
            return xs
        else:
            raise Exception("no result")

    kvs = __as_kvs(rows)

    _filtered = filter(predicate, kvs)

    extracted = d.extract_by_specified_key(_filtered, extraction_key)
    return it_or_error(extracted)

Compared to the first edition

Tightening

The point

I can't say it in a nutshell ...

Don't write worthless comments

Then what is the comment to write on the contrary?

Even so, if you think it's painful without comments

Bad-looking private method

Good private method

Differences in handling private and public methods

What to test

Do not standardize because the code looks similar

def in def

scala can be obedient

def upperJoin(xs: List[String], sep: String): String = {
    def upper(xs: List[String]): List[String] = xs.map(_.toUpperCase)
    def join(xs: List[String]): String = xs.mkString(sep)

    join(upper(xs))
}

upperJoin(List("hello", "world"), " ") // HELLO WORLD

Even in java, if you use Function <T, R> etc.

public static String upperJoin(List<String> xs, String sep) {
    Function<List<String>, List<String>> upper = _xs -> _xs.stream().map(String::toUpperCase).collect(toList());
    Function<List<String>, String> join = _xs -> _xs.stream().collect(joining(sep));

    return join.apply(upper.apply(xs));
}

upperJoin(asList("hello", "world"), " "); // HELLO WORLD

can also havekell Or rather, the idea comes from the sense of defining haskell functions by value.

upperJoin xs sep = (join . upper) xs
  where
    upper = map (map toUpper)
    join = intercalate sep

upperJoin ["hello", "world"] " " -- HELLO WORLD

So the java example is close to haskell, and you can write it in python like this: (In PEP 8, naming and binding lambda is a violation of coding norms, but I hate nesting, so I'm more likely to be alone)

def upper_join(xs, sep):
    upper = lambda xs: [x.upper() for x in xs]
    join = lambda xs: sep.join(xs)

    return join(upper(xs))

upper_join(['hello', 'world'], ' ') # HELLO WORLD

Reflection

The end

I just wanted to try it, so I was satisfied

It doesn't matter, but when I see the black letters on the red and green spaces, I always think that it looks like a watermelon.

Recommended Posts

Try refactoring step by step
Step by Step for creating a Dockerfile
Try to implement and understand the segment tree step by step (python)
Low-rank approximation of images by HOSVD step by step
Try to face the integration by parts
Try to classify O'Reilly books by clustering