An example of reading a CSV file row by row, converting the field types and showing a progress bar while doing something with each row. The progress bar comes from the click library.
For now I will just paste the code. (It also happens to be an example of how to write an iterator that can be used in a `with` statement.)
field_converter.py is available here: https://gist.github.com/naoyat/3db8cd96c8dcecb5caea It is the module from the previous article, "I want to batch convert the result of "string".split() in Python".
csv_iterator.py
import sys
import csv
import click
from field_converter import FieldConverter
class CSV_Iterator(object):
    """Iterate over the rows of a CSV file, usable as a context manager.

    The file is opened in the constructor and read with ``csv.reader``
    (``excel`` dialect). Each row is returned as produced by the reader, or,
    when a ``field_converter`` is given, as returned by its ``convert()``
    method.

    Args:
        path: Path of the CSV file to read.
        skip_header: When true, the first row is consumed and not yielded.
        with_progress_bar: When true, rows are pulled through
            ``click.progressbar`` so progress is displayed while iterating.
        field_converter: Optional object with a ``convert(fields)`` method
            that is applied to every row.

    Attributes:
        line_count: Number of physical lines in the file (minus one when a
            header was skipped). It is used as the progress bar length; it
            can exceed the number of CSV records if quoted fields contain
            line breaks.

    NOTE(review): newer click releases may require the progress bar to be
    entered as a context manager before it is iterated -- confirm against
    the click version in use.
    """

    def __init__(self, path, skip_header=False, with_progress_bar=False,
                 field_converter=None):
        self.path = path
        self.with_progress_bar = with_progress_bar
        self.field_converter = field_converter

        self.f = open(path, 'r')
        try:
            # First pass over the file only counts lines so the progress bar
            # knows its total length; then rewind for the real read.
            self.line_count = sum(1 for _ in self.f)
            self.f.seek(0)  # rewind

            self.r = csv.reader(self.f, dialect='excel')
            if skip_header:
                # An empty file has no header; next() with a default avoids
                # leaking StopIteration out of the constructor.
                if next(self.r, None) is not None:
                    self.line_count -= 1
            print('(%d lines)' % (self.line_count,))

            if self.with_progress_bar:
                self.bar = click.progressbar(self.r, length=self.line_count)
        except Exception:
            # Do not leak the file handle when construction fails half-way.
            self.f.close()
            raise

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """Return the next (optionally converted) row.

        Raises:
            StopIteration: When the underlying reader is exhausted.

        Errors from the reader or the progress bar propagate unchanged
        instead of being reported as a silent end of iteration. A failing
        field conversion is printed and the unconverted row is returned.
        """
        source = self.bar if self.with_progress_bar else self.r
        fields = next(source)

        if self.field_converter:
            try:
                fields = self.field_converter.convert(fields)
            except Exception:
                # Best effort: report the problem, keep the raw fields.
                print(sys.exc_info())
        return fields

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __enter__(self):
        """Return self so ``with CSV_Iterator(...) as rows`` yields rows."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file; never suppress an exception from the with-body."""
        try:
            if exc_type is None and self.with_progress_bar:
                # Move the cursor off the progress bar line.
                print('')
        finally:
            # Close on every exit path, including when the body raised.
            self.f.close()
        return False
The code is also available as a gist: https://gist.github.com/naoyat/b1290d917638c412e140
Example of use.
example.py
from csv_iterator import CSV_Iterator
def foobar(csv_path):
    """Walk a five-column CSV file with a progress bar, converting each row.

    The header row is skipped. Every remaining row is passed through a
    ``FieldConverter`` built with one spec per column (``int``, ``int``,
    ``'iso-8859-1'``, ``'iso-8859-1'``, ``float``) and unpacked into five
    names. What each spec does is defined by field_converter.py --
    presumably the string specs name an encoding to decode with; confirm
    there.

    Args:
        csv_path: Path of the CSV file to read.
    """
    # example.py imports only CSV_Iterator at module level, so bring
    # FieldConverter into scope here to keep the example runnable.
    from field_converter import FieldConverter

    converter = FieldConverter(int, int, 'iso-8859-1', 'iso-8859-1', float)
    with CSV_Iterator(csv_path,
                      skip_header=True,
                      with_progress_bar=True,
                      field_converter=converter) as rows:
        # `row_id` rather than `id` so the builtin is not shadowed.
        for row_id, uid, title, query, target in rows:
            pass  # placeholder: do something with the converted row here
Recommended Posts