[PYTHON] Multi-condition data handling

I wrote a class that manages data for many combinations of conditions, with each combination stored in its own directory.

To set the conditions, write each category as its own list and pass the list of categories to the class. root_path is the directory under which the condition directories will be created.

a = handle_multivalue([["aaa","bbb","ccc"],["asd","dfg"],["1","2","3","4","5"]], root_path="root")

The list of paths is stored in pathlst.

print a.pathlst

output

['aaa/asd/1', 'aaa/asd/2', 'aaa/asd/3', 'aaa/asd/4', 'aaa/asd/5', 'aaa/dfg/1', 'aaa/dfg/2', 'aaa/dfg/3', 'aaa/dfg/4', 'aaa/dfg/5', 'bbb/asd/1', 'bbb/asd/2', 'bbb/asd/3', 'bbb/asd/4', 'bbb/asd/5', 'bbb/dfg/1', 'bbb/dfg/2', 'bbb/dfg/3', 'bbb/dfg/4', 'bbb/dfg/5', 'ccc/asd/1', 'ccc/asd/2', 'ccc/asd/3', 'ccc/asd/4', 'ccc/asd/5', 'ccc/dfg/1', 'ccc/dfg/2', 'ccc/dfg/3', 'ccc/dfg/4', 'ccc/dfg/5']

After analyzing the data of each condition, store the data in the above path.

You can reorder the categories with the following method. This changes the order of the paths stored in pathlst.

a.transpose([1,2,0])
print a.pathlst

output

['aaa/asd/1', 'bbb/asd/1', 'ccc/asd/1', 'aaa/dfg/1', 'bbb/dfg/1', 'ccc/dfg/1', 'aaa/asd/2', 'bbb/asd/2', 'ccc/asd/2', 'aaa/dfg/2', 'bbb/dfg/2', 'ccc/dfg/2', 'aaa/asd/3', 'bbb/asd/3', 'ccc/asd/3', 'aaa/dfg/3', 'bbb/dfg/3', 'ccc/dfg/3', 'aaa/asd/4', 'bbb/asd/4', 'ccc/asd/4', 'aaa/dfg/4', 'bbb/dfg/4', 'ccc/dfg/4', 'aaa/asd/5', 'bbb/asd/5', 'ccc/asd/5', 'aaa/dfg/5', 'bbb/dfg/5', 'ccc/dfg/5']

dump_all_path creates every path in pathlst as a directory under root_path.

a.dump_all_path()

If you pass only root_path to the class, it reconstructs the list of conditions and the full list of paths from the directories that were created.

b = handle_multivalue("root")
print b.condlst
print b.pathlst

output

[['aaa', 'bbb', 'ccc'], ['asd', 'dfg'], ['1', '2', '3', '4', '5']]


['aaa/asd/1', 'aaa/asd/2', 'aaa/asd/3', 'aaa/asd/4', 'aaa/asd/5', 'aaa/dfg/1', 'aaa/dfg/2', 'aaa/dfg/3', 'aaa/dfg/4', 'aaa/dfg/5', 'bbb/asd/1', 'bbb/asd/2', 'bbb/asd/3', 'bbb/asd/4', 'bbb/asd/5', 'bbb/dfg/1', 'bbb/dfg/2', 'bbb/dfg/3', 'bbb/dfg/4', 'bbb/dfg/5', 'ccc/asd/1', 'ccc/asd/2', 'ccc/asd/3', 'ccc/asd/4', 'ccc/asd/5', 'ccc/dfg/1', 'ccc/dfg/2', 'ccc/dfg/3', 'ccc/dfg/4', 'ccc/dfg/5']

The script is shown below.

class handle_multivalue():
    """Enumerate every combination of several condition lists as paths.

    The instance is built either from a list of condition lists (one inner
    list per category) or from the name of an existing root directory whose
    nested sub-directories encode the categories.

    Attributes set by the methods below:
        condlst   -- list of condition lists, one per category.
        shape     -- array holding the length of each category, in the order
                     last used to build ``pathlst`` (``transpose`` leaves it
                     in the transposed order).
        cond_idx  -- list of flat index arrays returned by ``all_conds_2d``.
        pathlst   -- list of "cond/cond/..." strings, relative to root_path.
        root_path -- directory under which the paths live.

    NOTE(review): ``tensor``, ``array``, ``arange``, ``ones``, ``argsort``,
    ``loadtxt``, ``glob`` and ``os`` are expected to be in scope at module
    level; ``tensor`` is not defined in this file -- confirm where it comes
    from.
    """

    def __init__(self, obj, root_path="tmpdir"):
        """Build the path list from conditions or from a directory tree.

        obj       -- a ``str`` is taken as the root directory to scan;
                     anything else is taken as the list of condition lists.
        root_path -- root directory, used only when ``obj`` is not a str.
        """
        # One index letter per category, so at most len(self.idx) categories.
        self.idx = "ijklmnop"
        # One "d" per index letter so equal-length slices of idx and ud always
        # exist (the original literal was one character short of idx).
        self.ud = "d" * len(self.idx)
        if isinstance(obj, str):
            self.root_path = obj
            self.get_all_path_from_dir()
        else:
            self.condlst = obj
            self.root_path = root_path
            self.get_all_path()

    def _expand_conditions(self):
        """Return, per category, its condition names picked by ``cond_idx``."""
        return [array(cond)[idx.astype(int)]
                for cond, idx in zip(self.condlst, self.cond_idx)]

    def _join_paths(self, columns):
        """Join one entry from each category column into "a/b/c" strings."""
        return ["/".join(row).strip("/") for row in array(columns).T]

    def get_all_path(self):
        """Fill ``shape``, ``cond_idx`` and ``pathlst`` from ``condlst``."""
        self.shape = array([len(cond) for cond in self.condlst])
        self.cond_idx = self.all_conds_2d()
        self.pathlst = self._join_paths(self._expand_conditions())

    def get_all_conds(self):
        """Scan ``root_path`` and store the combinations in ``conds2d``.

        ``conds2d`` is the transposed array of expanded condition names,
        i.e. one row per combination and one column per category.
        """
        self.gen_dir_path(self.root_path, [])
        self.cond_idx = self.all_conds_2d()
        self.conds2d = array(self._expand_conditions()).T

    def get_all_path_from_dir(self):
        """Rebuild ``condlst`` and ``pathlst`` from the tree at root_path."""
        self.condlst = []
        self.gen_dir_path(self.root_path, [])
        self.cond_idx = self.all_conds_2d()
        self.pathlst = self._join_paths(self._expand_conditions())

    def all_conds_2d(self):
        """Return one flattened index array per category.

        Reads ``self.condlst`` and ``self.shape``.  Each category's
        ``arange`` is multiplied by a ``tensor`` of ones of the full shape,
        transposed to the index-letter order and flattened.

        NOTE(review): presumably ``tensor.__mul__`` broadcasts over matching
        index letters and ``transpose`` reorders ``ret.arr`` in place, so each
        result gives the position within its category for every combination
        -- confirm against the ``tensor`` implementation.
        """
        rank = len(self.condlst)
        labels = self.idx[:rank]
        o = tensor(ones(tuple(self.shape)), labels, self.ud[:rank])
        retlst = []
        for num, cond in enumerate(self.condlst):
            ret = tensor(arange(len(cond)), self.idx[num], self.ud[num]) * o
            ret.transpose(labels)
            retlst.append(ret.arr.flatten())
        return retlst

    def gen_dir_path(self, path, condlst):
        """Collect the entry names of each nesting level below ``path``.

        Appends the names found at every level to ``condlst`` (mutated in
        place) and recurses into the first entry only, so all siblings are
        assumed to share the same sub-structure.  When a level has no
        entries, stores the result in ``self.condlst`` / ``self.shape`` and
        returns 0; otherwise returns None.
        """
        # glob gives no ordering guarantee; sort so the result is stable.
        entries = sorted(glob(os.path.join(path, "*")))
        if not entries:
            self.shape = array([len(x) for x in condlst])
            self.condlst = condlst
            return 0
        condlst.append([os.path.basename(entry) for entry in entries])
        self.gen_dir_path(entries[0], condlst)

    def transpose(self, tlst):
        """Reorder ``pathlst`` so categories vary in the order ``tlst``.

        tlst -- permutation of category positions; the last entry names the
                category that changes fastest along ``pathlst``.

        Path strings keep the original category order; only the sequence of
        paths changes.  ``condlst`` is restored afterwards, while ``shape``
        and ``cond_idx`` stay in the transposed order.
        """
        original = self.condlst
        try:
            # Plain list indexing: the condition lists may differ in length,
            # which NumPy cannot hold as a regular array.
            self.condlst = [original[i] for i in tlst]
            self.shape = array([len(x) for x in self.condlst])
            self.cond_idx = self.all_conds_2d()
            columns = self._expand_conditions()
        finally:
            # Never leave the instance with the permuted condition list.
            self.condlst = original
        # argsort(tlst) is the inverse permutation: it puts the columns back
        # into the original category order before joining.
        self.pathlst = self._join_paths(array(columns)[argsort(tlst)])

    def view(self):
        """Plot the data loaded from every path, grouped into figures.

        A new figure is opened every ``self.shape[-1]`` paths; each path is
        read with ``loadtxt`` from ``root_path`` and drawn with its relative
        path as the legend label.
        """
        import matplotlib.pyplot as plt
        for n, i in enumerate(self.pathlst):
            if not n % self.shape[-1]:
                plt.figure()
            x = loadtxt(self.root_path + "/" + i)
            plt.plot(x, label=i)
            plt.legend()
        plt.show()

    def dump_all_path(self):
        """Create every path of ``pathlst`` as a directory under root_path.

        Directories that already exist are skipped, so the call can be
        repeated safely.
        """
        for rel in self.pathlst:
            target = self.root_path + "/" + rel
            if not os.path.isdir(target):
                os.makedirs(target)
        


Recommended Posts

Multi-condition data handling
Data handling
Data handling 3 (development) About data format
Data handling 2 Analysis of various data formats
Data handling 1 Data formatting and file input / output
Exception handling
A story stuck with handling Python binary data
[Introduction to cx_Oracle] (5th) Handling of Japanese data
Python Application: Data Handling Part 2: Parsing Various Data Formats
[Data science memorandum] Handling of missing values ​​[python]