[PYTHON] Artificial data generation with numpy

The generated data can be written out in CSV format and shown as a scatter plot. NumPy's `np.c_` and `np.r_` concatenation helpers are used to assemble the arrays.

Artificial data sets that can be created: circles and their combinations (a circle inside a circle (2D), XOR-like patterns (2D), intertwined circles (3D)); twisted and intertwined rings (3D); spherical shells of any dimension (a spherical shell inside a spherical shell); the Lorenz attractor; and the Rössler attractor.

Reference: Marsaglia's method — http://stackoverflow.com/questions/15880367/python-uniform-distribution-of-points-on-4-dimensional-sphere

gendata.py


# -*- coding: utf-8 -*-
"""
Created on Wed May 07 21:17:21 2014

@author: xiangze
"""

import csv
import numpy as np
#from matplotlib.pyplot import *
import matplotlib.pyplot as plt

PI=np.pi
PI2=2*PI

def gencircle(rc,rr=0.1,offset=[0,0],num=100,label=0):
    """Sample labelled points scattered around a circle in the plane.

    Every point gets its own radius, drawn uniformly from
    ``[rc - rr, rc + rr]``, and its own angle, drawn uniformly from
    ``[0, 2*pi)``.

    Parameters
    ----------
    rc : float
        Nominal radius of the circle.
    rr : float
        Half-width of the uniform radial jitter.
    offset : sequence of two floats
        Centre of the circle.  Only read, never modified.
    num : int
        Number of points.  Integral floats such as ``200 / 2`` are accepted
        and converted with ``int``.
    label : scalar
        Value stored in the last column of every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num, 3)`` with columns ``x, y, label``.
    """
    num = int(num)
    # One (radial jitter, angle) pair per point.
    draws = np.random.uniform([-rr, 0.0], [rr, 2.0 * np.pi], size=(num, 2))
    radius = rc + draws[:, 0]
    theta = draws[:, 1]
    return np.c_[radius * np.sin(theta) + offset[0],
                 radius * np.cos(theta) + offset[1],
                 np.repeat(label, num)]
    
    
def genring(rc,rr=0.1,offset=[0,0,0],num=100,label=0,normaldir='x'):
    """Place a noisy circle from ``gencircle`` into 3-D space.

    The circle lies in the plane perpendicular to the axis named by
    ``normaldir``: ``'x'`` or ``'y'`` select that axis, any other value
    selects the z axis.  The coordinate along the normal axis is constant and
    equal to the matching entry of ``offset``; the other two entries of
    ``offset`` are passed to ``gencircle`` as the circle's centre.

    Returns an array with ``num`` rows and columns ``x, y, z, label``.
    """
    # Index of the coordinate that stays fixed (the ring's normal axis).
    if normaldir == 'x':
        fixed = 0
    elif normaldir == 'y':
        fixed = 1
    else:
        fixed = 2
    first, second = [axis for axis in (0, 1, 2) if axis != fixed]

    disc = gencircle(rc, rr, [offset[first], offset[second]], num, label)

    # Assemble the four output columns; the label column always comes last.
    columns = [None, None, None, disc[:, 2]]
    columns[fixed] = np.repeat(offset[fixed], num)
    columns[first] = disc[:, 0]
    columns[second] = disc[:, 1]
    return np.c_[tuple(columns)]
        
def gentwistedring0(rc=[1,0.3],rr=0.1,offset=[0,0,0],num=100,label=0,twistratio=3.0,phase=0):
    """Sample labelled points on a strand that winds around a ring.

    A base ring of radius ``rc[0]`` (with uniform radial jitter of half-width
    ``rr``) lies in the x-y plane, centred on ``offset``.  Each point at ring
    angle ``th`` is displaced by a vector of length ``rc[1]`` that rotates in
    the radial/z plane with angle ``th * twistratio + phase``.

    Parameters
    ----------
    rc : sequence of two floats
        ``rc[0]`` is the ring radius, ``rc[1]`` the winding radius.
    rr : float
        Half-width of the uniform jitter applied to the ring radius only.
    offset : sequence of three floats
        Centre of the ring.  Only read, never modified.
    num : int
        Number of points; integral floats are converted with ``int``.
    label : scalar
        Value stored in the last column of every row.
    twistratio : float
        Multiplier applied to the ring angle to obtain the winding angle.
    phase : float
        Constant added to the winding angle.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num, 4)`` with columns ``x, y, z, label``.
    """
    num = int(num)
    # One (radial jitter, ring angle) pair per point.
    draws = np.random.uniform([-rr, 0.0], [rr, 2.0 * np.pi], size=(num, 2))
    ring_radius = rc[0] + draws[:, 0]
    theta = draws[:, 1]
    winding = theta * twistratio + phase

    # In-plane (radial) and out-of-plane (z) parts of the displacement.
    radial_shift = rc[1] * np.sin(winding)
    x = ring_radius * np.sin(theta) + offset[0] + radial_shift * np.sin(theta)
    y = ring_radius * np.cos(theta) + offset[1] + radial_shift * np.cos(theta)
    z = offset[2] + rc[1] * np.cos(winding)
    return np.c_[x, y, z, np.repeat(label, num)]

    
def gentwistedring(rc=[1,0.3],rr=0.1,offset=[0,0,0],num=100,label=0,normaldir='x',twistratio=5.0,phase=0):
    """Generate a twisted ring and optionally permute its coordinate axes.

    The points come from ``gentwistedring0`` with the same arguments.

    * ``normaldir == 'x'``: the array is returned unchanged.
    * ``normaldir == 'y'``: coordinate columns are reordered to ``(1, 2, 0)``.
    * anything else: coordinate columns are reordered to ``(2, 0, 1)``.

    The label column always stays last.  Returns an array with ``num`` rows
    and four columns.
    """
    a = gentwistedring0(rc, rr, offset, num, label, twistratio, phase)
    if normaldir == 'x':
        return a
    # Select whole columns (``a[:, k]``).  The previous ``a[:0]`` / ``a[:3]``
    # were row slices of mismatched length and made ``np.c_`` raise.
    order = [1, 2, 0, 3] if normaldir == 'y' else [2, 0, 1, 3]
    return a[:, order]
    
#http://stackoverflow.com/questions/15880367/python-uniform-distribution-of-points-on-4-dimensional-sphere
#Marsaglia's method
def gensphere(rc,rr=0.1,offset=[0,0,0],num=100,label=0,dim=3):
    """Sample labelled points on a spherical shell in ``dim`` dimensions.

    Directions are obtained by normalising vectors of independent standard
    normal deviates.  Each point is then placed at its own radius, drawn
    uniformly from ``[rc - rr, rc + rr]``, and shifted by ``offset``.

    Parameters
    ----------
    rc : float
        Nominal shell radius.
    rr : float
        Half-width of the uniform radial jitter.
    offset : sequence of floats
        Centre of the shell.  Only the first ``dim`` entries are used, and
        missing entries count as zero, so the three-element default is valid
        for any ``dim``.
    num : int
        Number of points; integral floats are converted with ``int``.
    label : scalar
        Value stored in the last column of every row.
    dim : int
        Dimension of the embedding space.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num, dim + 1)``: ``dim`` coordinates, then the label.
    """
    num = int(num)
    directions = np.random.normal(size=(dim, num))
    directions /= np.sqrt((directions ** 2).sum(axis=0))  # unit vectors

    # Scale the unit vectors UP to the requested radius (dividing by it would
    # put the points at radius 1/r instead of r).
    radius = rc + np.random.uniform(-rr, rr, num)

    centre = np.zeros(dim)
    given = np.asarray(offset, dtype=float).ravel()[:dim]
    centre[:given.size] = given

    points = (directions * radius).T + centre
    return np.c_[points, np.repeat(label, num)]

def gensphere0(rc,rr=0.1,offset=[0,0,0],num=100,label=0):
    """Sample points on a 3-D spherical shell from uniformly drawn angles.

    ``n = int(sqrt(num))`` polar angles are drawn uniformly from
    ``[-pi, pi)``; for each of them ``n`` azimuth angles are drawn uniformly
    from ``[0, 2*pi)``.  Every point gets its own radius, drawn uniformly from
    ``[rc - rr, rc + rr]``.  Drawing the angles uniformly concentrates points
    towards the poles.

    Parameters
    ----------
    rc : float
        Nominal shell radius.
    rr : float
        Half-width of the uniform radial jitter.
    offset : sequence of three floats
        Centre of the shell.  Only read, never modified.
    num : int
        Requested number of points.  ``n * n`` points are produced, which
        equals ``num`` only when ``num`` is a perfect square.
    label : scalar
        Value stored in the last column of every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n * n, 4)`` with columns ``x, y, z, label``.
    """
    n = int(np.sqrt(num))
    # Size everything by the number of points actually generated; sizing by
    # ``num`` fails whenever ``num`` is not a perfect square.
    count = n * n
    ph = np.repeat(np.random.uniform(-np.pi, np.pi, n), n)
    th = np.random.uniform(0, 2.0 * np.pi, count)
    r = rc + np.random.uniform(-rr, rr, count)
    return np.c_[r * np.sin(th) * np.sin(ph) + offset[0],
                 r * np.cos(th) * np.sin(ph) + offset[1],
                 r * np.cos(ph) + offset[2],
                 np.repeat(label, count)]

def gensphere1(rc,rr=0.1,offset=[0,0,0],num=100,label=0):
    """Sample a 3-D spherical shell with per-latitude point counts.

    ``n = int(sqrt(num))`` polar angles ``ph`` are drawn uniformly from
    ``[-pi, pi)``.  For each one, ``int(abs(sin(ph)) * n)`` azimuth angles are
    drawn uniformly from ``[0, 2*pi)``, so circles of latitude near the poles
    receive fewer points.  Every point gets its own radius, drawn uniformly
    from ``[rc - rr, rc + rr]``.

    The number of returned points is random and never exceeds ``n * n``; it
    can be zero.

    Parameters
    ----------
    rc : float
        Nominal shell radius.
    rr : float
        Half-width of the uniform radial jitter.
    offset : sequence of three floats
        Centre of the shell.  Only read, never modified.
    num : int
        Controls the sampling density through ``n = int(sqrt(num))``.
    label : scalar
        Value stored in the last column of every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(count, 4)`` with columns ``x, y, z, label``.
    """
    n = int(np.sqrt(num))
    chunks = []
    # NOTE: an earlier version kept a running counter that was reset at the
    # top of every iteration, so its early-exit check could never trigger.
    # That dead code is gone; which points are generated is unchanged.
    for ph in np.random.uniform(-np.pi, np.pi, n):
        m = int(np.abs(np.sin(ph) * n))
        if m == 0:
            continue
        th = np.random.uniform(0, 2.0 * np.pi, m)
        r = rc + np.random.uniform(-rr, rr, m)
        chunks.append(np.c_[r * np.sin(th) * np.sin(ph) + offset[0],
                            r * np.cos(th) * np.sin(ph) + offset[1],
                            r * np.cos(ph) + offset[2]])
    points = np.vstack(chunks) if chunks else np.empty((0, 3))
    return np.c_[points, np.repeat(label, len(points))]

def genlorenz(init=[0,0.1,0],offset=[0,0,0],rr=0.,num=100,p=10,r=28,b=2.66,label=0,dt=0.01):
    """Generate a labelled trajectory of the Lorenz system.

    The state is advanced ``num`` times with step ``dt``.  Within one step
    the components are updated in sequence, so the ``y`` update already uses
    the new ``x`` and the ``z`` update the new ``x`` and ``y``.  Uniform noise
    from ``[-rr, rr]`` is added to each component at every step.

    Parameters
    ----------
    init : sequence of three floats
        Initial state ``(x, y, z)``; it is the first returned point.
    offset : sequence of three floats
        Constant shift added to the returned coordinates.  It does not
        influence the dynamics.
    rr : float
        Half-width of the per-step uniform noise.
    num : int
        Number of points; integral floats are converted with ``int``.
    p, r, b : float
        Coefficients of the equations ``dx = p*(y - x)``,
        ``dy = -x*z + r*x - y`` and ``dz = x*y - b*z``.
    label : scalar
        Value stored in the last column of every row.
    dt : float
        Integration step.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num, 4)`` with columns ``x, y, z, label``.
    """
    num = int(num)
    traj = np.empty((num, 3))
    x, y, z = init[0], init[1], init[2]
    for step in range(num):
        traj[step] = (x, y, z)
        # Scalar noise keeps the state a plain float.  Size-1 arrays would
        # turn x, y, z into arrays after the first step and make the rows
        # ragged.  The sequential update order is kept on purpose so existing
        # trajectories do not change.
        x = x + dt * (-p * x + p * y) + np.random.uniform(-rr, rr)
        y = y + dt * (-x * z + r * x - y) + np.random.uniform(-rr, rr)
        z = z + dt * (x * y - b * z) + np.random.uniform(-rr, rr)
    traj = traj + np.array([offset[0], offset[1], offset[2]], dtype=float)
    return np.c_[traj, np.repeat(label, num)]

def genrossler(init=[0,5,0],offset=[0,0,0],num=100,a=0.2,b=0.2,c=5.7,label=0,dt=0.05):
    """Generate a labelled trajectory of the Rossler system.

    The state is advanced ``num`` times with step ``dt``.  Within one step
    the components are updated in sequence, so the ``y`` update already uses
    the new ``x`` and the ``z`` update the new ``x``.

    Parameters
    ----------
    init : sequence of three floats
        Initial state ``(x, y, z)``; it is the first returned point.
    offset : sequence of three floats
        Constant shift added to the returned coordinates.  It does not
        influence the dynamics.
    num : int
        Number of points; integral floats are converted with ``int``.
    a, b, c : float
        Coefficients of the equations ``dx = -y - z``, ``dy = x + a*y`` and
        ``dz = b + z*(x - c)``.
    label : scalar
        Value stored in the last column of every row.
    dt : float
        Integration step.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num, 4)`` with columns ``x, y, z, label``.
    """
    num = int(num)
    traj = np.empty((num, 3))
    x, y, z = init[0], init[1], init[2]
    for step in range(num):
        traj[step] = (x, y, z)
        # Sequential update order kept on purpose so trajectories are unchanged.
        x = x + dt * (-y - z)
        y = y + dt * (x + a * y)
        z = z + dt * (b + z * (x - c))
    traj = traj + np.array([offset[0], offset[1], offset[2]], dtype=float)
    return np.c_[traj, np.repeat(label, num)]


def cshow2(data):
    """Show a 2-D scatter plot of labelled points.

    ``data`` is array-like with one point per row: columns 0 and 1 are the
    x and y coordinates, column 2 supplies the colour value of each marker.
    """
    # Index columns directly; ``zip(*data)`` cannot be subscripted on Python 3.
    data = np.asarray(data)
    plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])
    plt.draw()
    plt.show()

def cshow3(data):
    """Show a 3-D scatter plot of labelled points.

    ``data`` is array-like with one point per row: columns 0, 1 and 2 are the
    x, y and z coordinates, column 3 supplies the colour value of each marker.
    """
    # Importing mplot3d registers the '3d' projection on Matplotlib releases
    # that do not register it automatically.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    # Index columns directly; ``zip(*data)`` cannot be subscripted on Python 3.
    data = np.asarray(data)
    fig = plt.figure()
    # Create the axes through the figure so they are attached to it;
    # ``Axes3D(fig)`` alone no longer adds itself on current Matplotlib.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=data[:, 3])
    plt.draw()
    plt.show()

def test(data,dump=False,fname="test.csv"):
    """Plot a generated data set and optionally save it as CSV.

    Arrays with exactly three columns are shown with ``cshow2``; everything
    else goes to ``cshow3``.  When ``dump`` is true the array is also written
    to ``fname`` with comma-separated values.
    """
    # Three columns means two coordinates plus the label.
    plotter = cshow2 if data.shape[1]==3 else cshow3
    plotter(data)

    if not dump:
        return
    np.savetxt(fname,data,delimiter=",")

if __name__=="__main__":
    # Demo: generate each kind of data set in turn and show it as a scatter plot.
    num=200

    # two separate circles
    circles=np.vstack([gencircle(1,0.1,num=num,label=0),gencircle(1,0.1,[-2,2],num=num,label=1)])
    test(circles)

    # circle in circle
    cinc=np.r_[gencircle(1,0.1,num=num,label=0),gencircle(2,0.1,num=num,label=1)]
    test(cinc)

    # XOR-like pattern: equal labels sit on opposite corners of the unit square.
    # Integer division keeps the point count an int on Python 3.
    half=num//2
    xor0=np.r_[gencircle(0.5,num=half,offset=[0,0],label=0),gencircle(0.5,num=half,offset=[1,1],label=0)]
    xor1=np.r_[gencircle(0.5,num=half,offset=[0,1],label=1),gencircle(0.5,num=half,offset=[1,0],label=1)]
    xor=np.r_[xor0,xor1]
    test(xor)

    # 3D rings
    rings=np.r_[genring(1,0.1,num=num,offset=[0,0,0],label=0,normaldir='x'),
                genring(1,0.1,num=num,offset=[0,0,1],label=1,normaldir='y')]
    test(rings)

    num=400
    # sphere in sphere
    sins=np.r_[gensphere(1,num=num,label=0),gensphere(2,num=num,label=1)]
    test(sins)

    # twisted rings
    test(np.vstack([gentwistedring(num=num,label=0),gentwistedring(num=num,label=1,phase=PI)]))

    num=1000
    # Rossler attractor
    rossler=genrossler(num=num,dt=0.1)
    test(rossler)

    # Lorenz attractor
    lorenz=genlorenz(num=num,dt=0.05)
    test(lorenz)

circles.png cinc.png xor_like.png rings.png sphere_in_sphere.png

twistedrings.png rossler.png

lorenz.png

Recommended Posts

Artificial data generation with numpy
Data set generation
Read a character data file with numpy
Convert data with shape (number of data, 1) to (number of data,) with numpy.
I tried DBM with Pylearn 2 using artificial data
Data analysis with python 2
Visualize data with Streamlit
Moving average with numpy
Reading data with TensorFlow
Data visualization with pandas
Data manipulation with Pandas!
Shuffle data with pandas
Data Augmentation with openCV
Getting Started with Numpy
[Python] Sorting Numpy data
Learn with Cheminformatics NumPy
Matrix concatenation with Numpy
Hamming code with numpy
Normarize data with Scipy
Regression analysis with NumPy
Data analysis with Python
Extend NumPy with Rust
LOAD DATA with PyMysql
[Python] Create structured array (store heterogeneous data with NumPy)
Relationship data learning with numpy and NetworkX (spectral clustering)
Automatic quiz generation with COTOHA
Kernel regression with Numpy only
Sample data created with python
I wrote GP with numpy
Embed audio data with Jupyter
Graph Excel data with matplotlib (1)
CNN implementation with just numpy
Predict candlesticks with artificial intelligence
Extract Twitter data with CSV
[Python] Calculation method with numpy
Try matrix operation with NumPy
Get Youtube data with python
Diffusion equation animation with NumPy
Debt repayment simulation with numpy
Implemented SMO with Python + NumPy
Sentence generation with GRU (keras)
Stick strings together with Numpy
Clustering ID-POS data with LDA
Learn new data with PaintsChainer
Binarize photo data with OpenCV
Graph Excel data with matplotlib (2)
Save tweet data with Django
Handle numpy arrays with f2py
Image caption generation with Chainer
Use OpenBLAS with numpy, scipy
Python3 | Getting Started with numpy
Data processing tips with Pandas
Interpolate 2D data with scipy.interpolate.griddata
Artificial data set (sine function)
Implementing logistic regression with NumPy
Read json data with python
Save & load data with joblib, pickle
Perform least squares fitting with numpy.
Accelerate query generation with SQLAlchemy ORM
How to deal with imbalanced data
How to deal with imbalanced data