VBA user tried using Python / R: String manipulation (continued)

Introduction

I am a VBA user who started studying machine learning. As a memorandum, I would like to summarize the Python / R grammar while comparing it with VBA.

table of contents

String manipulation

In the previous article, I compared Python and R string operations with VBA. In this continuation, I will try stringr, an R package for string manipulation.

The reference article states that "character string processing is possible even with the functions provided by R's standard base package, but stringr is easier to use, with rational behavior and a unified interface." When you actually use it, it is indeed easy to use, because the function naming and the order of the arguments are uniform. Reference: stringr — Process R strings in a decent way

String concatenation

R(stringr)

R


# Load stringr; every str_* function used in this article comes from it.
library(stringr)

s1 <- "abc"
s2 <- "def"
s3 <- "ghij"
# str_c() joins its arguments end to end into a single string.
str_c(s1, s2, s3)
# "abcdefghij"

String length

R(stringr)

R


s <- "abcdefghij"
# str_length() returns the number of characters in the string.
str_length(s)
# 10

Extract string

R(stringr)

R


s <- "abcdefghij"
# str_sub(string, start, end): positions are 1-based and both ends are
# included in the result.
str_sub(s, 1, 2)
# "ab"
# Negative positions count from the end of the string (-1 is the last
# character), so this takes the last two characters.
str_sub(s, -2, -1)
# "ij"
# Characters 4 through 6.
str_sub(s, 4, 6)
# "def"

Search for strings

R(stringr)

R


s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij"
# str_detect(): does the pattern occur in the string at all?
str_detect(s, "def")
# TRUE
str_detect(t, "def")
# TRUE
# str_count(): how many times does the pattern occur?
str_count(s, "def")
# 1
str_count(t, "def")
# 2
# str_locate(): start/end position of the first match only, even though
# t contains "def" twice.
str_locate(s, "def")
#      start end
# [1,]     4   6
str_locate(t, "def")
#      start end
# [1,]     4   6
class(str_locate(t, "def"))
# "matrix"   (R 4.0.0 and later print: "matrix" "array")
# str_locate_all(): positions of every match, returned as a list holding
# one matrix per input string.
str_locate_all(t, "def")
# [[1]]
#      start end
# [1,]     4   6
# [2,]    14  16
class(str_locate_all(t, "def"))
# "list"

String replacement

R(stringr)

R


s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij"
# str_replace() replaces only the first match in the string.
str_replace(s, "def", "DEF")
# "abcDEFghij"
str_replace(t, "def", "DEF")
# "abcDEFghijabcdefghij"
# str_replace_all() replaces every match.
str_replace_all(t, "def", "DEF")
# "abcDEFghijabcDEFghij"

String conversion

Uppercase and lowercase conversion

R(stringr)

R


s <- "abcDEFghij"
str_to_upper(s)    # Convert every letter to uppercase
# "ABCDEFGHIJ"
str_to_lower(s)    # Convert every letter to lowercase
# "abcdefghij"
str_to_title(s)    # Capitalize the first letter of each word
# "Abcdefghij"
str_to_sentence(s) # Capitalize only the first letter of the string
# "Abcdefghij"

# With several words the two functions give different results.
ss <- "abc def ghij"
str_to_title(ss)
# "Abc Def Ghij"
str_to_sentence(ss)
# "Abc def ghij"

# Swap upper and lower case one character at a time.
# seq_len() is used instead of 1:n so that an empty string yields zero
# iterations rather than iterating over c(1, 0).
t <- ""
for (i in seq_len(str_length(s))) {
  stemp <- str_sub(s, i, i)
  if (stemp == str_to_lower(stemp)) {
    # The character is unchanged by lowercasing, so raise it.
    stemp <- str_to_upper(stemp)
  } else if (stemp == str_to_upper(stemp)) {
    # The character is unchanged by uppercasing, so lower it.
    stemp <- str_to_lower(stemp)
  }
  t <- str_c(t, stemp)
}
t                     # Upper and lower case swapped
# "ABCdefGHIJ"
s == str_to_upper(s)  # TRUE only when uppercasing leaves s unchanged
# FALSE
s == str_to_lower(s)  # TRUE only when lowercasing leaves s unchanged
# FALSE

Full-width and half-width conversion

R(stringr)

R


Inversion of string

R(stringr)

R


s <- "abcdefghij"
# Build the reversed string by appending characters taken from the end:
# str_sub(s, -i, -i) is the i-th character counted from the right.
# seq_len() (rather than 1:n) gives zero iterations for an empty string.
t <- ""
for (i in seq_len(str_length(s))) {
  t <- str_c(t, str_sub(s, -i, -i))
}
t
# "jihgfedcba"

Repeat string

R(stringr)

R


# str_dup(string, times) repeats the string the given number of times.
str_dup("A", 3)
# "AAA"
str_dup("def", 3)
# "defdefdef"

space

Space string

R(stringr)

R


# A run of spaces is produced by repeating " "; the hyphens only make the
# spaces visible in the output.
str_c("-", str_dup(" ", 3), "-")
# "-   -"
# Build a test string with 2 leading spaces, 3 and 4 inner spaces, and
# 5 trailing spaces.
s <- str_c(str_dup(" ", 2), "d",
           str_dup(" ", 3), "e",
           str_dup(" ", 4), "f",
           str_dup(" ", 5))
str_c("-", s, "-")
# "-  d   e    f     -"

Delete unnecessary space before and after

R(stringr)

R


# s is the padded string "  d   e    f     " built in the "Space string"
# listing. str_trim() removes spaces only at the chosen end(s); the spaces
# between the letters are kept.
str_trim(s, side="left")
# "d   e    f     "
str_trim(s, side="right")
# "  d   e    f"
str_trim(s, side="both")
# "d   e    f"

About string vector

The functions in the stringr package can be used not only on a single string but also on string vectors. For example, if you apply the str_length function to a string vector consisting of 3 strings, it is applied to each element and a vector consisting of 3 numbers is returned.

R(stringr)

R


# A character vector of three strings; each stringr call below is applied
# to every element and returns one result per element.
s1 <- "abcdefghij"
s2 <- "cdefghijkl"
s3 <- "efghijklmn"
ss <- c(s1, s2, s3)
ss
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"

# Concatenation: the single string "_1" is appended to every element.
str_c(ss, "_1")
# [1] "abcdefghij_1" "cdefghijkl_1" "efghijklmn_1"

# Length of each element.
str_length(ss)
# [1] 10 10 10

# Substrings are taken from each element independently.
str_sub(ss, 1, 2)
# [1] "ab" "cd" "ef"
str_sub(ss, -2, -1)
# [1] "ij" "kl" "mn"
str_sub(ss, 2, 3)
# [1] "bc" "de" "fg"

# Searching: the third element does not contain "def".
str_detect(ss, "def")
# [1]  TRUE  TRUE FALSE
str_count(ss, "def")
# [1] 1 1 0
# str_locate() returns one row per element; an element without a match
# gets NA for both start and end.
str_locate(ss, "def")
#      start end
# [1,]     4   6
# [2,]     2   4
# [3,]    NA  NA
# str_locate_all() returns one matrix per element; an element without a
# match gets a matrix with zero rows.
str_locate_all(ss, "def")
# [[1]]
#      start end
# [1,]     4   6
#
# [[2]]
#      start end
# [1,]     2   4
#
# [[3]]
#      start end
#

# Replacement: both calls give the same result here because no element
# contains "def" more than once.
str_replace(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"
str_replace_all(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"

# Case conversion of each element.
str_to_upper(ss)
# [1] "ABCDEFGHIJ" "CDEFGHIJKL" "EFGHIJKLMN"
str_to_lower(ss)
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"
str_to_title(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"
str_to_sentence(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"

# Element-wise comparison: every element is already all lowercase.
ss == str_to_upper(ss)
# [1] FALSE FALSE FALSE
ss == str_to_lower(ss)
# [1] TRUE TRUE TRUE

# Repeat each element twice.
str_dup(ss, 2)
# [1] "abcdefghijabcdefghij" "cdefghijklcdefghijkl" "efghijklmnefghijklmn"

# Trimming: tt has one leading and one trailing space per element; the
# space before "_1" is inside the string and is never removed.
tt <- str_c(" ", ss, " _1 ")
tt
# [1] " abcdefghij _1 " " cdefghijkl _1 " " efghijklmn _1 "
str_trim(tt)
# [1] "abcdefghij _1" "cdefghijkl _1" "efghijklmn _1"
str_trim(tt, side="left")
# [1] "abcdefghij _1 " "cdefghijkl _1 " "efghijklmn _1 "
str_trim(tt, side="right")
# [1] " abcdefghij _1" " cdefghijkl _1" " efghijklmn _1"

I would like to summarize vectors and data frames in another article.

Summary

List

The string manipulation functions used in each language are listed below. For comparison, the corresponding EXCEL formulas are also shown. The variables are assumed to be s1 = "abc", s2 = "def", s3 = "ghij", s = "abcdefghij", t = "abcdefghijabcdefghij", u = "abcDEFghij", v = "ａｂｃＤＥＦｇｈｉｊ" (full-width) and w = "  d   e    f     ". Likewise, the EXCEL cells are assumed to contain A1: ="abc", A2: ="def", A3: ="ghij", A4: ="abcdefghij", A5: ="abcdefghijabcdefghij", A6: ="abcDEFghij", A7: ="ａｂｃＤＥＦｇｈｉｊ" (full-width) and A8: ="  d   e    f     ".

Basic operation of strings

Python R R(stringr) VBA EXCEL result
Join s1 + s2 + s3 paste0(s1, s2, s3)
paste(s1, s2, s3, sep="")
str_c(s1, s2, s3) s1 & s2 & s3 =A1&A2&A3
=CONCATENATE(
A1,A2,A3)
abcdefghij
length len(s) nchar(s) str_length(s) Len(s) =LEN(A4) 10
Inversion s[::-1] StrReverse(s) jihgfedcba
repetition 'A' * 3 str_dup("A", 3) String(3, "A") =REPT("A",3) AAA
repetition 'def' * 3 str_dup("def", 3) =REPT("def",3) defdefdef

Extract string

Python R R(stringr) VBA EXCEL result
From the left s[8:10]
s[0:2]
s[:2]
substr(s, 1, 2)
substring(s, 1, 2)
str_sub(s, 1, 2) Left(s, 2) =LEFT(A4,2) ab
From the right s[len(s)-2:len(s)]
s[-2:]
substr(s,
nchar(s)-2+1,
nchar(s))
str_sub(s, -2, -1) Right(s, 2) =RIGHT(A4,2) ij
On the way s[3:6] substr(s, 4, 6) str_sub(s, 4, 6) Mid(s, 4, 3) =MID(A4,4,3) def

Note) When extracting a substring from the middle of a string, the Python and R functions specify "from where to where", whereas the VBA and EXCEL functions specify "from where and how many characters".

Search for strings

Python R R(stringr) VBA EXCEL result
Search s.find('def') str_locate(s, "def") InStr(1, s, "def") =FIND("def",A4,1)
=SEARCH("def",A4,1)
3,4
Search from behind t.rfind('def') InStrRev(t, "def") 13,14
count t.count('def') str_count(t, "def") 2

Note) See above for the str_detect and str_locate functions.

String replacement

Python R R(stringr) VBA EXCEL result
Replacement s.replace('def', 'DEF') sub("def", "DEF", s) str_replace(s, "def", "DEF") Replace(s, "def", "DEF") =SUBSTITUTE(
A4,"def","DEF")
=REPLACE(A4,
FIND("def",A4),
LEN("def"),"DEF")
abcDEFghij
Replace only the first one sub("def", "DEF", t) str_replace(t, "def", "DEF") abcDEFghij
abcdefghij
Replace all t.replace('def', 'DEF') gsub("def", "DEF", t) str_replace_all(t, "def", "DEF") Replace(t, "def", "DEF") =SUBSTITUTE(
A5,"def","DEF")
abcDEFghij
abcDEFghij

String conversion

Python R R(stringr) VBA EXCEL result
Uppercase u.upper() toupper(u) str_to_upper(u) UCase(u) =UPPER(A6) ABCDEFGHIJ
To lowercase u.lower() tolower(u) str_to_lower(u) LCase(u) =LOWER(A6) abcdefghij
Uppercase only at the beginning, lowercase otherwise u.capitalize() str_to_title(u)
str_to_sentence(u)
StrConv(u, vbProperCase) =PROPER(A6) Abcdefghij
Swap uppercase and lowercase u.swapcase() chartr("A-Za-z", "a-zA-Z", u) ABCdefGHIJ
Judgment of capital letters u.isupper() u == toupper(u) u == str_to_upper(u) False
Judgment of lowercase letters u.islower() u == tolower(u) u == str_to_lower(u) False
Full-width chartr("A-Za-z", "Ａ-Ｚａ-ｚ", u) StrConv(u, vbWide) =JIS(A6) ａｂｃＤＥＦｇｈｉｊ
Half-width chartr("Ａ-Ｚａ-ｚ", "A-Za-z", v) StrConv(v, vbNarrow) =ASC(A7) abcDEFghij

String space

Python R R(stringr) VBA EXCEL result
space ' ' * 3 str_dup(" ", 3) Space(3) =REPT(" ",3) "   "
Remove spaces on both sides w.strip(' ') str_trim(w, side="both") Trim(w) =TRIM(A8) "d   e    f"
Delete left space w.lstrip(' ') str_trim(w, side="left") LTrim(w) "d   e    f     "
Delete right space w.rstrip(' ') str_trim(w, side="right") RTrim(w) "  d   e    f"

Note) EXCEL's TRIM function also reduces each run of spaces inside the string to a single space, so its result is "d e f".

Whole program

The whole program is shown below for reference. See the previous article for the Python and VBA code.

R(stringr)

R


# Load stringr; every str_* function used below comes from it.
library(stringr)

# String concatenation
s1 <- "abc"
s2 <- "def"
s3 <- "ghij"
# str_c() joins its arguments end to end into a single string.
str_c(s1, s2, s3)
# "abcdefghij"

# String length
s <- "abcdefghij"
str_length(s)
# 10

# Extract string
# str_sub(string, start, end): positions are 1-based and inclusive;
# negative positions count from the end of the string.
s <- "abcdefghij"
str_sub(s, 1, 2)
# "ab"
str_sub(s, -2, -1)
# "ij"
str_sub(s, 4, 6)
# "def"

# Search for strings
s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij"
# str_detect(): does the pattern occur in the string at all?
str_detect(s, "def")
# TRUE
str_detect(t, "def")
# TRUE
# str_count(): how many times does the pattern occur?
str_count(s, "def")
# 1
str_count(t, "def")
# 2
# str_locate(): start/end position of the first match only.
str_locate(s, "def")
#      start end
# [1,]     4   6
str_locate(t, "def")
#      start end
# [1,]     4   6
class(str_locate(t, "def"))
# "matrix"   (R 4.0.0 and later print: "matrix" "array")
# str_locate_all(): positions of every match, as a list holding one
# matrix per input string.
str_locate_all(t, "def")
# [[1]]
#      start end
# [1,]     4   6
# [2,]    14  16
class(str_locate_all(t, "def"))
# "list"

# String replacement
s <- "abcdefghij"
t <- str_c(s, s, sep="") # "abcdefghijabcdefghij"
# str_replace() replaces only the first match; str_replace_all() replaces
# every match.
str_replace(s, "def", "DEF")
# "abcDEFghij"
str_replace(t, "def", "DEF")
# "abcDEFghijabcdefghij"
str_replace_all(t, "def", "DEF")
# "abcDEFghijabcDEFghij"

# Converting case of character string
s <- "abcDEFghij"
str_to_upper(s)    # Convert every letter to uppercase
# "ABCDEFGHIJ"
str_to_lower(s)    # Convert every letter to lowercase
# "abcdefghij"
str_to_title(s)    # Capitalize the first letter of each word
# "Abcdefghij"
str_to_sentence(s) # Capitalize only the first letter of the string
# "Abcdefghij"

# With several words the two functions give different results.
ss <- "abc def ghij"
str_to_title(ss)
# "Abc Def Ghij"
str_to_sentence(ss)
# "Abc def ghij"

# Swap upper and lower case one character at a time.
# seq_len() is used instead of 1:n so that an empty string yields zero
# iterations rather than iterating over c(1, 0).
t <- ""
for (i in seq_len(str_length(s))) {
  stemp <- str_sub(s, i, i)
  if (stemp == str_to_lower(stemp)) {
    # The character is unchanged by lowercasing, so raise it.
    stemp <- str_to_upper(stemp)
  } else if (stemp == str_to_upper(stemp)) {
    # The character is unchanged by uppercasing, so lower it.
    stemp <- str_to_lower(stemp)
  }
  t <- str_c(t, stemp)
}
t                     # Upper and lower case swapped
# "ABCdefGHIJ"
s == str_to_upper(s)  # TRUE only when uppercasing leaves s unchanged
# FALSE
s == str_to_lower(s)  # TRUE only when lowercasing leaves s unchanged
# FALSE

# Inversion of string
s <- "abcdefghij"
# Build the reversed string by appending characters taken from the end:
# str_sub(s, -i, -i) is the i-th character counted from the right.
# seq_len() (rather than 1:n) gives zero iterations for an empty string.
t <- ""
for (i in seq_len(str_length(s))) {
  t <- str_c(t, str_sub(s, -i, -i))
}
t
# "jihgfedcba"

# Repeat string
# str_dup(string, times) repeats the string the given number of times.
str_dup("A", 3)
# "AAA"
str_dup("def", 3)
# "defdefdef"

# String space
# A run of spaces is produced by repeating " "; the hyphens only make the
# spaces visible in the output.
str_c("-", str_dup(" ", 3), "-")
# "-   -"
# Build a test string with 2 leading spaces, 3 and 4 inner spaces, and
# 5 trailing spaces.
s <- str_c(str_dup(" ", 2), "d",
           str_dup(" ", 3), "e",
           str_dup(" ", 4), "f",
           str_dup(" ", 5))
str_c("-", s, "-")
# "-  d   e    f     -"

# Remove spaces before and after the string
# Only the spaces at the chosen end(s) are removed; the spaces between the
# letters are kept.
str_trim(s, side="left")
# "d   e    f     "
str_trim(s, side="right")
# "  d   e    f"
str_trim(s, side="both")
# "d   e    f"


# String vector
# Each stringr call below is applied to every element of ss and returns
# one result per element.
s1 <- "abcdefghij"
s2 <- "cdefghijkl"
s3 <- "efghijklmn"
ss <- c(s1, s2, s3)
ss
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"

# Concatenation: the single string "_1" is appended to every element.
str_c(ss, "_1")
# [1] "abcdefghij_1" "cdefghijkl_1" "efghijklmn_1"

# Length of each element.
str_length(ss)
# [1] 10 10 10

# Substrings are taken from each element independently.
str_sub(ss, 1, 2)
# [1] "ab" "cd" "ef"
str_sub(ss, -2, -1)
# [1] "ij" "kl" "mn"
str_sub(ss, 2, 3)
# [1] "bc" "de" "fg"

# Searching: the third element does not contain "def".
str_detect(ss, "def")
# [1]  TRUE  TRUE FALSE
str_count(ss, "def")
# [1] 1 1 0
# str_locate() returns one row per element; an element without a match
# gets NA for both start and end.
str_locate(ss, "def")
#      start end
# [1,]     4   6
# [2,]     2   4
# [3,]    NA  NA
# str_locate_all() returns one matrix per element; an element without a
# match gets a matrix with zero rows.
str_locate_all(ss, "def")
# [[1]]
#      start end
# [1,]     4   6
#
# [[2]]
#      start end
# [1,]     2   4
#
# [[3]]
#      start end
#

# Replacement: both calls give the same result here because no element
# contains "def" more than once.
str_replace(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"
str_replace_all(ss, "def", "DEF")
# [1] "abcDEFghij" "cDEFghijkl" "efghijklmn"

# Case conversion of each element.
str_to_upper(ss)
# [1] "ABCDEFGHIJ" "CDEFGHIJKL" "EFGHIJKLMN"
str_to_lower(ss)
# [1] "abcdefghij" "cdefghijkl" "efghijklmn"
str_to_title(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"
str_to_sentence(ss)
# [1] "Abcdefghij" "Cdefghijkl" "Efghijklmn"

# Element-wise comparison: every element is already all lowercase.
ss == str_to_upper(ss)
# [1] FALSE FALSE FALSE
ss == str_to_lower(ss)
# [1] TRUE TRUE TRUE

# Repeat each element twice.
str_dup(ss, 2)
# [1] "abcdefghijabcdefghij" "cdefghijklcdefghijkl" "efghijklmnefghijklmn"

# Trimming: tt has one leading and one trailing space per element; the
# space before "_1" is inside the string and is never removed.
tt <- str_c(" ", ss, " _1 ")
tt
# [1] " abcdefghij _1 " " cdefghijkl _1 " " efghijklmn _1 "
str_trim(tt)
# [1] "abcdefghij _1" "cdefghijkl _1" "efghijklmn _1"
str_trim(tt, side="left")
# [1] "abcdefghij _1 " "cdefghijkl _1 " "efghijklmn _1 "
str_trim(tt, side="right")
# [1] " abcdefghij _1" " cdefghijkl _1" " efghijklmn _1"

reference

Recommended Posts

VBA user tried using Python / R: String manipulation (continued)
VBA user tried using Python / R: string manipulation
VBA user tried using Python / R: Matrix
VBA user tried using Python / R: Iterative processing
VBA user tried using Python / R: conditional branching
VBA user tried using Python / R: basic grammar
VBA users tried using Python / R: basic arithmetic operations
Python string manipulation master
[Python] I tried using OpenPose
String date manipulation in Python
I tried using Thonny (Python / IDE)
[Python] I tried using YOLO v3
I tried using UnityCloudBuild API from Python
Python string
vprof --I tried using the profiler for Python
I tried web scraping using python and selenium
I tried object detection using Python and OpenCV
I tried using mecab with python2.7, ruby2.3, php7
I tried reading a CSV file using Python
I tried using the Datetime module by Python