Supported Functions

using Tidier
using TidierStrings
using DataFrames, Chain

df = DataFrame(
  Names = ["Alice", "Bob", "Charlie", "Dave", "Eve", "Frank", "Grace"],
  City = ["New York        2019-20", "Los    \n\n\n\n\n\n    Angeles 2007-12 2020-21", "San Antonio 1234567890         ", "       New York City", "LA         2022-23", "Philadelphia            2023-24", "San Jose               9876543210"],
  Occupation = ["Doctor", "Engineer", "Final Artist", "Scientist", "Physician", "Lawyer", "Teacher"],
  Description = ["Alice is a doctor in New York",
                 "Bob is is is an engineer in Los Angeles",
                 "Charlie is an artist in Chicago",
                 "Dave is a scientist in Houston",
                 "Eve is a physician  in Phoenix",
                 "Frank is a lawyer in Philadelphia",
                 "Grace is a teacher in San Antonio"]
)

7×4 DataFrame

Row	Names	City	Occupation	Description
	String	String	String	String
1	Alice	New York 2019-20	Doctor	Alice is a doctor in New York
2	Bob	Los \n\n\n\n\n\n Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles
3	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago
4	Dave	New York City	Scientist	Dave is a scientist in Houston
5	Eve	LA 2022-23	Physician	Eve is a physician in Phoenix
6	Frank	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia
7	Grace	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio

Functions that support regex: str_detect, str_replace, str_replace_all, str_remove, str_remove_all, str_count, str_equal, str_subset

`str_squish()`¤

Removes leading and trailing whitespace from a string and replaces consecutive whitespace between words with a single space. Newlines are removed as well.

df = @chain df begin
    @mutate(City = str_squish(City))
end

7×4 DataFrame

Row	Names	City	Occupation	Description
	String	String	String	String
1	Alice	New York 2019-20	Doctor	Alice is a doctor in New York
2	Bob	Los Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles
3	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago
4	Dave	New York City	Scientist	Dave is a scientist in Houston
5	Eve	LA 2022-23	Physician	Eve is a physician in Phoenix
6	Frank	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia
7	Grace	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio

`str_replace()`, `str_replace_all()`¤

str_replace() replaces the first occurrence of a pattern in a string with the specified text. It takes a string, a pattern to search for, and the replacement text as arguments. It also supports regex and the logical operator |. In contrast, str_replace_all() replaces every occurrence of a match within a string.

@chain df begin
  @mutate(City = str_replace(City, r"\s*20\d{2}-\d{2,4}\s*", " ####-## "))
  #@mutate(Occupation = str_replace_all(Occupation, "Doctor | Physician ", "Doctor"))
  @mutate(Description = str_replace(Description, "is | a", "4 "))

end

7×4 DataFrame

Row	Names	City	Occupation	Description
	String	String	String	String
1	Alice	New York ####-##	Doctor	Alice 4 a doctor in New York
2	Bob	Los Angeles ####-## 2020-21	Engineer	Bob 4 is is an engineer in Los Angeles
3	Charlie	San Antonio 1234567890	Final Artist	Charlie 4 an artist in Chicago
4	Dave	New York City	Scientist	Dave 4 a scientist in Houston
5	Eve	LA ####-##	Physician	Eve 4 a physician in Phoenix
6	Frank	Philadelphia ####-##	Lawyer	Frank 4 a lawyer in Philadelphia
7	Grace	San Jose 9876543210	Teacher	Grace 4 a teacher in San Antonio

`str_remove()`, `str_remove_all()`¤

These functions remove the first occurrence or all occurrences of a match, respectively.

@chain df begin
    @mutate(split = str_remove_all(Description, "is"))
end

7×5 DataFrame

Row	Names	City	Occupation	Description	split
	String	String	String	String	String
1	Alice	New York 2019-20	Doctor	Alice is a doctor in New York	Alice a doctor in New York
2	Bob	Los Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles	Bob an engineer in Los Angeles
3	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago	Charlie an artt in Chicago
4	Dave	New York City	Scientist	Dave is a scientist in Houston	Dave a scientt in Houston
5	Eve	LA 2022-23	Physician	Eve is a physician in Phoenix	Eve a physician in Phoenix
6	Frank	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia	Frank a lawyer in Philadelphia
7	Grace	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio	Grace a teacher in San Antonio

`str_detect()`¤

Checks if a pattern exists in a string. It takes a string and a pattern as arguments and returns a boolean indicating the presence of the pattern in the string. This can be used inside of @filter, @mutate, if_else() and case_when(). str_detect supports the logical operators | and &. The examples below use str_detect() with if_else() and @filter, and then with case_when().

@chain df begin
    @mutate(Occupation = if_else(str_detect(Occupation, "Doctor | Physician"), "Physician", Occupation))
    @filter(str_detect(Description, "artist | doctor"))
end

2×4 DataFrame

Row	Names	City	Occupation	Description
	String	String	String	String
1	Alice	New York 2019-20	Physician	Alice is a doctor in New York
2	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago

and

@chain df begin
    @mutate(state = case_when(str_detect(City, "NYC | New York") => "NY",
        str_detect(City, "LA | Los Angeles | San & Jose") => "CA", true => "other"))
end

7×5 DataFrame

Row	Names	City	Occupation	Description	state
	String	String	String	String	String
1	Alice	New York 2019-20	Doctor	Alice is a doctor in New York	NY
2	Bob	Los Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles	CA
3	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago	other
4	Dave	New York City	Scientist	Dave is a scientist in Houston	NY
5	Eve	LA 2022-23	Physician	Eve is a physician in Phoenix	CA
6	Frank	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia	other
7	Grace	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio	CA

`str_equal()`¤

Checks if two strings are exactly the same. Takes two strings as arguments and returns a boolean indicating whether the strings are identical.

@chain df begin
    @mutate(Same_City = case_when(str_equal(City, Occupation) => "Yes", true => "No"))
end

7×5 DataFrame

Row	Names	City	Occupation	Description	Same_City
	String	String	String	String	String
1	Alice	New York 2019-20	Doctor	Alice is a doctor in New York	No
2	Bob	Los Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles	No
3	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago	No
4	Dave	New York City	Scientist	Dave is a scientist in Houston	No
5	Eve	LA 2022-23	Physician	Eve is a physician in Phoenix	No
6	Frank	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia	No
7	Grace	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio	No

`str_to_upper()`, `str_to_lower()`¤

These take a string and convert it to all uppercase or all lowercase, respectively.

@chain df begin
    @mutate(Names = str_to_upper(Names))
end

7×4 DataFrame

Row	Names	City	Occupation	Description
	String	String	String	String
1	ALICE	New York 2019-20	Doctor	Alice is a doctor in New York
2	BOB	Los Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles
3	CHARLIE	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago
4	DAVE	New York City	Scientist	Dave is a scientist in Houston
5	EVE	LA 2022-23	Physician	Eve is a physician in Phoenix
6	FRANK	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia
7	GRACE	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio

`str_subset()`¤

Returns the subset of strings that match a pattern. Takes a vector of strings and a pattern as arguments and returns all elements of the vector that contain the pattern.

@chain df begin
    @mutate(split = str_subset(Description, "artist"))
end

7×5 DataFrame

Row	Names	City	Occupation	Description	split
	String	String	String	String	String
1	Alice	New York 2019-20	Doctor	Alice is a doctor in New York
2	Bob	Los Angeles 2007-12 2020-21	Engineer	Bob is is is an engineer in Los Angeles
3	Charlie	San Antonio 1234567890	Final Artist	Charlie is an artist in Chicago	Charlie is an artist in Chicago
4	Dave	New York City	Scientist	Dave is a scientist in Houston
5	Eve	LA 2022-23	Physician	Eve is a physician in Phoenix
6	Frank	Philadelphia 2023-24	Lawyer	Frank is a lawyer in Philadelphia
7	Grace	San Jose 9876543210	Teacher	Grace is a teacher in San Antonio

This page was generated using Literate.jl.

Supported Functions

str_squish()¤

str_replace(), str_replace_all¤

str_remove(), str_remove_all()¤

str_detect()¤

str_equal()¤

str_to_upper(), str_to_lower()¤

str_subset()¤

`str_squish()`¤

`str_replace()`, `str_replace_all`¤

`str_remove()`, `str_remove_all()`¤

`str_detect()`¤

`str_equal()`¤

`str_to_upper()`, `str_to_lower()`¤

`str_subset()`¤