Reference
Index
Tidier.Tidier_set
Tidier.across
Tidier.case_when
Tidier.desc
Tidier.if_else
Tidier.@arrange
Tidier.@filter
Tidier.@full_join
Tidier.@group_by
Tidier.@inner_join
Tidier.@left_join
Tidier.@mutate
Tidier.@pivot_longer
Tidier.@pivot_wider
Tidier.@pull
Tidier.@rename
Tidier.@right_join
Tidier.@select
Tidier.@slice
Tidier.@summarise
Tidier.@summarize
Tidier.@transmute
Tidier.@ungroup
Reference - Exported functions
#
Tidier.Tidier_set
— Method.
Tidier_set(option::AbstractString, value::Bool)
Set package options.
Here are the supported options and what they do:
- "code": Defaults to
false
. If set to true
, this option displays the DataFrames.jl code generated by the Tidier.jl package. It is useful for debugging whether errors are introduced by Tidier.jl's generated code.
Arguments
option
: "code"
value
: true or false
#
Tidier.across
— Method.
across(variable[s], function[s])
Apply functions to multiple variables. If specifying multiple variables or functions, surround them with parentheses so that they are recognized as a tuple.
This function should only be called inside of @mutate()
, @summarize
, or @summarise
.
Arguments
variable[s]
: An unquoted variable, or if multiple, an unquoted tuple of variables.
function[s]
: A function, or if multiple, a tuple of functions.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@summarize(across(b, minimum))
end
1×1 DataFrame
Row │ b_minimum
│ Int64
─────┼───────────
1 │ 1
julia> @chain df begin
@summarize(across((b,c), (minimum, maximum)))
end
1×4 DataFrame
Row │ b_minimum c_minimum b_maximum c_maximum
│ Int64 Int64 Int64 Int64
─────┼────────────────────────────────────────────
1 │ 1 11 5 15
julia> @chain df begin
@mutate(across((b,c), (minimum, maximum)))
end
5×7 DataFrame
Row │ a b c b_minimum c_minimum b_maximum c_maximum
│ Char Int64 Int64 Int64 Int64 Int64 Int64
─────┼────────────────────────────────────────────────────────────────
1 │ a 1 11 1 11 5 15
2 │ b 2 12 1 11 5 15
3 │ c 3 13 1 11 5 15
4 │ d 4 14 1 11 5 15
5 │ e 5 15 1 11 5 15
julia> @chain df begin
@mutate(across((b, starts_with("c")), (minimum, maximum)))
end
5×7 DataFrame
Row │ a b c b_minimum c_minimum b_maximum c_maximum
│ Char Int64 Int64 Int64 Int64 Int64 Int64
─────┼────────────────────────────────────────────────────────────────
1 │ a 1 11 1 11 5 15
2 │ b 2 12 1 11 5 15
3 │ c 3 13 1 11 5 15
4 │ d 4 14 1 11 5 15
5 │ e 5 15 1 11 5 15
#
Tidier.case_when
— Method.
case_when(condition => return_value)
case_when(condition_1 => return_value_1, condition_2 => return_value_2, ...)
Return the corresponding return_value
for the first condition
that evaluates to true
.
The most specific condition should be listed first and most general condition should be listed last. If none of the conditions evaluate to true
, then a missing
value is returned.
Arguments
condition
: A condition that evaluates to true, false, or missing.
return_value
: The value to return if the condition is true.
Examples
julia> df = DataFrame(a = [1, 2, missing, 4, 5]);
julia> @chain df begin
@mutate(b = case_when(a > 4 => "hi",
a > 2 => "medium",
a > 0 => "low"))
end
5×2 DataFrame
Row │ a b
│ Int64? String?
─────┼──────────────────
1 │ 1 low
2 │ 2 low
3 │ missing missing
4 │ 4 medium
5 │ 5 hi
julia> @chain df begin
@mutate(b = case_when(a > 4 => "hi",
a > 2 => "medium",
a > 0 => "low",
true => "unknown"))
end
5×2 DataFrame
Row │ a b
│ Int64? String
─────┼──────────────────
1 │ 1 low
2 │ 2 low
3 │ missing unknown
4 │ 4 medium
5 │ 5 hi
julia> @chain df begin
@mutate(b = case_when(a >= 3 => 3,
true => a))
end
5×2 DataFrame
Row │ a b
│ Int64? Int64?
─────┼──────────────────
1 │ 1 1
2 │ 2 2
3 │ missing missing
4 │ 4 3
5 │ 5 3
julia> @chain df begin
@mutate(b = case_when(a >= 3 => 3,
ismissing(a) => 0,
true => a))
end
5×2 DataFrame
Row │ a b
│ Int64? Int64
─────┼────────────────
1 │ 1 1
2 │ 2 2
3 │ missing 0
4 │ 4 3
5 │ 5 3
#
Tidier.desc
— Method.
desc(col)
Orders the rows of a DataFrame column in descending order when used inside of @arrange()
. This function should only be called inside of @arrange().
Arguments
col
: An unquoted column name.
Examples
julia> df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);
julia> @chain df begin
@arrange(a, desc(b))
end
10×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 2 12
2 │ a 1 11
3 │ b 4 14
4 │ b 3 13
5 │ c 6 16
6 │ c 5 15
7 │ d 8 18
8 │ d 7 17
9 │ e 10 20
10 │ e 9 19
#
Tidier.if_else
— Method.
if_else(condition, yes, no, [miss])
Return the yes
value if the condition
is true
and the no
value if the condition
is false
. If miss
is specified, then the provided miss
value is returned when the condition
contains a missing
value. If miss
is not specified, then the returned value is an explicit missing
value.
Arguments
condition
: A condition that evaluates to true, false, or missing.
yes
: Value to return if the condition is true.
no
: Value to return if the condition is false.
miss
: Optional. Value to return if the condition is missing.
Examples
julia> df = DataFrame(a = [1, 2, missing, 4, 5]);
julia> @chain df begin
@mutate(b = if_else(a >= 3, "yes", "no"))
end
5×2 DataFrame
Row │ a b
│ Int64? String?
─────┼──────────────────
1 │ 1 no
2 │ 2 no
3 │ missing missing
4 │ 4 yes
5 │ 5 yes
julia> @chain df begin
@mutate(b = if_else(a >= 3, "yes", "no", "unknown"))
end
5×2 DataFrame
Row │ a b
│ Int64? String
─────┼──────────────────
1 │ 1 no
2 │ 2 no
3 │ missing unknown
4 │ 4 yes
5 │ 5 yes
julia> @chain df begin
@mutate(b = if_else(a >= 3, 3, a))
end
5×2 DataFrame
Row │ a b
│ Int64? Int64?
─────┼──────────────────
1 │ 1 1
2 │ 2 2
3 │ missing missing
4 │ 4 3
5 │ 5 3
julia> @chain df begin
@mutate(b = if_else(a >= 3, 3, a, 0))
end
5×2 DataFrame
Row │ a b
│ Int64? Int64
─────┼────────────────
1 │ 1 1
2 │ 2 2
3 │ missing 0
4 │ 4 3
5 │ 5 3
#
Tidier.@arrange
— Macro.
@arrange(df, exprs...)
Order the rows of a DataFrame by the values of specified columns.
Arguments
df
: A DataFrame.
exprs...
: Variables from the input DataFrame. Use desc() to sort in descending order. Multiple variables can be specified, separated by commas.
Examples
julia> df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);
julia> @chain df begin
@arrange(a)
end
10×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ a 2 12
3 │ b 3 13
4 │ b 4 14
5 │ c 5 15
6 │ c 6 16
7 │ d 7 17
8 │ d 8 18
9 │ e 9 19
10 │ e 10 20
julia> @chain df begin
@arrange(a, desc(b))
end
10×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 2 12
2 │ a 1 11
3 │ b 4 14
4 │ b 3 13
5 │ c 6 16
6 │ c 5 15
7 │ d 8 18
8 │ d 7 17
9 │ e 10 20
10 │ e 9 19
#
Tidier.@filter
— Macro.
@filter(df, exprs...)
Subset a DataFrame and return a copy of the DataFrame containing only the rows where the specified conditions are satisfied.
Arguments
df
: A DataFrame.
exprs...
: transformation(s) that produce vectors containing true or false.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@filter(b >= mean(b))
end
3×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ c 3 13
2 │ d 4 14
3 │ e 5 15
julia> @chain df begin
@filter(b in (1, 3))
end
2×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ c 3 13
#
Tidier.@full_join
— Macro.
@full_join(df1, df2, [by])
Perform a full join on df1
and df2
with an optional by
.
Arguments
df1
: A DataFrame.
df2
: A DataFrame.
by
: An optional column or tuple of columns. by supports interpolation of individual columns. If by is not supplied, then it will be inferred from shared names of columns between df1 and df2.
Examples
julia> df1 = DataFrame(a = ["a", "b"], b = 1:2);
julia> df2 = DataFrame(a = ["a", "c"], c = 3:4);
julia> @full_join(df1, df2)
3×3 DataFrame
Row │ a b c
│ String Int64? Int64?
─────┼──────────────────────────
1 │ a 1 3
2 │ b 2 missing
3 │ c missing 4
julia> @full_join(df1, df2, a)
3×3 DataFrame
Row │ a b c
│ String Int64? Int64?
─────┼──────────────────────────
1 │ a 1 3
2 │ b 2 missing
3 │ c missing 4
julia> @full_join(df1, df2, a = a)
3×3 DataFrame
Row │ a b c
│ String Int64? Int64?
─────┼──────────────────────────
1 │ a 1 3
2 │ b 2 missing
3 │ c missing 4
julia> @full_join(df1, df2, "a")
3×3 DataFrame
Row │ a b c
│ String Int64? Int64?
─────┼──────────────────────────
1 │ a 1 3
2 │ b 2 missing
3 │ c missing 4
julia> @full_join(df1, df2, "a" = "a")
3×3 DataFrame
Row │ a b c
│ String Int64? Int64?
─────┼──────────────────────────
1 │ a 1 3
2 │ b 2 missing
3 │ c missing 4
#
Tidier.@group_by
— Macro.
@group_by(df, exprs...)
Return a GroupedDataFrame
where operations are performed by groups specified by unique sets of cols
.
Arguments
df
: A DataFrame.
exprs...
: DataFrame columns to group by or tidy expressions. Can be a single tidy expression or multiple expressions separated by commas.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@group_by(a)
@summarize(b = mean(b))
end
5×2 DataFrame
Row │ a b
│ Char Float64
─────┼───────────────
1 │ a 1.0
2 │ b 2.0
3 │ c 3.0
4 │ d 4.0
5 │ e 5.0
julia> @chain df begin
@group_by(d = uppercase(a))
@summarize(b = mean(b))
end
5×2 DataFrame
Row │ d b
│ Char Float64
─────┼───────────────
1 │ A 1.0
2 │ B 2.0
3 │ C 3.0
4 │ D 4.0
5 │ E 5.0
#
Tidier.@inner_join
— Macro.
@inner_join(df1, df2, [by])
Perform an inner join on df1
and df2
with an optional by
.
Arguments
df1
: A DataFrame.
df2
: A DataFrame.
by
: An optional column or tuple of columns. by supports interpolation of individual columns. If by is not supplied, then it will be inferred from shared names of columns between df1 and df2.
Examples
julia> df1 = DataFrame(a = ["a", "b"], b = 1:2);
julia> df2 = DataFrame(a = ["a", "c"], c = 3:4);
julia> @inner_join(df1, df2)
1×3 DataFrame
Row │ a b c
│ String Int64 Int64
─────┼──────────────────────
1 │ a 1 3
julia> @inner_join(df1, df2, a)
1×3 DataFrame
Row │ a b c
│ String Int64 Int64
─────┼──────────────────────
1 │ a 1 3
julia> @inner_join(df1, df2, a = a)
1×3 DataFrame
Row │ a b c
│ String Int64 Int64
─────┼──────────────────────
1 │ a 1 3
julia> @inner_join(df1, df2, "a")
1×3 DataFrame
Row │ a b c
│ String Int64 Int64
─────┼──────────────────────
1 │ a 1 3
julia> @inner_join(df1, df2, "a" = "a")
1×3 DataFrame
Row │ a b c
│ String Int64 Int64
─────┼──────────────────────
1 │ a 1 3
#
Tidier.@left_join
— Macro.
@left_join(df1, df2, [by])
Perform a left join on df1
and df2
with an optional by
.
Arguments
df1
: A DataFrame.
df2
: A DataFrame.
by
: An optional column or tuple of columns. by supports interpolation of individual columns. If by is not supplied, then it will be inferred from shared names of columns between df1 and df2.
Examples
julia> df1 = DataFrame(a = ["a", "b"], b = 1:2);
julia> df2 = DataFrame(a = ["a", "c"], c = 3:4);
julia> @left_join(df1, df2)
2×3 DataFrame
Row │ a b c
│ String Int64 Int64?
─────┼────────────────────────
1 │ a 1 3
2 │ b 2 missing
julia> @left_join(df1, df2, a)
2×3 DataFrame
Row │ a b c
│ String Int64 Int64?
─────┼────────────────────────
1 │ a 1 3
2 │ b 2 missing
julia> @left_join(df1, df2, a = a)
2×3 DataFrame
Row │ a b c
│ String Int64 Int64?
─────┼────────────────────────
1 │ a 1 3
2 │ b 2 missing
julia> @left_join(df1, df2, "a")
2×3 DataFrame
Row │ a b c
│ String Int64 Int64?
─────┼────────────────────────
1 │ a 1 3
2 │ b 2 missing
julia> @left_join(df1, df2, "a" = "a")
2×3 DataFrame
Row │ a b c
│ String Int64 Int64?
─────┼────────────────────────
1 │ a 1 3
2 │ b 2 missing
#
Tidier.@mutate
— Macro.
@mutate(df, exprs...)
Create new columns as functions of existing columns. The results have the same number of rows as df
.
Arguments
df
: A DataFrame.
exprs...
: add new columns or replace values of existing columns using new_variable = values syntax.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@mutate(d = b + c, b_minus_mean_b = b - mean(b))
end
5×5 DataFrame
Row │ a b c d b_minus_mean_b
│ Char Int64 Int64 Int64 Float64
─────┼───────────────────────────────────────────
1 │ a 1 11 12 -2.0
2 │ b 2 12 14 -1.0
3 │ c 3 13 16 0.0
4 │ d 4 14 18 1.0
5 │ e 5 15 20 2.0
julia> @chain df begin
@mutate(d = b in (1,3))
end
5×4 DataFrame
Row │ a b c d
│ Char Int64 Int64 Bool
─────┼───────────────────────────
1 │ a 1 11 true
2 │ b 2 12 false
3 │ c 3 13 true
4 │ d 4 14 false
5 │ e 5 15 false
julia> @chain df begin
@mutate(across((b, c), mean))
end
5×5 DataFrame
Row │ a b c b_mean c_mean
│ Char Int64 Int64 Float64 Float64
─────┼──────────────────────────────────────
1 │ a 1 11 3.0 13.0
2 │ b 2 12 3.0 13.0
3 │ c 3 13 3.0 13.0
4 │ d 4 14 3.0 13.0
5 │ e 5 15 3.0 13.0
#
Tidier.@pivot_longer
— Macro.
@pivot_longer(df, cols, [names_to], [values_to])
Reshapes the DataFrame to make it longer, increasing the number of rows and reducing the number of columns.
Arguments
df
: A DataFrame.
cols
: Columns to pivot into longer format. Multiple columns can be selected but providing tuples of columns is not yet supported.
names_to
: Optional, defaults to variable. The name of the newly created column whose values will contain the input DataFrame's column names.
values_to
: Optional, defaults to value. The name of the newly created column containing the input DataFrame's cell values.
Examples
julia> df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4]);
julia> @pivot_longer(df_wide, A:B)
4×3 DataFrame
Row │ id variable value
│ Int64 String Int64
─────┼────────────────────────
1 │ 1 A 1
2 │ 2 A 3
3 │ 1 B 2
4 │ 2 B 4
julia> @pivot_longer(df_wide, -id)
4×3 DataFrame
Row │ id variable value
│ Int64 String Int64
─────┼────────────────────────
1 │ 1 A 1
2 │ 2 A 3
3 │ 1 B 2
4 │ 2 B 4
julia> @pivot_longer(df_wide, A:B, names_to = letter, values_to = number)
4×3 DataFrame
Row │ id letter number
│ Int64 String Int64
─────┼───────────────────────
1 │ 1 A 1
2 │ 2 A 3
3 │ 1 B 2
4 │ 2 B 4
julia> @pivot_longer(df_wide, A:B, names_to = letter)
4×3 DataFrame
Row │ id letter value
│ Int64 String Int64
─────┼──────────────────────
1 │ 1 A 1
2 │ 2 A 3
3 │ 1 B 2
4 │ 2 B 4
#
Tidier.@pivot_wider
— Macro.
@pivot_wider(df, names_from, values_from)
Reshapes the DataFrame to make it wider, increasing the number of columns and reducing the number of rows.
Arguments
df
: A DataFrame.
names_from
: The name of the column to get the name of the output columns from.
values_from
: The name of the column to get the cell values from.
Examples
julia> df_long = DataFrame(id = [1, 1, 2, 2],
variable = ["A", "B", "A", "B"],
value = [1, 2, 3, 4]);
julia> @pivot_wider(df_long, names_from = variable, values_from = value)
2×3 DataFrame
Row │ id A B
│ Int64 Int64? Int64?
─────┼───────────────────────
1 │ 1 1 2
2 │ 2 3 4
#
Tidier.@pull
— Macro.
@pull(df, column)
Pull (or extract) a column as a vector.
Arguments
df
: A DataFrame.
column
: A single column, referred to either by its name or number.
Examples
julia> df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);
julia> @chain df begin
@pull(a)
end
5-element Vector{Char}:
'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
'b': ASCII/Unicode U+0062 (category Ll: Letter, lowercase)
'c': ASCII/Unicode U+0063 (category Ll: Letter, lowercase)
'd': ASCII/Unicode U+0064 (category Ll: Letter, lowercase)
'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)
julia> @chain df begin
@pull(2)
end
5-element Vector{Int64}:
1
2
3
4
5
#
Tidier.@rename
— Macro.
@rename(df, exprs...)
Change the names of individual columns in a DataFrame. Users can also use @select()
to rename and select columns.
Arguments
df
: A DataFrame.
exprs...
: Use new_name = old_name syntax to rename selected columns.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@rename(d = b, e = c)
end
5×3 DataFrame
Row │ a d e
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ b 2 12
3 │ c 3 13
4 │ d 4 14
5 │ e 5 15
#
Tidier.@right_join
— Macro.
@right_join(df1, df2, [by])
Perform a right join on df1
and df2
with an optional by
.
Arguments
df1
: A DataFrame.
df2
: A DataFrame.
by
: An optional column or tuple of columns. by supports interpolation of individual columns. If by is not supplied, then it will be inferred from shared names of columns between df1 and df2.
Examples
julia> df1 = DataFrame(a = ["a", "b"], b = 1:2);
julia> df2 = DataFrame(a = ["a", "c"], c = 3:4);
julia> @right_join(df1, df2)
2×3 DataFrame
Row │ a b c
│ String Int64? Int64
─────┼────────────────────────
1 │ a 1 3
2 │ c missing 4
julia> @right_join(df1, df2, a)
2×3 DataFrame
Row │ a b c
│ String Int64? Int64
─────┼────────────────────────
1 │ a 1 3
2 │ c missing 4
julia> @right_join(df1, df2, a = a)
2×3 DataFrame
Row │ a b c
│ String Int64? Int64
─────┼────────────────────────
1 │ a 1 3
2 │ c missing 4
julia> @right_join(df1, df2, "a")
2×3 DataFrame
Row │ a b c
│ String Int64? Int64
─────┼────────────────────────
1 │ a 1 3
2 │ c missing 4
julia> @right_join(df1, df2, "a" = "a")
2×3 DataFrame
Row │ a b c
│ String Int64? Int64
─────┼────────────────────────
1 │ a 1 3
2 │ c missing 4
#
Tidier.@select
— Macro.
@select(df, exprs...)
Select variables in a DataFrame.
Arguments
df
: A DataFrame.
exprs...
: One or more unquoted variable names separated by commas. Variable names can also be used as their positions in the data, like x:y, to select a range of variables.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@select(a, b, c)
end
5×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ b 2 12
3 │ c 3 13
4 │ d 4 14
5 │ e 5 15
julia> @chain df begin
@select(a:b)
end
5×2 DataFrame
Row │ a b
│ Char Int64
─────┼─────────────
1 │ a 1
2 │ b 2
3 │ c 3
4 │ d 4
5 │ e 5
julia> @chain df begin
@select(1:2)
end
5×2 DataFrame
Row │ a b
│ Char Int64
─────┼─────────────
1 │ a 1
2 │ b 2
3 │ c 3
4 │ d 4
5 │ e 5
julia> @chain df begin
@select(-(a:b))
end
5×1 DataFrame
Row │ c
│ Int64
─────┼───────
1 │ 11
2 │ 12
3 │ 13
4 │ 14
5 │ 15
julia> @chain df begin
@select(contains("b"), starts_with("c"))
end
5×2 DataFrame
Row │ b c
│ Int64 Int64
─────┼──────────────
1 │ 1 11
2 │ 2 12
3 │ 3 13
4 │ 4 14
5 │ 5 15
julia> @chain df begin
@select(-(1:2))
end
5×1 DataFrame
Row │ c
│ Int64
─────┼───────
1 │ 11
2 │ 12
3 │ 13
4 │ 14
5 │ 15
julia> @chain df begin
@select(-c)
end
5×2 DataFrame
Row │ a b
│ Char Int64
─────┼─────────────
1 │ a 1
2 │ b 2
3 │ c 3
4 │ d 4
5 │ e 5
#
Tidier.@slice
— Macro.
@slice(df, exprs...)
Select, remove or duplicate rows by indexing their integer positions.
Arguments
df
: A DataFrame.
exprs...
: integer row values. Use positive values to keep the rows, or negative values to drop. Values provided must be either all positive or all negative, and they must be within the range of the DataFrame's row numbers.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@slice(1:5)
end
5×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ b 2 12
3 │ c 3 13
4 │ d 4 14
5 │ e 5 15
julia> @chain df begin
@slice(-(1:2))
end
3×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ c 3 13
2 │ d 4 14
3 │ e 5 15
julia> @chain df begin
@group_by(a)
@slice(1)
@ungroup
end
5×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ b 2 12
3 │ c 3 13
4 │ d 4 14
5 │ e 5 15
#
Tidier.@summarise
— Macro.
@summarize(df, exprs...)
@summarise(df, exprs...)
Create a new DataFrame with one row that aggregates all observations from the input DataFrame or GroupedDataFrame.
Arguments
df
: A DataFrame.
exprs...
: a new_variable = function(old_variable) pair. function() should be an aggregate function that returns a single value.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@summarize(mean_b = mean(b), median_b = median(b))
end
1×2 DataFrame
Row │ mean_b median_b
│ Float64 Float64
─────┼───────────────────
1 │ 3.0 3.0
julia> @chain df begin
@summarise(mean_b = mean(b), median_b = median(b))
end
1×2 DataFrame
Row │ mean_b median_b
│ Float64 Float64
─────┼───────────────────
1 │ 3.0 3.0
julia> @chain df begin
@summarize(across((b,c), (minimum, maximum)))
end
1×4 DataFrame
Row │ b_minimum c_minimum b_maximum c_maximum
│ Int64 Int64 Int64 Int64
─────┼────────────────────────────────────────────
1 │ 1 11 5 15
#
Tidier.@summarize
— Macro.
@summarize(df, exprs...)
@summarise(df, exprs...)
Create a new DataFrame with one row that aggregates all observations from the input DataFrame or GroupedDataFrame.
Arguments
df
: A DataFrame.
exprs...
: a new_variable = function(old_variable) pair. function() should be an aggregate function that returns a single value.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@summarize(mean_b = mean(b), median_b = median(b))
end
1×2 DataFrame
Row │ mean_b median_b
│ Float64 Float64
─────┼───────────────────
1 │ 3.0 3.0
julia> @chain df begin
@summarise(mean_b = mean(b), median_b = median(b))
end
1×2 DataFrame
Row │ mean_b median_b
│ Float64 Float64
─────┼───────────────────
1 │ 3.0 3.0
julia> @chain df begin
@summarize(across((b,c), (minimum, maximum)))
end
1×4 DataFrame
Row │ b_minimum c_minimum b_maximum c_maximum
│ Int64 Int64 Int64 Int64
─────┼────────────────────────────────────────────
1 │ 1 11 5 15
#
Tidier.@transmute
— Macro.
@transmute(df, exprs...)
Create a new DataFrame with only computed columns.
Arguments
df
: A DataFrame.
exprs...
: add new columns or replace values of existing columns using new_variable = values syntax.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@transmute(d = b + c)
end
5×1 DataFrame
Row │ d
│ Int64
─────┼───────
1 │ 12
2 │ 14
3 │ 16
4 │ 18
5 │ 20
#
Tidier.@ungroup
— Macro.
@ungroup(df)
Return a DataFrame
with all groups removed.
If this is applied to a GroupedDataFrame
, then it removes the grouping. If this is applied to a DataFrame
(without any groups), then it returns the DataFrame
unchanged.
Arguments
df
: A GroupedDataFrame or DataFrame.
Examples
julia> df = DataFrame(a = repeat('a':'e'), b = 1:5, c = 11:15);
julia> @chain df begin
@group_by(a)
end
GroupedDataFrame with 5 groups based on key: a
First Group (1 row): a = 'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
⋮
Last Group (1 row): a = 'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ e 5 15
julia> @chain df begin
@group_by(a)
@ungroup
end
5×3 DataFrame
Row │ a b c
│ Char Int64 Int64
─────┼────────────────────
1 │ a 1 11
2 │ b 2 12
3 │ c 3 13
4 │ d 4 14
5 │ e 5 15