1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
use super::*;
use crate::frame::groupby::IntoGroupsProxy;

impl CategoricalChunked {
    /// Returns a categorical array that holds each distinct category key once.
    ///
    /// When `can_fast_unique()` reports `true`, the keys are read directly from
    /// the reverse mapping instead of being computed from the data, and the
    /// result is flagged with `set_fast_unique(true)`. Otherwise the distinct
    /// keys are obtained by calling `unique()` on the logical array.
    ///
    /// In both cases the returned array shares this array's reverse mapping.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `unique()` on the logical array.
    pub fn unique(&self) -> PolarsResult<Self> {
        let cat_map = self.get_rev_map();
        if self.can_fast_unique() {
            // Build the key column straight from the reverse mapping:
            // a `0..len` range for a local mapping, the map's own keys for a
            // global one.
            let ca = match &**cat_map {
                RevMapping::Local(a) => {
                    UInt32Chunked::from_iter_values(self.logical().name(), 0..(a.len() as u32))
                }
                RevMapping::Global(map, _, _) => {
                    UInt32Chunked::from_iter_values(self.logical().name(), map.keys().copied())
                }
            };
            // SAFETY: the keys were generated from `cat_map` itself and are
            // paired with that very same mapping, so none of them can refer to
            // a category the mapping does not know about.
            unsafe {
                let mut out =
                    CategoricalChunked::from_cats_and_rev_map_unchecked(ca, cat_map.clone());
                out.set_fast_unique(true);
                Ok(out)
            }
        } else {
            let ca = self.logical().unique()?;
            // SAFETY: `unique` only drops duplicate keys from the existing
            // logical array, so every remaining key is still covered by the
            // same reverse mapping.
            unsafe {
                Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
                    ca,
                    cat_map.clone(),
                ))
            }
        }
    }

    /// Returns the number of distinct categories.
    ///
    /// When `can_fast_unique()` reports `true`, this is the length of the
    /// reverse mapping; otherwise it is whatever `n_unique()` on the logical
    /// array returns.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `n_unique()` on the logical array.
    pub fn n_unique(&self) -> PolarsResult<usize> {
        if self.can_fast_unique() {
            Ok(self.get_rev_map().len())
        } else {
            self.logical().n_unique()
        }
    }

    /// Counts how often each category key occurs.
    ///
    /// Returns a two-column `DataFrame`: the categorical values (one row per
    /// group) and a column named `"counts"` with the size of each group. The
    /// frame is sorted on `"counts"` before it is returned (the `true` flag
    /// passed to `sort` presumably requests descending order; confirm against
    /// the `DataFrame::sort` signature).
    ///
    /// # Errors
    ///
    /// Returns an error if grouping the logical array fails or if the final
    /// sort fails.
    pub fn value_counts(&self) -> PolarsResult<DataFrame> {
        // Propagate grouping failures to the caller instead of panicking; this
        // function already returns a `PolarsResult`.
        let groups = self.logical().group_tuples(true, false)?;
        // SAFETY: `groups` was computed from this very logical array, so the
        // indices it holds refer to rows of the series being aggregated.
        let logical_values = unsafe {
            self.logical()
                .clone()
                .into_series()
                .agg_first(&groups)
                // The aggregated series is expected to keep the `u32` type of
                // the logical array it was built from.
                .u32()
                .unwrap()
                .clone()
        };

        // Reuse a clone of `self` and swap in the reduced key column, so the
        // result keeps everything else the clone carries over.
        let mut values = self.clone();
        *values.logical_mut() = logical_values;

        let mut counts = groups.group_count();
        counts.rename("counts");
        let cols = vec![values.into_series(), counts.into_series()];
        let df = DataFrame::new_no_checks(cols);
        df.sort(["counts"], true)
    }
}