sampling/histogram/
histogram_float.rs

1use{
2    crate::histogram::*,
3    std::{borrow::*, num::*, sync::atomic::AtomicUsize},
4    num_traits::{float::*, cast::*, identities::*}
5};
6
7#[cfg(feature = "serde_support")]
8use serde::{Serialize, Deserialize};
9
10/// Generic Histogram struct
11#[derive(Debug, Clone)]
12#[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
13pub struct HistogramFloat<T>
14{
15    pub(crate) bin_borders: Vec<T>,
16    pub(crate) hist: Vec<usize>,
17}
18
19impl<T> From<AtomicHistogramFloat<T>> for HistogramFloat<T>
20{
21    fn from(other: AtomicHistogramFloat<T>) -> Self {
22        let hist = other.hist.into_iter()
23            .map(AtomicUsize::into_inner)
24            .collect();
25        Self{
26            hist,
27            bin_borders: other.bin_borders
28        }
29    }
30}
31
32impl<T> HistogramFloat<T>{
33    /// similar to `self.borders_clone` but does not allocate memory
34    pub fn borders(&self) -> &Vec<T>
35    {
36        &self.bin_borders
37    }
38}
39
40impl<T> HistogramFloat<T>
41where T: Copy {
42    fn get_right(&self) -> T
43    {
44        self.bin_borders[self.bin_borders.len() - 1]
45    }
46}
47
48impl<T> HistogramFloat<T> 
49where T: Float + PartialOrd + FromPrimitive {
50    /// # Create a new Histogram
51    /// * right exclusive, left inclusive
52    /// * if you want `right` to behave (almost) the same as an inclusive border,
53    ///   consider using `new(left, right + T::EPSILON, bins)` (make sure, that adding Epsilon actually changes the value!)
54    pub fn new(left: T, right: T, bins: usize) -> Result<Self, HistErrors>
55    {
56        if left >= right {
57            return Err(HistErrors::IntervalWidthZero);
58        }
59        else if bins < 1 {
60            return Err(HistErrors::NoBins);
61        }
62        if !left.is_finite() || !right.is_finite() {
63            return Err(HistErrors::InvalidVal);
64        }
65
66        let bins_as_t = match T::from_usize(bins) {
67            Some(val) => val,
68            None => return Err(HistErrors::UsizeCastError),
69        };
70
71        let bin_size = (right - left) / bins_as_t;
72        let hist = vec![0; bins];
73        let mut bin_borders = Vec::with_capacity(bins + 1);
74        bin_borders.extend((0..bins)
75            .map(|val| bin_size.mul_add(T::from_usize(val).unwrap(), left)) 
76        );
77        bin_borders.push(right);
78        Ok(
79            Self{
80                bin_borders,
81                hist
82            }
83        )
84    }
85
86    /// Returns the length of the interval
87    pub fn interval_length(&self) -> T
88    {
89        self.get_right() - self.first_border()
90    }
91
92    /// # Iterator over all the bins
93    /// In HistogramFloat a bin is defined by two values: The left border (inclusive)
94    /// and the right border (exclusive)
95    /// 
96    /// Here you get an iterator which iterates over said borders.
97    /// The Iterator returns a borrowed Array of length two, where the first value is the left (inclusive) border 
98    /// and the second value is the right (exclusive) border
99    /// ## Example
100    /// ```
101    /// use sampling::histogram::*;
102    /// 
103    /// let hist = HistogramFloat::<f32>::new(0.0, 1.0, 2).unwrap();
104    /// let mut iter = hist.bin_iter();
105    /// assert_eq!(iter.next(), Some(&[0.0, 0.5]));
106    /// assert_eq!(iter.next(), Some(&[0.5, 1.0]));
107    /// assert_eq!(iter.next(), None);
108    /// ```
109    pub fn bin_iter(&self) -> impl Iterator<Item = &[T;2]>
110    {
111        BorderWindow::new(self.bin_borders.as_slice())
112    }
113
114    /// # Iterate over all bins
115    /// In HistogramFloat a bin is defined by two values: The left border (inclusive)
116    /// and the right border (exclusive)
117    /// 
118    /// This Iterator iterates over these values as well as the corresponding hit count (
119    /// i.e., how often a bin was hit)
120    /// ## Item of Iterator
121    /// (&[left_border, right_border], number_of_hits)
122    /// ## Example
123    /// ```
124    /// use sampling::histogram::*;
125    /// 
126    /// let mut hist = HistogramFloat::<f64>::new(0.0, 1.0, 2).unwrap();
127    /// 
128    /// hist.increment_quiet(0.5);
129    /// hist.increment_quiet(0.71253782387);
130    /// 
131    /// let mut iter = hist.bin_hits_iter();
132    /// assert_eq!(iter.next(), Some((&[0.0, 0.5], 0)));
133    /// assert_eq!(iter.next(), Some((&[0.5, 1.0], 2)));
134    /// assert_eq!(iter.next(), None);
135    /// ```
136    pub fn bin_hits_iter(&self) -> impl Iterator<Item = (&[T;2], usize)>
137    {
138        self.bin_iter()
139            .zip(
140                self.hist
141                    .iter()
142                    .copied()
143            )
144    }
145
146    #[inline]
147    /// # Increment hit count of bin
148    /// This will increment the hit count of the bin corresponding to the value `val`.
149    /// If the bin was valid it will return the index of the corresponding bin
150    pub fn increment<B: Borrow<T>>(&mut self, val: B)-> Result<usize, HistErrors>
151    {
152        self.count_val(val)
153    }
154
155    #[inline]
156    /// # Increment hit count
157    /// Increments the hit count of the bin corresponding to `val`.
158    /// If no bin corresponding to `val` exists, nothing happens
159    pub fn increment_quiet<B: Borrow<T>>(&mut self, val: B)
160    {
161        let _ = self.increment(val);
162    }
163}
164
165impl<T> Histogram for HistogramFloat<T>
166{
167    #[inline(always)]
168    fn bin_count(&self) -> usize {
169        self.hist.len()
170    }
171
172    #[inline(always)]
173    fn hist(&self) -> &Vec<usize> {
174        &self.hist
175    }
176
177    #[inline]
178    fn increment_index_by(&mut self, index: usize, count: usize) -> Result<(), HistErrors> {
179        match self.hist.get_mut(index) {
180            None => Err(HistErrors::OutsideHist),
181            Some(val) => {
182                *val += count;
183                Ok(())
184            },
185        }
186    }
187
188    #[inline]
189    fn reset(&mut self) {
190        // compiles to memset ^__^
191        self.hist
192            .iter_mut()
193            .for_each(|h| *h = 0);
194    }
195
196
197}
198
199impl<T> HistogramVal<T> for HistogramFloat<T>
200where T: Float + Zero + NumCast + PartialOrd + FromPrimitive
201{
202
203    fn count_val<V: Borrow<T>>(&mut self, val: V) -> Result<usize, HistErrors>
204    {
205        let id = self.get_bin_index(val)?;
206        self.increment_index(id)
207            .map(|_| id)
208    }
209
210    fn distance<V: Borrow<T>>(&self, val: V) -> f64 {
211        let val = val.borrow();
212        if self.is_inside(val) {
213            0.0
214        } else if !val.is_finite() {
215            f64::INFINITY
216        } else if *val < self.first_border() {
217            (self.first_border() - *val).to_f64().unwrap()
218        } else {
219            (*val - self.get_right() + T::epsilon())
220                .to_f64()
221                .unwrap()
222        }
223    }
224
225    #[inline]
226    fn first_border(&self) -> T {
227        self.bin_borders[0]
228    }
229
230    #[inline]
231    fn last_border(&self) -> T {
232        self.bin_borders[self.bin_borders.len() - 1]
233    }
234
235    #[inline(always)]
236    fn last_border_is_inclusive(&self) -> bool {
237        false
238    }
239
240    fn is_inside<V: Borrow<T>>(&self, val: V) -> bool {
241        *val.borrow() >= self.bin_borders[0] 
242            && *val.borrow() < self.bin_borders[self.bin_borders.len() - 1]
243    }
244
245    fn not_inside<V: Borrow<T>>(&self, val: V) -> bool {
246        !(*val.borrow()).is_finite() 
247            || *val.borrow() < self.bin_borders[0] 
248            || *val.borrow() >= self.bin_borders[self.bin_borders.len() - 1]
249    }
250
251
252    fn get_bin_index<V: Borrow<T>>(&self, val: V) -> Result<usize, HistErrors>
253    {
254        let val = val.borrow();
255        if !val.is_finite(){
256            Err(HistErrors::InvalidVal)
257        } 
258        else if self.is_inside(val)
259        {
260            let search_res = self.bin_borders.binary_search_by(
261                |v|
262                v.partial_cmp(val).expect("Should never be NaN")
263            );
264            match search_res
265            {
266                Result::Ok(index) => {
267                    Ok(index)
268                },
269                Result::Err(index_p1) => {
270                    Ok(index_p1 - 1)
271                }
272            }
273        }
274        else {
275            Err(HistErrors::OutsideHist)
276        } 
277    }
278
279    /// # consider using `self.bin_iter()` instead
280    /// * This gives you a dynamic iterator over all bins-
281    /// * For this type all bins are InclusiveExclusive -> Usage of `self.bin_iter`
282    ///   is more efficient
283    fn bin_enum_iter(&self) -> Box<dyn Iterator<Item=Bin<T>> + '_> {
284        let iter = self
285            .bin_iter()
286            .map(|slice| Bin::InclusiveExclusive(slice[0], slice[1]));
287        Box::new(iter)
288    }
289}
290
291impl<T> HistogramIntervalDistance<T> for HistogramFloat<T> 
292where T: Float + FromPrimitive + Zero + NumCast
293{
294    fn interval_distance_overlap<V: Borrow<T>>(&self, val: V, overlap: NonZeroUsize) -> usize {
295        let val = val.borrow();
296        
297        debug_assert!(self.interval_length() > T::zero());
298        debug_assert!(val.is_finite());
299        if self.not_inside(val) {
300            let num_bins_overlap = self.bin_count() / overlap.get();
301            let dist = 
302            if *val < self.first_border() { 
303                let tmp = self.first_border() - *val;
304                (tmp / self.interval_length()).floor()
305            } else {
306                let tmp = *val - self.get_right();
307                (tmp / self.interval_length()).ceil()
308            };
309            let int_dist = dist.to_usize().unwrap();
310            1 + int_dist / num_bins_overlap
311        } else {
312            0
313        }
314    }
315}
316
317/// Histogram for binning `f32` - alias for `HistogramFloat<f32>`
318pub type HistF32 = HistogramFloat<f32>;
319
320/// Histogram for binning `f64` - alias for `HistogramFloat<f64>`
321pub type HistF64 = HistogramFloat<f64>;
322
323
324#[cfg(test)]
325mod tests{
326    use rand_pcg::Pcg64Mcg;
327    use rand::{distr::*, SeedableRng};
328    use super::*;
329    use num_traits::Bounded;
330    #[test]
331    fn f64_hist()
332    {
333        let rng = Pcg64Mcg::new(0xcafef00dd15ea5e5);
334        let dist = Uniform::new(f64::EPSILON, 1.0)
335            .unwrap();
336        let mut iter = dist.sample_iter(rng);
337
338        for i in 1..100 {
339            let left = iter.next().unwrap();
340            let right = left + iter.next().unwrap();
341
342            let hist = HistogramFloat::<f64>::new(left, right, i).unwrap();
343
344            assert_eq!(left, hist.first_border(), "i={}", i);
345            assert_eq!(right, hist.get_right(), "i={}", i);
346            assert_eq!(i+1, hist.borders().len(), "i={}", i);
347
348        }
349    }
350
351    fn hist_test_float<T>(left: T, right: T, bin_count: usize)
352    where T: Float + num_traits::Bounded + PartialOrd 
353        + One + NumCast + Copy + FromPrimitive + Bounded + std::fmt::Debug
354        + PartialOrd,
355    {
356
357        let hist_wrapped =  HistogramFloat::<T>::new(left, right, bin_count);
358        if hist_wrapped.is_err(){
359            dbg!(&hist_wrapped);
360        }
361        let hist = hist_wrapped.unwrap();
362        assert!(hist.not_inside(T::infinity()));
363        assert!(hist.not_inside(T::nan()));
364        let len = hist.borders().len();
365        
366        for (id, border) in hist.borders()
367            .iter()
368            .take(len - 1)
369            .enumerate()
370        {
371            assert!(hist.is_inside(border));
372            assert_eq!(hist.is_inside(border), !hist.not_inside(border));
373            assert_eq!(hist.get_bin_index(border).unwrap(), id);
374        }
375        
376        let last_border = hist.borders()[len - 1];
377        assert!(hist.not_inside(last_border));
378        assert_eq!(hist.is_inside(last_border), !hist.not_inside(last_border));
379        assert!(hist.get_bin_index(last_border).is_err());
380        
381
382        for (id, border) in hist.borders()
383            .iter()
384            .skip(1)
385            .enumerate()
386        {
387            let mut m_epsilon = *border;
388            for mut i in 1..{
389                if i > 100 {
390                    i = i * i;
391                }
392                m_epsilon = T::epsilon().mul_add(
393                    T::from_isize(-i).unwrap(), 
394                    *border
395                );
396                if m_epsilon < *border {
397                    break;
398                }
399            }
400            assert!(hist.is_inside(m_epsilon));
401            assert_eq!(hist.get_bin_index(m_epsilon).unwrap(), id);
402        }
403       
404        assert_eq!(
405            HistErrors::InvalidVal,
406            HistogramFloat::<T>::new(T::nan(), right, bin_count).unwrap_err()
407        );
408        assert_eq!(
409            HistErrors::InvalidVal,
410            HistogramFloat::<T>::new(left, T::nan(), bin_count).unwrap_err()
411        );
412        assert_eq!(
413            HistErrors::InvalidVal,
414            HistogramFloat::<T>::new(left, T::infinity(), bin_count).unwrap_err()
415        );
416        assert_eq!(
417            HistErrors::InvalidVal,
418            HistogramFloat::<T>::new(T::neg_infinity(), right, bin_count).unwrap_err()
419        );
420    }
421
422    #[test]
423    fn hist_float()
424    { 
425        let mut rng = Pcg64Mcg::new(0xcafef00dd15ea5e5);
426        let dist = Uniform::new(1usize, 111)
427            .unwrap();
428        let mut iter = dist.sample_iter(
429            Pcg64Mcg::from_rng(&mut rng)
430        );
431        hist_test_float(20.0, 31.0, iter.next().unwrap());
432        hist_test_float(-23.0f32, 31.1232f32, iter.next().unwrap());
433        hist_test_float(-13.0f32, 31.4657f32, iter.next().unwrap());
434        hist_test_float(1.0f64, 3f64, iter.next().unwrap());
435
436        let dist2 = Uniform::new(0.0, 76257f64)
437            .unwrap();
438        for _ in 0..10 {
439            let (left, right) = loop{
440                let left = dist2.sample(&mut rng);
441                let right = left + dist2.sample(&mut rng);
442                if left.is_finite() && right.is_finite(){
443                    break (left, right);
444                }
445            };
446            hist_test_float(left, right, iter.next().unwrap());
447        }
448    }
449}