--- a/rust/chat_sanitizer/Cargo.toml Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/Cargo.toml Fri Dec 28 22:47:17 2018 +0100
@@ -6,3 +6,4 @@
[dependencies]
unicode_skeleton = "0.1"
+itertools = "0.8.0"
--- a/rust/chat_sanitizer/src/bad_words.rs Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/bad_words.rs Fri Dec 28 22:47:17 2018 +0100
@@ -19,13 +19,17 @@
}
impl<T> MessageChecker<T> for BadWordsChecker<T> {
- fn check(&self, player_id: T, message: &str) -> Severity {
+ fn check(&self, _player_id: T, message: &str) -> Severity {
let msg = normalized_message(message);
// silly implementation, allows bad messages with a single good word
- for badword in &self.blacklist {
- if msg.contains(badword) {
- if !self.whitelist.iter().any(|goodword| msg.contains(goodword)) {
+ for bad_word in &self.blacklist {
+ if msg.contains(bad_word) {
+ if !self
+ .whitelist
+ .iter()
+ .any(|good_word| msg.contains(good_word))
+ {
return Severity::Warn;
}
}
@@ -48,7 +52,7 @@
// this one fails
//assert_eq!(checker.check(0, "poop 'fsck -y' poop"), Severity::Warn);
- // ideally this one shouldn't fail
+ // ideally this one shouldn't fail; it needs a better confusables check
// assert_eq!(checker.check(0, "P00P"), Severity::Warn);
}
}
--- a/rust/chat_sanitizer/src/letter_repeat.rs Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/letter_repeat.rs Fri Dec 28 22:47:17 2018 +0100
@@ -1,9 +1,45 @@
use crate::{MessageChecker, Severity};
-struct LetterRepeatChecker {}
+use itertools::Itertools;
+use std::marker::PhantomData;
+
+struct LetterRepeatChecker<T> { // message checker that flags long runs of one repeated character
+ threshold: usize, // run length at which `check` returns `Severity::Warn`
+ player_id_type: PhantomData<T>, // marker only: ties the checker to the player-id type `T` without storing one
+}
-impl<T> MessageChecker<T> for LetterRepeatChecker {
- fn check(&self, player_id: T, message: &str) -> Severity {
+impl<T> LetterRepeatChecker<T> {
+ pub fn new(threshold: usize) -> Self { // `threshold`: length of a same-character run that triggers a warning
+ LetterRepeatChecker {
+ player_id_type: PhantomData,
+ threshold,
+ }
+ }
+}
+
+impl<T> MessageChecker<T> for LetterRepeatChecker<T> {
+ fn check(&self, _player_id: T, message: &str) -> Severity { // the player id plays no part in this check
+ for (_key, group) in &message.chars().group_by(|c| *c) { // one group per run of consecutive equal chars
+ if group.count() >= self.threshold { // run is at least `threshold` chars long
+ return Severity::Warn;
+ }
+ }
+
Severity::Pass
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ #[test]
+ fn it_works() {
+ let checker = LetterRepeatChecker::new(3); // warn on 3 or more identical chars in a row
+ assert_eq!(checker.check(0, "Hello world!"), Severity::Pass); // longest run is "ll" (2 chars)
+ assert_eq!(checker.check(0, "ooops"), Severity::Warn); // "ooo" reaches the threshold
+ assert_eq!(
+ checker.check(0, "жираф - длинношеее животное"), // non-ASCII input: the run is "еее"
+ Severity::Warn
+ );
+ }
+}
--- a/rust/chat_sanitizer/src/lib.rs Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/lib.rs Fri Dec 28 22:47:17 2018 +0100
@@ -1,4 +1,5 @@
pub mod bad_words;
+pub mod letter_repeat;
use unicode_skeleton::UnicodeSkeleton;