RapidFuzz
distance.hpp
1/* SPDX-License-Identifier: MIT */
2/* Copyright © 2022-present Max Bachmann */
3
4#pragma once
5#include <rapidfuzz/distance/DamerauLevenshtein.hpp>
6#include <rapidfuzz/distance/Hamming.hpp>
7#include <rapidfuzz/distance/Indel.hpp>
8#include <rapidfuzz/distance/Jaro.hpp>
9#include <rapidfuzz/distance/JaroWinkler.hpp>
10#include <rapidfuzz/distance/LCSseq.hpp>
11#include <rapidfuzz/distance/Levenshtein.hpp>
12#include <rapidfuzz/distance/OSA.hpp>
13#include <rapidfuzz/distance/Postfix.hpp>
14#include <rapidfuzz/distance/Prefix.hpp>
15
16namespace rapidfuzz {
17
18namespace detail {
19template <typename ReturnType, typename InputIt1, typename InputIt2>
20ReturnType editops_apply_impl(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
21 InputIt2 last2)
22{
23 auto len1 = static_cast<size_t>(std::distance(first1, last1));
24 auto len2 = static_cast<size_t>(std::distance(first2, last2));
25
26 ReturnType res_str;
27 res_str.resize(len1 + len2);
28 size_t src_pos = 0;
29 size_t dest_pos = 0;
30
31 for (const auto& op : ops) {
32 /* matches between last and current editop */
33 while (src_pos < op.src_pos) {
34 res_str[dest_pos] =
35 static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(src_pos)]);
36 src_pos++;
37 dest_pos++;
38 }
39
40 switch (op.type) {
41 case EditType::None:
42 case EditType::Replace:
43 res_str[dest_pos] =
44 static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
45 src_pos++;
46 dest_pos++;
47 break;
48 case EditType::Insert:
49 res_str[dest_pos] =
50 static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
51 dest_pos++;
52 break;
53 case EditType::Delete: src_pos++; break;
54 }
55 }
56
57 /* matches after the last editop */
58 while (src_pos < len1) {
59 res_str[dest_pos] =
60 static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(src_pos)]);
61 src_pos++;
62 dest_pos++;
63 }
64
65 res_str.resize(dest_pos);
66 return res_str;
67}
68
69template <typename ReturnType, typename InputIt1, typename InputIt2>
70ReturnType opcodes_apply_impl(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
71 InputIt2 last2)
72{
73 auto len1 = static_cast<size_t>(std::distance(first1, last1));
74 auto len2 = static_cast<size_t>(std::distance(first2, last2));
75
76 ReturnType res_str;
77 res_str.resize(len1 + len2);
78 size_t dest_pos = 0;
79
80 for (const auto& op : ops) {
81 switch (op.type) {
82 case EditType::None:
83 for (auto i = op.src_begin; i < op.src_end; ++i) {
84 res_str[dest_pos++] =
85 static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(i)]);
86 }
87 break;
88 case EditType::Replace:
89 case EditType::Insert:
90 for (auto i = op.dest_begin; i < op.dest_end; ++i) {
91 res_str[dest_pos++] =
92 static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(i)]);
93 }
94 break;
95 case EditType::Delete: break;
96 }
97 }
98
99 res_str.resize(dest_pos);
100 return res_str;
101}
102
103} // namespace detail
104
105template <typename CharT, typename InputIt1, typename InputIt2>
106std::basic_string<CharT> editops_apply_str(const Editops& ops, InputIt1 first1, InputIt1 last1,
107 InputIt2 first2, InputIt2 last2)
108{
109 return detail::editops_apply_impl<std::basic_string<CharT>>(ops, first1, last1, first2, last2);
110}
111
112template <typename CharT, typename Sentence1, typename Sentence2>
113std::basic_string<CharT> editops_apply_str(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
114{
115 return detail::editops_apply_impl<std::basic_string<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
116 detail::to_begin(s2), detail::to_end(s2));
117}
118
119template <typename CharT, typename InputIt1, typename InputIt2>
120std::basic_string<CharT> opcodes_apply_str(const Opcodes& ops, InputIt1 first1, InputIt1 last1,
121 InputIt2 first2, InputIt2 last2)
122{
123 return detail::opcodes_apply_impl<std::basic_string<CharT>>(ops, first1, last1, first2, last2);
124}
125
126template <typename CharT, typename Sentence1, typename Sentence2>
127std::basic_string<CharT> opcodes_apply_str(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
128{
129 return detail::opcodes_apply_impl<std::basic_string<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
130 detail::to_begin(s2), detail::to_end(s2));
131}
132
133template <typename CharT, typename InputIt1, typename InputIt2>
134std::vector<CharT> editops_apply_vec(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
135 InputIt2 last2)
136{
137 return detail::editops_apply_impl<std::vector<CharT>>(ops, first1, last1, first2, last2);
138}
139
140template <typename CharT, typename Sentence1, typename Sentence2>
141std::vector<CharT> editops_apply_vec(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
142{
143 return detail::editops_apply_impl<std::vector<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
144 detail::to_begin(s2), detail::to_end(s2));
145}
146
147template <typename CharT, typename InputIt1, typename InputIt2>
148std::vector<CharT> opcodes_apply_vec(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
149 InputIt2 last2)
150{
151 return detail::opcodes_apply_impl<std::vector<CharT>>(ops, first1, last1, first2, last2);
152}
153
154template <typename CharT, typename Sentence1, typename Sentence2>
155std::vector<CharT> opcodes_apply_vec(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
156{
157 return detail::opcodes_apply_impl<std::vector<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
158 detail::to_begin(s2), detail::to_end(s2));
159}
160
161} // namespace rapidfuzz