1use alloc::{
2 borrow::Cow,
3 string::{String, ToString},
4 vec::Vec,
5};
6use core::{ops::Index, str};
7
8use crate::error::{ReadError, ReadErrorKind};
9
/// Byte span of one field, expressed as the half-open range `start..end`.
///
/// The reader records spans as offsets into its buffer; a `Row` stores them
/// relative to the start of its own text. For a quoted field the span covers
/// the surrounding quote characters as well.
#[derive(Clone, Copy, Debug)]
struct FieldRange {
    /// Offset of the first byte of the field.
    start: usize,
    /// Offset one past the last byte of the field.
    end: usize,
    /// `true` when the field was written inside double quotes.
    quoted: bool,
}
16
/// Position of the parser within the record currently being read.
#[derive(Clone, Copy, Debug, PartialEq)]
enum State {
    /// No byte of the next field has been consumed yet.
    StartOfField,
    /// Inside a field that did not begin with a double quote.
    InUnquoted,
    /// Inside a quoted field, after its opening quote.
    InQuoted,
    /// A double quote was just read inside a quoted field: it is either the
    /// closing quote or the first half of an escaped `""` pair.
    AfterQuote,
}
24
25pub struct Reader {
39 buf: Vec<u8>,
40 pos: usize,
41 field_ranges: Vec<FieldRange>,
42 field_start: usize,
43 field_start_column: usize,
44 state: State,
45 line: usize,
46 column: usize,
47 delimiter: u8,
48 flexible: bool,
49 eof: bool,
50
51 #[cfg(feature = "std")]
52 source: Option<Box<dyn std::io::Read>>,
53}
54
55impl Reader {
56 pub fn new(data: &[u8]) -> Self {
60 Reader {
61 buf: data.to_vec(),
62 pos: 0,
63 field_ranges: Vec::new(),
64 field_start: 0,
65 field_start_column: 1,
66 state: State::StartOfField,
67 line: 1,
68 column: 1,
69 delimiter: b',',
70 flexible: false,
71 eof: true,
72 #[cfg(feature = "std")]
73 source: None,
74 }
75 }
76
77 pub fn set_delimiter(&mut self, byte: u8) -> &mut Self {
79 self.delimiter = byte;
80 self
81 }
82
83 pub fn set_flexible(&mut self, yes: bool) -> &mut Self {
85 self.flexible = yes;
86 self
87 }
88
89 pub fn rows(self) -> Rows {
107 Rows {
108 reader: self,
109 }
110 }
111
112 fn read_row(&mut self) -> Result<Option<Row>, ReadError> {
113 self.field_ranges.clear();
114 self.state = State::StartOfField;
115
116 loop {
117 if self.pos >= self.buf.len() && !self.fill_buf()? {
118 return if self.field_ranges.is_empty() && self.state == State::StartOfField {
119 Ok(None)
120 } else {
121 if self.state == State::InQuoted {
122 return Err(ReadError::new(
123 ReadErrorKind::UnterminatedQuote,
124 self.line,
125 self.column_at(self.field_start),
126 ));
127 }
128 match self.state {
129 State::InUnquoted => {
130 self.field_ranges.push(FieldRange {
131 start: self.field_start,
132 end: self.pos,
133 quoted: false,
134 });
135 }
136 State::AfterQuote => {
137 self.field_ranges.push(FieldRange {
138 start: self.field_start,
139 end: self.pos,
140 quoted: true,
141 });
142 }
143 State::StartOfField => {
144 if !self.field_ranges.is_empty() {
145 self.field_ranges.push(FieldRange {
146 start: self.pos,
147 end: self.pos,
148 quoted: false,
149 });
150 }
151 }
152 State::InQuoted => unreachable!(),
153 }
154 Ok(Some(self.make_row()?))
155 };
156 }
157
158 if self.pos >= self.buf.len() {
159 break;
160 }
161
162 let byte = self.buf[self.pos];
163
164 match self.state {
165 State::StartOfField => {
166 if byte == b'\r' || byte == b'\n' {
167 if !self.field_ranges.is_empty() {
168 self.field_ranges.push(FieldRange {
169 start: self.pos,
170 end: self.pos,
171 quoted: false,
172 });
173 }
174 self.consume_line_end();
175 if self.field_ranges.is_empty() {
176 continue;
177 }
178 return Ok(Some(self.make_row()?));
179 }
180 if byte == self.delimiter {
181 self.field_ranges.push(FieldRange {
182 start: self.pos,
183 end: self.pos,
184 quoted: false,
185 });
186 self.pos += 1;
187 self.column += 1;
188 continue;
189 }
190 if byte == b'"' {
191 self.field_start = self.pos;
192 self.field_start_column = self.column;
193 self.state = State::InQuoted;
194 self.pos += 1;
195 self.column += 1;
196 } else {
197 self.field_start = self.pos;
198 self.field_start_column = self.column;
199 self.state = State::InUnquoted;
200 self.pos += 1;
201 self.column += 1;
202 }
203 }
204
205 State::InUnquoted => {
206 if byte == self.delimiter {
207 self.field_ranges.push(FieldRange {
208 start: self.field_start,
209 end: self.pos,
210 quoted: false,
211 });
212 self.state = State::StartOfField;
213 self.pos += 1;
214 self.column = 1;
215 } else if byte == b'\r' || byte == b'\n' {
216 self.field_ranges.push(FieldRange {
217 start: self.field_start,
218 end: self.pos,
219 quoted: false,
220 });
221 self.consume_line_end();
222 return Ok(Some(self.make_row()?));
223 } else {
224 self.pos += 1;
225 self.column += 1;
226 }
227 }
228
229 State::InQuoted => {
230 if byte == b'"' {
231 self.state = State::AfterQuote;
232 self.pos += 1;
233 self.column += 1;
234 } else {
235 self.pos += 1;
236 self.column += 1;
237 }
238 }
239
240 State::AfterQuote => {
241 if byte == b'"' {
242 self.state = State::InQuoted;
243 self.pos += 1;
244 self.column += 1;
245 } else if byte == self.delimiter {
246 self.field_ranges.push(FieldRange {
247 start: self.field_start,
248 end: self.pos,
249 quoted: true,
250 });
251 self.state = State::StartOfField;
252 self.pos += 1;
253 self.column = 1;
254 } else if byte == b'\r' || byte == b'\n' {
255 self.field_ranges.push(FieldRange {
256 start: self.field_start,
257 end: self.pos,
258 quoted: true,
259 });
260 self.consume_line_end();
261 return Ok(Some(self.make_row()?));
262 } else {
263 return Err(ReadError::new(
264 ReadErrorKind::TrailingContent,
265 self.line,
266 self.column_at(self.field_start),
267 ));
268 }
269 }
270 }
271 }
272
273 Ok(None)
274 }
275
276 fn make_row(&mut self) -> Result<Row, ReadError> {
277 let ranges = core::mem::take(&mut self.field_ranges);
278 if ranges.is_empty() {
279 return Ok(Row {
280 input: String::new(),
281 fields: Vec::new(),
282 });
283 }
284 let buf_start = ranges[0].start;
285 let buf_end = ranges.last().unwrap().end;
286 let raw = self.buf[buf_start..buf_end].to_vec();
287 let input = String::from_utf8(raw).map_err(|_| ReadError::new(ReadErrorKind::InvalidUtf8, self.line, 0))?;
288 let fields: Vec<FieldRange> = ranges
289 .iter()
290 .map(|r| FieldRange {
291 start: r.start - buf_start,
292 end: r.end - buf_start,
293 quoted: r.quoted,
294 })
295 .collect();
296 if self.pos > 0 {
297 self.buf.drain(..self.pos);
298 self.pos = 0;
299 }
300 Ok(Row {
301 input,
302 fields,
303 })
304 }
305
306 fn consume_line_end(&mut self) {
307 if self.pos < self.buf.len() && self.buf[self.pos] == b'\r' {
308 self.pos += 1;
309 }
310 if self.pos < self.buf.len() && self.buf[self.pos] == b'\n' {
311 self.pos += 1;
312 }
313 self.line += 1;
314 self.column = 1;
315 }
316
317 fn fill_buf(&mut self) -> Result<bool, ReadError> {
318 if self.eof {
319 return Ok(false);
320 }
321 #[cfg(feature = "std")]
322 {
323 if let Some(source) = &mut self.source {
324 let mut tmp = [0u8; 8192];
325 let n = source.read(&mut tmp)?;
326 if n == 0 {
327 self.eof = true;
328 return Ok(false);
329 }
330 self.buf.extend_from_slice(&tmp[..n]);
331 return Ok(true);
332 }
333 }
334 Ok(false)
335 }
336
337 fn column_at(&self, _pos: usize) -> usize {
338 self.field_start_column
339 }
340}
341
#[cfg(feature = "std")]
impl Reader {
    /// Creates a reader that pulls its input from `reader` on demand.
    ///
    /// The buffer starts empty and the reader is not marked as finished, so
    /// the first parse step reads from the stream. Defaults otherwise match
    /// those of `Reader::new`: line 1, column 1, `,` as the delimiter.
    pub fn from_reader(reader: impl std::io::Read + 'static) -> Self {
        let source: Box<dyn std::io::Read> = Box::new(reader);
        Reader {
            buf: Vec::new(),
            pos: 0,
            field_ranges: Vec::new(),
            field_start: 0,
            field_start_column: 1,
            state: State::StartOfField,
            line: 1,
            column: 1,
            delimiter: b',',
            flexible: false,
            eof: false,
            source: Some(source),
        }
    }
}
376
377#[derive(Clone, Debug)]
390pub struct Row {
391 input: String,
392 fields: Vec<FieldRange>,
393}
394
395impl Row {
396 pub fn len(&self) -> usize {
398 self.fields.len()
399 }
400
401 pub fn is_empty(&self) -> bool {
403 self.fields.is_empty()
404 }
405
406 pub fn get_raw(&self, index: usize) -> Option<&str> {
409 let range = self.fields.get(index)?;
410 Some(&self.input[range.start..range.end])
411 }
412
413 pub fn fields(&self) -> Fields<'_> {
419 Fields {
420 input: &self.input,
421 ranges: self.fields.iter(),
422 }
423 }
424}
425
426impl Index<usize> for Row {
427 type Output = str;
428
429 fn index(&self, index: usize) -> &str {
430 self.get_raw(index).expect("Row index out of bounds")
431 }
432}
433
434pub struct RowIntoIter {
438 input: String,
439 ranges: alloc::vec::IntoIter<FieldRange>,
440}
441
442impl Iterator for RowIntoIter {
443 type Item = String;
444
445 fn next(&mut self) -> Option<String> {
446 let range = self.ranges.next()?;
447 Some(self.input[range.start..range.end].to_string())
448 }
449
450 fn size_hint(&self) -> (usize, Option<usize>) {
451 self.ranges.size_hint()
452 }
453}
454
455impl ExactSizeIterator for RowIntoIter {
456 fn len(&self) -> usize {
457 self.ranges.len()
458 }
459}
460
461impl IntoIterator for Row {
462 type Item = String;
463 type IntoIter = RowIntoIter;
464
465 fn into_iter(self) -> RowIntoIter {
466 RowIntoIter {
467 input: self.input,
468 ranges: self.fields.into_iter(),
469 }
470 }
471}
472
473pub struct Fields<'a> {
479 input: &'a str,
480 ranges: core::slice::Iter<'a, FieldRange>,
481}
482
483impl<'a> Iterator for Fields<'a> {
484 type Item = Cow<'a, str>;
485
486 fn next(&mut self) -> Option<Cow<'a, str>> {
487 let range = self.ranges.next()?;
488 let raw = &self.input[range.start..range.end];
489
490 if range.quoted {
491 if raw.len() < 2 {
492 return Some(Cow::Borrowed(""));
493 }
494 let content = &raw[1..raw.len() - 1];
495 if content.contains("\"\"") {
496 Some(Cow::Owned(content.replace("\"\"", "\"")))
497 } else {
498 Some(Cow::Borrowed(content))
499 }
500 } else {
501 Some(Cow::Borrowed(raw))
502 }
503 }
504
505 fn size_hint(&self) -> (usize, Option<usize>) {
506 self.ranges.size_hint()
507 }
508}
509
510impl<'a> ExactSizeIterator for Fields<'a> {
511 fn len(&self) -> usize {
512 self.ranges.len()
513 }
514}
515
516pub struct Rows {
534 reader: Reader,
535}
536
537impl Iterator for Rows {
538 type Item = Result<Row, ReadError>;
539
540 fn next(&mut self) -> Option<Self::Item> {
541 match self.reader.read_row() {
542 Ok(Some(row)) => Some(Ok(row)),
543 Ok(None) => None,
544 Err(e) => Some(Err(e)),
545 }
546 }
547}