1#[cfg(feature = "serde")]
2use alloc::collections::BTreeMap;
3use alloc::{
4 string::{String, ToString},
5 vec::Vec,
6};
7use core::{fmt, marker::PhantomData, str};
8
9use crate::error::{ReadError, ReadErrorKind};
10
/// Position of the row parser relative to the field it is currently scanning.
#[derive(Clone, Copy, Debug, PartialEq)]
enum State {
    /// At the first byte of a field: the start of a row or just after a delimiter.
    StartOfField,
    /// Inside a field that did not open with a double quote.
    InUnquoted,
    /// Inside a double-quoted field, before its closing quote.
    InQuoted,
    /// Just past the closing quote of a quoted field; only a delimiter or a
    /// line terminator is accepted here.
    AfterQuote,
}
18
/// Identifies which byte buffer a [`FieldRange`] indexes into.
#[derive(Clone, Copy, Debug)]
pub(crate) enum Src {
    /// The range refers to the reader's input buffer (`buf`).
    Buf,
    /// The range refers to the reader's `scratch` buffer, where quoted field
    /// contents are accumulated.
    Scratch,
}
24
/// Half-open byte range `start..end` locating one field inside the buffer
/// selected by `src`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct FieldRange {
    /// Index of the field's first byte.
    pub(crate) start: usize,
    /// Index one past the field's last byte; equal to `start` for an empty field.
    pub(crate) end: usize,
    /// Buffer that `start` and `end` index into.
    pub(crate) src: Src,
}
31
/// Buffered, row-at-a-time reader for delimiter-separated text.
///
/// Rows are parsed in place: each row borrows the reader's internal buffers,
/// which are reused when the next row is read.
pub struct Reader<R> {
    /// Raw input bytes; `start..end` is the part not yet consumed.
    pub(crate) buf: Vec<u8>,
    /// Contents of the current row's quoted fields with `""` collapsed to `"`.
    pub(crate) scratch: Vec<u8>,
    /// Location of every field of the current row.
    pub(crate) ranges: Vec<FieldRange>,
    /// Index in `buf` of the next byte to parse.
    start: usize,
    /// Index in `buf` one past the last valid byte.
    end: usize,
    /// Line counter handed to `ReadError::new`; starts at 1 and is bumped once
    /// per consumed row terminator.
    line: usize,
    /// Index in `buf` where the field being scanned began.
    field_start: usize,
    /// Length of `scratch` when the current quoted field was opened.
    scratch_field_start: usize,
    /// Current parser state.
    state: State,
    /// Byte that separates fields (`b','` unless changed via `set_delimiter`).
    delimiter: u8,
    /// Set once headers were supplied via `set_headers` or `parse_headers` was called.
    headers_parsed: bool,
    /// Set when a row ended on a `\r` that was the last buffered byte, so that
    /// a `\n` arriving with the next refill can be treated as part of the same
    /// terminator.
    pending_cr: bool,
    /// Column names, in column order.
    pub(crate) headers: Vec<String>,
    /// Maps each header name to its column index.
    #[cfg(feature = "serde")]
    pub(crate) header_map: Option<BTreeMap<String, usize>>,
    /// Underlying input the buffer is refilled from.
    source: R,
    /// `true` once no further bytes will be pulled from `source`.
    eof: bool,
}
77
78impl<R> Reader<R> {
79 pub fn set_delimiter(&mut self, byte: u8) -> &mut Self {
81 self.delimiter = byte;
82 self
83 }
84
85 pub fn set_headers(&mut self, headers: Vec<String>) -> &mut Self {
104 self.headers_parsed = true;
105 self.headers = headers;
106 #[cfg(feature = "serde")]
107 {
108 self.header_map = Some(
109 self.headers
110 .iter()
111 .enumerate()
112 .map(|(i, name)| (name.clone(), i))
113 .collect(),
114 );
115 }
116 self
117 }
118
119 pub fn headers(&self) -> Option<&[String]> {
124 if self.headers_parsed { Some(&self.headers) } else { None }
125 }
126
127 fn compact(&mut self) {
128 if self.start > 0 {
129 let remaining = self.end - self.start;
130 if remaining > 0 {
131 self.buf.copy_within(self.start..self.end, 0);
132 }
133 self.buf.truncate(remaining);
134 self.end = remaining;
135 self.start = 0;
136 }
137 }
138
139 fn consume_newline(&mut self) {
140 if self.start < self.end && self.buf[self.start] == b'\r' {
141 self.start += 1;
142 }
143 if self.start < self.end && self.buf[self.start] == b'\n' {
144 self.start += 1;
145 } else if self.start > 0 && self.start >= self.end && self.buf[self.start - 1] == b'\r' {
146 self.pending_cr = true;
147 }
148 self.line += 1;
149 }
150
151 fn validate_utf8(&self) -> Option<ReadError> {
152 for fr in &self.ranges {
153 let ok = match fr.src {
154 Src::Buf => core::str::from_utf8(&self.buf[fr.start..fr.end]).is_ok(),
155 Src::Scratch => core::str::from_utf8(&self.scratch[fr.start..fr.end]).is_ok(),
156 };
157 if !ok {
158 return Some(ReadError::new(ReadErrorKind::InvalidUtf8, self.line, 0));
159 }
160 }
161 None
162 }
163
164 fn finalize_current_field(&mut self) {
165 match self.state {
166 State::InUnquoted => {
167 self.ranges.push(FieldRange {
168 start: self.field_start,
169 end: self.start,
170 src: Src::Buf,
171 });
172 }
173 State::AfterQuote => {}
174 State::StartOfField => {
175 if !self.ranges.is_empty() {
176 self.ranges.push(FieldRange {
177 start: self.start,
178 end: self.start,
179 src: Src::Buf,
180 });
181 }
182 }
183 State::InQuoted => unreachable!(),
184 }
185 }
186
187 fn build_row_with(&self, error: Option<ReadError>) -> Row<'_> {
188 let error = error.or_else(|| self.validate_utf8());
189 Row {
190 buf: &self.buf[..self.end],
191 scratch: &self.scratch,
192 ranges: &self.ranges,
193 error,
194 #[cfg(feature = "serde")]
195 header_map: self.header_map.as_ref(),
196 }
197 }
198}
199
/// Returns `true` if `b` is a line-feed or carriage-return byte.
fn is_newline(b: u8) -> bool {
    matches!(b, b'\n' | b'\r')
}
203
204#[cfg(not(feature = "std"))]
207impl<R> Reader<R> {
208 fn fill_buf(&mut self) -> Result<bool, ReadError> {
209 if self.eof {
210 return Ok(false);
211 }
212 self.eof = true;
213 Ok(false)
214 }
215
216 fn read_row<'s>(&'s mut self) -> Option<Row<'s>> {
217 self.compact();
218 self.ranges.clear();
219 self.scratch.clear();
220 self.state = State::StartOfField;
221
222 loop {
223 if self.start >= self.end {
224 match self.fill_buf() {
225 Err(e) => {
226 self.eof = true;
227 return match self.state {
228 State::InQuoted => Some(self.build_row_with(Some(e))),
229 _ => {
230 self.finalize_current_field();
231 Some(self.build_row_with(Some(e)))
232 }
233 };
234 }
235 Ok(false) => {
236 if self.ranges.is_empty() && self.state == State::StartOfField {
237 return None;
238 }
239 return match self.state {
240 State::InQuoted => Some(self.build_row_with(Some(ReadError::new(
241 ReadErrorKind::UnterminatedQuote,
242 self.line,
243 0,
244 )))),
245 _ => {
246 self.finalize_current_field();
247 Some(self.build_row_with(None))
248 }
249 };
250 }
251 Ok(true) => {}
252 }
253 }
254
255 if self.pending_cr && self.buf[self.start] == b'\n' {
256 self.start += 1;
257 self.pending_cr = false;
258 continue;
259 }
260
261 let byte = self.buf[self.start];
262
263 match self.state {
264 State::StartOfField => {
265 if byte == b'\r' || byte == b'\n' {
266 if !self.ranges.is_empty() {
267 self.ranges.push(FieldRange {
268 start: self.start,
269 end: self.start,
270 src: Src::Buf,
271 });
272 }
273 self.consume_newline();
274 if self.ranges.is_empty() {
275 continue;
276 }
277 return Some(self.build_row_with(None));
278 }
279 self.field_start = self.start;
280 if byte == self.delimiter {
281 self.ranges.push(FieldRange {
282 start: self.start,
283 end: self.start,
284 src: Src::Buf,
285 });
286 self.start += 1;
287 continue;
288 }
289 if byte == b'"' {
290 self.scratch_field_start = self.scratch.len();
291 self.start += 1;
292 self.state = State::InQuoted;
293 } else {
294 self.start += 1;
295 self.state = State::InUnquoted;
296 }
297 }
298
299 State::InUnquoted => {
300 let haystack = &self.buf[self.start..self.end];
301 match memchr::memchr3(self.delimiter, b'\r', b'\n', haystack) {
302 Some(offset) => {
303 let pos = self.start + offset;
304 self.ranges.push(FieldRange {
305 start: self.field_start,
306 end: pos,
307 src: Src::Buf,
308 });
309 let b = self.buf[pos];
310 if b == self.delimiter {
311 self.start = pos + 1;
312 self.state = State::StartOfField;
313 } else {
314 self.start = pos;
315 self.consume_newline();
316 return Some(self.build_row_with(None));
317 }
318 }
319 None => {
320 self.start = self.end;
321 }
322 }
323 }
324
325 State::InQuoted => {
326 let haystack = &self.buf[self.start..self.end];
327 match memchr::memchr(b'"', haystack) {
328 Some(offset) => {
329 let quote_pos = self.start + offset;
330 self.scratch.extend_from_slice(&self.buf[self.start..quote_pos]);
331 let after_quote = quote_pos + 1;
332 if after_quote < self.end && self.buf[after_quote] == b'"' {
333 self.scratch.push(b'"');
334 self.start = after_quote + 1;
335 } else if after_quote < self.end {
336 self.ranges.push(FieldRange {
337 start: self.scratch_field_start,
338 end: self.scratch.len(),
339 src: Src::Scratch,
340 });
341 self.start = after_quote;
342 self.state = State::AfterQuote;
343 } else if self.fill_buf().ok().unwrap_or(false) && self.buf[after_quote] == b'"' {
344 self.scratch.push(b'"');
345 self.start = after_quote + 1;
346 } else {
347 self.ranges.push(FieldRange {
348 start: self.scratch_field_start,
349 end: self.scratch.len(),
350 src: Src::Scratch,
351 });
352 self.start = after_quote;
353 self.state = State::AfterQuote;
354 }
355 }
356 None => {
357 self.scratch.extend_from_slice(&self.buf[self.start..self.end]);
358 self.start = self.end;
359 }
360 }
361 }
362
363 State::AfterQuote => {
364 if byte == self.delimiter {
365 self.start += 1;
366 self.state = State::StartOfField;
367 } else if is_newline(byte) {
368 self.consume_newline();
369 return Some(self.build_row_with(None));
370 } else {
371 return Some(self.build_row_with(Some(ReadError::new(
372 ReadErrorKind::TrailingContent,
373 self.line,
374 0,
375 ))));
376 }
377 }
378 }
379 }
380 }
381
382 pub fn parse_headers(&mut self) -> Result<Vec<String>, ReadError> {
388 self.headers_parsed = true;
389 let row = match self.read_row() {
390 Some(row) => row,
391 None => return Ok(Vec::new()),
392 };
393 let h: Vec<String> = row.fields()?.map(|s| s.to_string()).collect();
394 self.headers = h.clone();
395 #[cfg(feature = "serde")]
396 {
397 self.header_map = Some(h.iter().enumerate().map(|(i, name)| (name.clone(), i)).collect());
398 }
399 Ok(h)
400 }
401
402 pub fn rows(&mut self) -> Rows<'_, R> {
404 Rows {
405 reader: self,
406 _marker: PhantomData,
407 }
408 }
409}
410
411#[cfg(feature = "std")]
414impl<R: std::io::Read> Reader<R> {
415 pub fn from_reader(source: R) -> Self {
420 Reader {
421 buf: Vec::with_capacity(65536),
422 scratch: Vec::new(),
423 ranges: Vec::new(),
424 start: 0,
425 end: 0,
426 line: 1,
427 field_start: 0,
428 scratch_field_start: 0,
429 state: State::StartOfField,
430 delimiter: b',',
431 headers_parsed: false,
432 pending_cr: false,
433 headers: Vec::new(),
434 #[cfg(feature = "serde")]
435 header_map: None,
436 source,
437 eof: false,
438 }
439 }
440
441 fn fill_buf(&mut self) -> Result<bool, ReadError> {
442 if self.eof {
443 return Ok(false);
444 }
445 let mut tmp = [0u8; 16384];
446 let n = self.source.read(&mut tmp)?;
447 if n == 0 {
448 self.eof = true;
449 return Ok(false);
450 }
451 self.buf.extend_from_slice(&tmp[..n]);
452 self.end = self.buf.len();
453 Ok(true)
454 }
455
456 fn read_row<'s>(&'s mut self) -> Option<Row<'s>> {
457 self.compact();
458 self.ranges.clear();
459 self.scratch.clear();
460 self.state = State::StartOfField;
461
462 loop {
463 if self.start >= self.end {
464 match self.fill_buf() {
465 Err(e) => {
466 self.eof = true;
467 return match self.state {
468 State::InQuoted => Some(self.build_row_with(Some(e))),
469 _ => {
470 self.finalize_current_field();
471 Some(self.build_row_with(Some(e)))
472 }
473 };
474 }
475 Ok(false) => {
476 if self.ranges.is_empty() && self.state == State::StartOfField {
477 return None;
478 }
479 return match self.state {
480 State::InQuoted => Some(self.build_row_with(Some(ReadError::new(
481 ReadErrorKind::UnterminatedQuote,
482 self.line,
483 0,
484 )))),
485 _ => {
486 self.finalize_current_field();
487 Some(self.build_row_with(None))
488 }
489 };
490 }
491 Ok(true) => {}
492 }
493 }
494
495 if self.pending_cr && self.buf[self.start] == b'\n' {
496 self.start += 1;
497 self.pending_cr = false;
498 continue;
499 }
500
501 let byte = self.buf[self.start];
502
503 match self.state {
504 State::StartOfField => {
505 if byte == b'\r' || byte == b'\n' {
506 if !self.ranges.is_empty() {
507 self.ranges.push(FieldRange {
508 start: self.start,
509 end: self.start,
510 src: Src::Buf,
511 });
512 }
513 self.consume_newline();
514 if self.ranges.is_empty() {
515 continue;
516 }
517 return Some(self.build_row_with(None));
518 }
519 self.field_start = self.start;
520 if byte == self.delimiter {
521 self.ranges.push(FieldRange {
522 start: self.start,
523 end: self.start,
524 src: Src::Buf,
525 });
526 self.start += 1;
527 continue;
528 }
529 if byte == b'"' {
530 self.scratch_field_start = self.scratch.len();
531 self.start += 1;
532 self.state = State::InQuoted;
533 } else {
534 self.start += 1;
535 self.state = State::InUnquoted;
536 }
537 }
538
539 State::InUnquoted => {
540 let haystack = &self.buf[self.start..self.end];
541 match memchr::memchr3(self.delimiter, b'\r', b'\n', haystack) {
542 Some(offset) => {
543 let pos = self.start + offset;
544 self.ranges.push(FieldRange {
545 start: self.field_start,
546 end: pos,
547 src: Src::Buf,
548 });
549 let b = self.buf[pos];
550 if b == self.delimiter {
551 self.start = pos + 1;
552 self.state = State::StartOfField;
553 } else {
554 self.start = pos;
555 self.consume_newline();
556 return Some(self.build_row_with(None));
557 }
558 }
559 None => {
560 self.start = self.end;
561 }
562 }
563 }
564
565 State::InQuoted => {
566 let haystack = &self.buf[self.start..self.end];
567 match memchr::memchr(b'"', haystack) {
568 Some(offset) => {
569 let quote_pos = self.start + offset;
570 self.scratch.extend_from_slice(&self.buf[self.start..quote_pos]);
571 let after_quote = quote_pos + 1;
572 if after_quote < self.end && self.buf[after_quote] == b'"' {
573 self.scratch.push(b'"');
574 self.start = after_quote + 1;
575 } else if after_quote < self.end {
576 self.ranges.push(FieldRange {
577 start: self.scratch_field_start,
578 end: self.scratch.len(),
579 src: Src::Scratch,
580 });
581 self.start = after_quote;
582 self.state = State::AfterQuote;
583 } else if self.fill_buf().ok().unwrap_or(false) && self.buf[after_quote] == b'"' {
584 self.scratch.push(b'"');
585 self.start = after_quote + 1;
586 } else {
587 self.ranges.push(FieldRange {
588 start: self.scratch_field_start,
589 end: self.scratch.len(),
590 src: Src::Scratch,
591 });
592 self.start = after_quote;
593 self.state = State::AfterQuote;
594 }
595 }
596 None => {
597 self.scratch.extend_from_slice(&self.buf[self.start..self.end]);
598 self.start = self.end;
599 }
600 }
601 }
602
603 State::AfterQuote => {
604 if byte == self.delimiter {
605 self.start += 1;
606 self.state = State::StartOfField;
607 } else if is_newline(byte) {
608 self.consume_newline();
609 return Some(self.build_row_with(None));
610 } else {
611 return Some(self.build_row_with(Some(ReadError::new(
612 ReadErrorKind::TrailingContent,
613 self.line,
614 0,
615 ))));
616 }
617 }
618 }
619 }
620 }
621
622 pub fn parse_headers(&mut self) -> Result<Vec<String>, ReadError> {
628 self.headers_parsed = true;
629 let row = match self.read_row() {
630 Some(row) => row,
631 None => return Ok(Vec::new()),
632 };
633 let h: Vec<String> = row.fields()?.map(|s| s.to_string()).collect();
634 self.headers = h.clone();
635 #[cfg(feature = "serde")]
636 {
637 self.header_map = Some(h.iter().enumerate().map(|(i, name)| (name.clone(), i)).collect());
638 }
639 Ok(h)
640 }
641
642 pub fn rows(&mut self) -> Rows<'_, R> {
644 Rows {
645 reader: self,
646 _marker: PhantomData,
647 }
648 }
649}
650
651impl<'a> Reader<&'a [u8]> {
652 pub fn from_bytes(bytes: &'a [u8]) -> Self {
664 Reader {
665 buf: bytes.to_vec(),
666 scratch: Vec::new(),
667 ranges: Vec::new(),
668 start: 0,
669 end: bytes.len(),
670 line: 1,
671 field_start: 0,
672 scratch_field_start: 0,
673 state: State::StartOfField,
674 delimiter: b',',
675 headers_parsed: false,
676 pending_cr: false,
677 headers: Vec::new(),
678 #[cfg(feature = "serde")]
679 header_map: None,
680 source: bytes,
681 eof: true,
682 }
683 }
684}
685
/// Borrowed view of one parsed row.
///
/// A row either carries an error or gives access to its fields as `&str`.
pub struct Row<'a> {
    /// Valid portion of the reader's input buffer.
    buf: &'a [u8],
    /// The reader's scratch buffer holding quoted-field contents.
    scratch: &'a [u8],
    /// Location of each field in `buf` or `scratch`.
    ranges: &'a [FieldRange],
    /// Failure hit while reading, parsing, or UTF-8 validating this row, if any.
    error: Option<ReadError>,
    /// Header-name-to-column-index table of the originating reader, if it has one.
    #[cfg(feature = "serde")]
    pub(crate) header_map: Option<&'a BTreeMap<String, usize>>,
}
705
706impl Row<'_> {
707 pub fn error(&self) -> Option<&ReadError> {
709 self.error.as_ref()
710 }
711
712 pub fn len(&self) -> usize {
714 self.ranges.len()
715 }
716
717 pub fn is_empty(&self) -> bool {
719 self.ranges.is_empty()
720 }
721
722 pub fn fields(&self) -> Result<Fields<'_>, ReadError> {
736 if let Some(e) = &self.error {
737 return Err(e.clone());
738 }
739 Ok(Fields {
740 buf: self.buf,
741 scratch: self.scratch,
742 iter: self.ranges.iter(),
743 })
744 }
745
746 pub fn all(&self) -> Result<Vec<String>, ReadError> {
750 Ok(self.fields()?.map(|f| f.to_string()).collect())
751 }
752}
753
754impl<'a> fmt::Debug for Row<'a> {
755 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
756 match &self.error {
757 Some(e) => write!(f, "Row(Err({e}))"),
758 None => f
759 .debug_list()
760 .entries(self.ranges.iter().map(|r| {
761 let slice = match r.src {
762 Src::Buf => &self.buf[r.start..r.end],
763 Src::Scratch => &self.scratch[r.start..r.end],
764 };
765 unsafe { str::from_utf8_unchecked(slice) }
768 }))
769 .finish(),
770 }
771 }
772}
773
/// Iterator over the fields of a [`Row`], yielding each field as a `&str`.
pub struct Fields<'a> {
    /// Input buffer that `Src::Buf` ranges index into.
    buf: &'a [u8],
    /// Scratch buffer that `Src::Scratch` ranges index into.
    scratch: &'a [u8],
    /// Remaining field ranges to yield.
    iter: core::slice::Iter<'a, FieldRange>,
}
782
783impl<'a> Iterator for Fields<'a> {
784 type Item = &'a str;
785
786 fn next(&mut self) -> Option<&'a str> {
787 let r = self.iter.next()?;
788 let slice = match r.src {
789 Src::Buf => &self.buf[r.start..r.end],
790 Src::Scratch => &self.scratch[r.start..r.end],
791 };
792 Some(unsafe { str::from_utf8_unchecked(slice) })
796 }
797
798 fn size_hint(&self) -> (usize, Option<usize>) {
799 self.iter.size_hint()
800 }
801}
802
// `size_hint` forwards to a slice iterator, so the reported length is exact.
impl<'a> ExactSizeIterator for Fields<'a> {}
804
/// Iterator over the rows of a [`Reader`], created by [`Reader::rows`].
///
/// NOTE(review): `next` hands out `Row<'r>` values that borrow the reader's
/// buffers, while every later `next` call mutates those same buffers. Nothing
/// visible here prevents a caller from keeping an earlier row alive across the
/// next call — confirm that this is guarded or documented elsewhere.
pub struct Rows<'r, R> {
    /// Raw pointer to the reader, re-borrowed mutably on every `next` call.
    reader: *mut Reader<R>,
    /// Ties the iterator to an exclusive borrow of the reader for `'r`.
    _marker: PhantomData<&'r mut Reader<R>>,
}
822
#[cfg(feature = "std")]
impl<'r, R: std::io::Read> Iterator for Rows<'r, R> {
    type Item = Row<'r>;

    /// Reads the next row from the underlying reader; `None` once the input is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // SAFETY: `reader` was derived from the `&'r mut Reader<R>` passed to
        // `Reader::rows`, and `_marker` keeps that exclusive borrow alive for
        // `'r`, so the pointer is valid and not aliased by safe code.
        let reader: &mut Reader<R> = unsafe { &mut *self.reader };
        match reader.read_row() {
            // SAFETY: only the lifetime parameter changes. The row borrows
            // buffers owned by the reader, which outlives `'r`.
            // NOTE(review): those buffers are rewritten by the next `next`
            // call; this presumably relies on callers dropping each row first.
            Some(row) => Some(unsafe { core::mem::transmute::<Row<'_>, Row<'r>>(row) }),
            None => None,
        }
    }
}
842
843#[cfg(not(feature = "std"))]
844impl<'r, R> Iterator for Rows<'r, R> {
845 type Item = Row<'r>;
846
847 fn next(&mut self) -> Option<Self::Item> {
848 let reader = unsafe { &mut *self.reader };
849 reader
850 .read_row()
851 .map(|row| unsafe { core::mem::transmute::<Row<'_>, Row<'r>>(row) })
852 }
853}