From 0d93a42626c8f497d19551177f398a760f62613f Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sun, 3 Jan 2021 00:20:49 -0800 Subject: [PATCH] support SGML OFX responses with no line breaks Some financial institutions (*cough* Wells Fargo *cough*) export OFX files as a single line, which is technically valid according to the v1 spec. In order to parse them correctly, `readSGMLHeaders` now uses a regular expression that allows for all whitespace/line breaks to be optionally excluded. A new sample response (wellsfargo.qfx) has been added to document this behaviour. --- response.go | 92 +++++++++++++------------- samples/valid_responses/wellsfargo.qfx | 1 + 2 files changed, 46 insertions(+), 47 deletions(-) create mode 100644 samples/valid_responses/wellsfargo.qfx diff --git a/response.go b/response.go index 355a491..7d27665 100644 --- a/response.go +++ b/response.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "reflect" + "regexp" "strings" "github.com/aclindsa/xml" @@ -35,78 +36,75 @@ type Response struct { } func (or *Response) readSGMLHeaders(r *bufio.Reader) error { - var seenHeader, seenVersion bool = false, false - for { - // Some financial institutions do not properly leave an empty line after the last header. - // Avoid attempting to read another header in that case. - next, err := r.Peek(1) - if err != nil { - return err - } - if next[0] == '<' { - break + b, err := r.ReadSlice('<') + if err != nil { + return err + } + + s := string(b) + err = r.UnreadByte() + if err != nil { + return err + } + + // According to the latest OFX SGML spec (1.6), headers should be CRLF-separated + // and written as KEY:VALUE. However, some banks include a whitespace after the + // colon (KEY: VALUE), while others include no line breaks at all. The spec doesn't + // require a line break after the OFX headers, but it is allowed, and will be + // optionally captured & discarded by the trailing `\s*`. 
Valid SGML headers must + // always be present in exactly this order, so a regular expression is acceptable. + headerExp := regexp.MustCompile( + `OFXHEADER:\s*(?P<OFXHEADER>\d+)\s*` + + `DATA:\s*(?P<DATA>[A-Z]+)\s*` + + `VERSION:\s*(?P<VERSION>\d+)\s*` + + `SECURITY:\s*(?P<SECURITY>[\w]+)\s*` + + `ENCODING:\s*(?P<ENCODING>[A-Z0-9-]+)\s*` + + `CHARSET:\s*(?P<CHARSET>[\w-]+)\s*` + + `COMPRESSION:\s*(?P<COMPRESSION>[A-Z]+)\s*` + + `OLDFILEUID:\s*(?P<OLDFILEUID>[\w-]+)\s*` + + `NEWFILEUID:\s*(?P<NEWFILEUID>[\w-]+)\s*`) + + matches := headerExp.FindStringSubmatch(s) + if len(matches) == 0 { + return errors.New("OFX headers malformed") + } + + for i, name := range headerExp.SubexpNames() { + if i == 0 { + continue } - line, err := r.ReadString('\n') - if err != nil { - return err - } - // r.ReadString leaves the '\n' on the end... - line = strings.TrimSpace(line) - - if len(line) == 0 { - if seenHeader { - break - } else { - continue - } - } - header := strings.SplitN(line, ":", 2) - if header == nil || len(header) != 2 { - return errors.New("OFX headers malformed") - } - - // Some OFX servers put a space after the colon - headervalue := strings.TrimSpace(header[1]) - - switch header[0] { + headerValue := matches[i] + switch name { case "OFXHEADER": - if headervalue != "100" { + if headerValue != "100" { return errors.New("OFXHEADER is not 100") } - seenHeader = true case "DATA": - if headervalue != "OFXSGML" { + if headerValue != "OFXSGML" { return errors.New("OFX DATA header does not contain OFXSGML") } case "VERSION": - err := or.Version.FromString(headervalue) + err := or.Version.FromString(headerValue) if err != nil { return err } - seenVersion = true - if or.Version > OfxVersion160 { return errors.New("OFX VERSION > 160 in SGML header") } case "SECURITY": - if headervalue != "NONE" { + if headerValue != "NONE" { return errors.New("OFX SECURITY header not NONE") } case "COMPRESSION": - if headervalue != "NONE" { + if headerValue != "NONE" { return errors.New("OFX COMPRESSION header not NONE") } case "ENCODING", "CHARSET", "OLDFILEUID", "NEWFILEUID": - // 
TODO check/handle these headers? - default: - return errors.New("Invalid OFX header: " + header[0]) + // TODO: check/handle these headers? } } - if !seenVersion { - return errors.New("OFX VERSION header missing") - } return nil } diff --git a/samples/valid_responses/wellsfargo.qfx b/samples/valid_responses/wellsfargo.qfx new file mode 100644 index 0000000..ffc667d --- /dev/null +++ b/samples/valid_responses/wellsfargo.qfx @@ -0,0 +1 @@ +OFXHEADER:100DATA:OFXSGMLVERSION:102SECURITY:NONEENCODING:USASCIICHARSET:1252COMPRESSION:NONEOLDFILEUID:NONENEWFILEUID:NONE0INFOSUCCESS20210102211014.201[-8:PST]ENGWF1000abc-1231000jane_doe00INFOSUCCESSUSD1234567899876543210CHECKING20201201120000.000[-8:PST]20201231120000.000[-8:PST]DIRECTDEBIT20201201120000.000[-8:PST]-12.34202012011AE Visa Card AE EPAY XXXXX1234123.4520201231120000.000[-8:PST]123.4520201231120000.000[-8:PST] \ No newline at end of file