diff --git a/go.mod b/go.mod index a21d526435..8dffa47416 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( github.com/gofrs/uuid v4.4.0+incompatible github.com/golang-jwt/jwt/v4 v4.5.0 github.com/golang/protobuf v1.5.3 - github.com/google/go-tika v0.2.0 + github.com/google/go-tika v0.3.0 github.com/google/uuid v1.3.0 github.com/gookit/config/v2 v2.1.8 github.com/gorilla/mux v1.8.0 diff --git a/go.sum b/go.sum index 56dea46bdc..690987bf62 100644 --- a/go.sum +++ b/go.sum @@ -966,8 +966,8 @@ github.com/google/go-github/v32 v32.1.0/go.mod h1:rIEpZD9CTDQwDK9GDrtMTycQNA4JU3 github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= -github.com/google/go-tika v0.2.0 h1:+1dnOoJ/pJrko2XH/3Rm5ssG9+ixOgjmPEz94ikUsxI= -github.com/google/go-tika v0.2.0/go.mod h1:vnMADwNG1A2AJx+ycQgTNMGe3ZG4CZUowEhK2FykumQ= +github.com/google/go-tika v0.3.0 h1:JncwikDcIJrSwwoTjWg6NE7g3IW7XTrI6ZeeOGp03Y8= +github.com/google/go-tika v0.3.0/go.mod h1:k/Afo/0kDgo9g/9gjIIUBoDXgyGgYO3JAISqowoJdVE= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= @@ -1786,7 +1786,6 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= diff --git a/vendor/github.com/google/go-tika/tika/doc.go b/vendor/github.com/google/go-tika/tika/doc.go index cae42735bb..f9a1af6cb4 100644 --- a/vendor/github.com/google/go-tika/tika/doc.go +++ b/vendor/github.com/google/go-tika/tika/doc.go @@ -21,11 +21,15 @@ Start with basic imports: import "github.com/google/go-tika/tika" You will need a running Server to make API calls to. So, if you don't -have a server that is already running and you don't have the Server +have a server that is already running, and you don't have the Server JAR already downloaded, you can download one. The caller is responsible for removing the file when no longer needed. - err := tika.DownloadServer(context.Background(), "1.16", "tika-server-1.16.jar") +Version is a custom type, and should be passed as such. There are constants in the code for these. +The following example downloads version 1.21 to the named JAR in the +current working directory. + + err := tika.DownloadServer(context.Background(), tika.Version121, "tika-server-1.21.jar") if err != nil { log.Fatal(err) } @@ -33,7 +37,7 @@ for removing the file when no longer needed. If you don't have a running Tika Server, you can start one. // Optionally pass a port as the second argument. - s, err := tika.NewServer("tika-server-1.16.jar", "") + s, err := tika.NewServer("tika-server-1.21.jar", "") if err != nil { log.Fatal(err) } diff --git a/vendor/github.com/google/go-tika/tika/server.go b/vendor/github.com/google/go-tika/tika/server.go index 37d33c7079..758d77ccb7 100644 --- a/vendor/github.com/google/go-tika/tika/server.go +++ b/vendor/github.com/google/go-tika/tika/server.go @@ -37,7 +37,6 @@ import ( // since you can pass its URL directly to a Client. // Additional Java system properties can be added to a Taka Server before // startup by adding to the JavaProps map - type Server struct { jar string url string // url is derived from port. @@ -91,6 +90,12 @@ func NewServer(jar, port string) (*Server, error) { if jar == "" { return nil, fmt.Errorf("no jar file specified") } + + // Check if the jar file exists. + if _, err := os.Stat(jar); os.IsNotExist(err) { + return nil, fmt.Errorf("jar file %q does not exist", jar) + } + if port == "" { port = "9998" } diff --git a/vendor/github.com/google/go-tika/tika/tika.go b/vendor/github.com/google/go-tika/tika/tika.go index 6877ea5f1e..e040508d34 100644 --- a/vendor/github.com/google/go-tika/tika/tika.go +++ b/vendor/github.com/google/go-tika/tika/tika.go @@ -24,8 +24,6 @@ import ( "net/http" "reflect" "strings" - - "golang.org/x/net/context/ctxhttp" ) // ClientError is returned by Client's various parse methods and @@ -97,7 +95,7 @@ type Detector struct { // Translator represents the Java package of a Tika Translator. type Translator string -// Translators available by defult in Tika. You must configure all required +// Translators available by default in Tika. You must configure all required // authentication details in Tika Server (for example, an API key). const ( Lingo24Translator Translator = "org.apache.tika.language.translate.Lingo24Translator" @@ -119,13 +117,13 @@ func (c *Client) call(ctx context.Context, input io.Reader, method, path string, c.httpClient = http.DefaultClient } - req, err := http.NewRequest(method, c.url+path, input) + req, err := http.NewRequestWithContext(ctx, method, c.url+path, input) if err != nil { return nil, err } req.Header = header - resp, err := ctxhttp.Do(ctx, c.httpClient, req) + resp, err := c.httpClient.Do(req) if err != nil { return nil, err } @@ -138,8 +136,8 @@ func (c *Client) call(ctx context.Context, input io.Reader, method, path string, // callString makes the given request to c and returns the result as a string // and error. callString returns an error if the response code is not 200 StatusOK. -func (c *Client) callString(ctx context.Context, input io.Reader, method, path string) (string, error) { - body, err := c.call(ctx, input, method, path, nil) +func (c *Client) callString(ctx context.Context, input io.Reader, method, path string, header http.Header) (string, error) { + body, err := c.call(ctx, input, method, path, header) if err != nil { return "", err } @@ -156,13 +154,27 @@ func (c *Client) callString(ctx context.Context, input io.Reader, method, path s // Parse parses the given input, returning the body of the input as a string and an error. // If the error is not nil, the body is undefined. func (c *Client) Parse(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/tika") + return c.ParseWithHeader(ctx, input, nil) } // ParseReader parses the given input, returning the body of the input as a reader and an error. // If the error is nil, the returned reader must be closed, else, the reader is nil. func (c *Client) ParseReader(ctx context.Context, input io.Reader) (io.ReadCloser, error) { - return c.call(ctx, input, "PUT", "/tika", nil) + return c.ParseReaderWithHeader(ctx, input, nil) +} + +// ParseWithHeader parses the given input, returning the body of the input as a string and an error. +// If the error is not nil. the body is undefined. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) ParseWithHeader(ctx context.Context, input io.Reader, header http.Header) (string, error) { + return c.callString(ctx, input, "PUT", "/tika", header) +} + +// ParseReaderWithHeader parses the given input, returning the body of the input as a reader and an error. +// If the error is nil, the returned reader must be closed, else, the reader is nil. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) ParseReaderWithHeader(ctx context.Context, input io.Reader, header http.Header) (io.ReadCloser, error) { + return c.call(ctx, input, "PUT", "/tika", header) } // ParseRecursive parses the given input and all embedded documents, returning a @@ -186,26 +198,40 @@ func (c *Client) ParseRecursive(ctx context.Context, input io.Reader) ([]string, // Meta parses the metadata from the given input, returning the metadata and an // error. If the error is not nil, the metadata is undefined. func (c *Client) Meta(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/meta") + return c.MetaWithHeader(ctx, input, nil) +} + +// MetaWithHeader parses the metadata from the given input, returning the metadata and an +// error. If the error is not nil, the metadata is undefined. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) MetaWithHeader(ctx context.Context, input io.Reader, header http.Header) (string, error) { + return c.callString(ctx, input, "PUT", "/meta", header) } // MetaField parses the metadata from the given input and returns the given // field. If the error is not nil, the result string is undefined. func (c *Client) MetaField(ctx context.Context, input io.Reader, field string) (string, error) { - return c.callString(ctx, input, "PUT", fmt.Sprintf("/meta/%v", field)) + return c.MetaFieldWithHeader(ctx, input, field, nil) +} + +// MetaFieldWithHeader parses the metadata from the given input and returns the given +// field. If the error is not nil, the result string is undefined. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) MetaFieldWithHeader(ctx context.Context, input io.Reader, field string, header http.Header) (string, error) { + return c.callString(ctx, input, "PUT", fmt.Sprintf("/meta/%v", field), header) } // Detect gets the mimetype of the given input, returning the mimetype and an // error. If the error is not nil, the mimetype is undefined. func (c *Client) Detect(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/detect/stream") + return c.callString(ctx, input, "PUT", "/detect/stream", nil) } // Language detects the language of the given input, returning the two letter // language code and an error. If the error is not nil, the language is // undefined. func (c *Client) Language(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/language/stream") + return c.callString(ctx, input, "PUT", "/language/stream", nil) } // LanguageString detects the language of the given string, returning the two letter @@ -213,7 +239,7 @@ func (c *Client) Language(ctx context.Context, input io.Reader) (string, error) // undefined. func (c *Client) LanguageString(ctx context.Context, input string) (string, error) { r := strings.NewReader(input) - return c.callString(ctx, r, "PUT", "/language/string") + return c.callString(ctx, r, "PUT", "/language/string", nil) } // MetaRecursive parses the given input and all embedded documents. The result @@ -272,7 +298,7 @@ func (c *Client) MetaRecursiveType(ctx context.Context, input io.Reader, content // Translate returns an error and the translated input from src language to // dst language using t. If the error is not nil, the translation is undefined. func (c *Client) Translate(ctx context.Context, input io.Reader, t Translator, src, dst string) (string, error) { - return c.callString(ctx, input, "POST", fmt.Sprintf("/translate/all/%s/%s/%s", t, src, dst)) + return c.callString(ctx, input, "POST", fmt.Sprintf("/translate/all/%s/%s/%s", t, src, dst), nil) } // TranslateReader translates the given input from src language to dst language using t. @@ -284,7 +310,7 @@ func (c *Client) TranslateReader(ctx context.Context, input io.Reader, t Transla // Version returns the default hello message from Tika server. func (c *Client) Version(ctx context.Context) (string, error) { - return c.callString(ctx, nil, "GET", "/version") + return c.callString(ctx, nil, "GET", "/version", nil) } var jsonHeader = http.Header{"Accept": []string{"application/json"}} diff --git a/vendor/modules.txt b/vendor/modules.txt index f6f2aeb6cc..8466d4b30a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1024,7 +1024,7 @@ github.com/google/go-cmp/cmp/internal/value # github.com/google/go-querystring v1.1.0 ## explicit; go 1.10 github.com/google/go-querystring/query -# github.com/google/go-tika v0.2.0 +# github.com/google/go-tika v0.3.0 ## explicit; go 1.11 github.com/google/go-tika/tika # github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1