Skip to content

Commit

Permalink
Fix page meta retrieve
Browse files Browse the repository at this point in the history
  • Loading branch information
derfenix committed Nov 16, 2023
1 parent 3147a0b commit e27fdab
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
23 changes: 22 additions & 1 deletion adapters/processors/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,29 @@ func (p *Processors) GetMeta(ctx context.Context, url string) (entity.Meta, erro
return entity.Meta{}, fmt.Errorf("parse response body: %w", err)
}

var fc *html.Node
for fc = htmlNode.FirstChild; fc != nil && fc.Data != "html"; fc = fc.NextSibling {
}

if fc == nil {
return entity.Meta{}, fmt.Errorf("failed to find html tag")
}

fc = fc.NextSibling
if fc == nil {
return entity.Meta{}, fmt.Errorf("failed to find html tag")
}

for fc = fc.FirstChild; fc != nil && fc.Data != "head"; fc = fc.NextSibling {
fmt.Println(fc.Data)
}

if fc == nil {
return entity.Meta{}, fmt.Errorf("failed to find html tag")
}

meta := entity.Meta{}
getMetaData(htmlNode, &meta)
getMetaData(fc, &meta)
meta.Encoding = encodingFromHeader(response.Header)

return meta, nil
Expand Down
26 changes: 26 additions & 0 deletions adapters/processors/processors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package processors

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/derfenix/webarchive/config"
)

// TestProcessors_GetMeta verifies that GetMeta returns the expected page title
// for a publicly hosted article.
//
// The test issues a real HTTP request to habr.com, so it is an integration
// test: it is skipped when the test binary runs with -short, which keeps
// offline and sandboxed runs from failing on network access.
func TestProcessors_GetMeta(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping: requires network access to habr.com")
	}

	t.Parallel()

	ctx := context.Background()

	cfg, err := config.NewConfig(ctx)
	require.NoError(t, err)

	procs, err := NewProcessors(cfg)
	require.NoError(t, err)

	// NOTE(review): the expected title is whatever the remote page serves at
	// request time; if the article is renamed or removed this assertion breaks
	// without any change in the code under test.
	meta, err := procs.GetMeta(ctx, "https://habr.com/ru/companies/wirenboard/articles/722718/")
	require.NoError(t, err)
	assert.Equal(t, "Сколько стоит умный дом? Рассказываю, как строил свой и что получилось за 1000 руб./м² / Хабр", meta.Title)
}

0 comments on commit e27fdab

Please sign in to comment.