Skip to content

Commit

Permalink
main functionality of backend done
Browse files Browse the repository at this point in the history
  • Loading branch information
smeggmann99 committed Oct 3, 2024
1 parent 1223ba2 commit e56c548
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 19 deletions.
14 changes: 14 additions & 0 deletions app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,18 @@ func main() {
}

fmt.Println(division.Schedule.String())

teacher, err := scraper.ScrapeTeacher(1)
if err != nil {
panic(err)
}

fmt.Println(teacher)

room, err := scraper.ScrapeRoom(1)
if err != nil {
panic(err)
}

fmt.Println(room)
}
2 changes: 0 additions & 2 deletions core/scraper/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ type Teacher struct {

// Room is the scraped representation of a room. Its fields are serialized to
// JSON under the names given in the struct tags.
type Room struct {
Designator string `json:"designator"`
FullName string `json:"full_name"`
Schedule Schedule `json:"schedule"`
BuildingName string `json:"building_name"`
}

type Lesson struct {
Expand Down
134 changes: 117 additions & 17 deletions core/scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ func splitDivisionTitle(s string) (string, string) {
return strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])
}

// splitTeacherTitle splits a teacher page title into the teacher's name and
// designator. The title is expected to end with the designator, optionally
// wrapped in parentheses (presumably something like "Name (XX)").
//
// Note the return order: the first value is the name (every token before the
// last one, joined by single spaces) and the second is the designator (the
// last token with surrounding parentheses stripped). Both values are empty
// when the title has fewer than two whitespace-separated tokens.
func splitTeacherTitle(s string) (string, string) {
	// strings.Fields drops leading/trailing whitespace and collapses runs of
	// any whitespace, so padded titles or titles containing newlines do not
	// produce empty tokens the way strings.Split(s, " ") does.
	fields := strings.Fields(s)
	if len(fields) < 2 {
		return "", ""
	}

	// The designator is always the final token; everything before it belongs
	// to the name, which may itself consist of several words.
	last := len(fields) - 1
	designator := strings.Trim(fields[last], "()")
	name := strings.Join(fields[:last], " ")

	return name, designator
}

func parseTimeRange(s string) (TimeRange, error) {
s = strings.ReplaceAll(s, " ", "")
parts := strings.Split(s, "-")
Expand Down Expand Up @@ -86,7 +98,7 @@ func parseLessons(rowElement *goquery.Selection, timeRange TimeRange) ([]Lesson,
return lessons, nil
}

func scrapeTitle(doc *goquery.Document) (string, string, error) {
func scrapeDivisionTitle(doc *goquery.Document) (string, string, error) {
titleSelection := doc.Find("body > table > tbody > tr > td").First()
if titleSelection.Length() == 0 {
return "", "", fmt.Errorf("no division title found")
Expand All @@ -98,6 +110,28 @@ func scrapeTitle(doc *goquery.Document) (string, string, error) {
return designator, fullName, nil
}

// scrapeTeacherTitle reads the teacher page title from the first cell matched
// by the title selector and returns the teacher's designator and full name,
// in that order.
//
// It returns an error when the document contains no title cell.
func scrapeTeacherTitle(doc *goquery.Document) (string, string, error) {
	titleSelection := doc.Find("body > table > tbody > tr > td").First()
	if titleSelection.Length() == 0 {
		return "", "", fmt.Errorf("no teacher title found")
	}

	// splitTeacherTitle yields (name, designator), whereas this function
	// reports (designator, fullName); bind the results in the helper's order
	// so the two values are not swapped on the way out.
	fullName, designator := splitTeacherTitle(titleSelection.Text())

	return designator, fullName, nil
}

// scrapeRoomTitle returns the raw text of the first cell matched by the title
// selector, or an error when no such cell exists in the document.
func scrapeRoomTitle(doc *goquery.Document) (string, error) {
	cell := doc.Find("body > table > tbody > tr > td").First()
	if cell.Length() > 0 {
		return cell.Text(), nil
	}

	return "", fmt.Errorf("no room title found")
}

func scrapeSchedule(doc *goquery.Document) (Schedule, error) {
var schedule Schedule
var timeRange TimeRange
Expand Down Expand Up @@ -167,22 +201,10 @@ func scrapeSchedule(doc *goquery.Document) (Schedule, error) {
}

func ScrapeDivision(index uint) (*Division, error) {
url := Config.OptivumBaseUrl + fmt.Sprintf(Config.DivisionEndpoint, index)
fmt.Printf("scraping division from URL: %s\n", url)

res, err := http.Get(url)
endpoint := fmt.Sprintf(Config.DivisionEndpoint, index)
doc, err := OpenDoc(endpoint)
if err != nil {
return nil, fmt.Errorf("error fetching URL: %w", err)
}
defer res.Body.Close()

if res.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", res.StatusCode)
}

doc, err := goquery.NewDocumentFromReader(res.Body)
if err != nil {
return nil, fmt.Errorf("error loading HTML: %w", err)
return nil, fmt.Errorf("error opening document: %w", err)
}

division := Division{
Expand All @@ -192,7 +214,7 @@ func ScrapeDivision(index uint) (*Division, error) {
Schedule: Schedule{},
}

designator, fullName, err := scrapeTitle(doc)
designator, fullName, err := scrapeDivisionTitle(doc)
if err != nil {
return nil, fmt.Errorf("error scraping division title: %w", err)
}
Expand All @@ -208,6 +230,84 @@ func ScrapeDivision(index uint) (*Division, error) {
return &division, nil
}

// ScrapeTeacher opens the teacher page for the given index (the endpoint is
// built from Config.TeacherEndpoint) and returns a Teacher populated with the
// scraped designator, full name and schedule.
//
// It returns an error if the document cannot be opened or if the title or the
// schedule cannot be scraped; the underlying error is wrapped.
func ScrapeTeacher(index uint) (*Teacher, error) {
	endpoint := fmt.Sprintf(Config.TeacherEndpoint, index)
	doc, err := OpenDoc(endpoint)
	if err != nil {
		return nil, fmt.Errorf("error opening document: %w", err)
	}

	designator, fullName, err := scrapeTeacherTitle(doc)
	if err != nil {
		return nil, fmt.Errorf("error scraping teacher title: %w", err)
	}

	schedule, err := scrapeSchedule(doc)
	if err != nil {
		return nil, fmt.Errorf("error scraping teacher schedule: %w", err)
	}

	return &Teacher{
		Designator: designator,
		FullName:   fullName,
		Schedule:   schedule,
	}, nil
}

// ScrapeRoom opens the room page for the given index (the endpoint is built
// from Config.RoomEndpoint) and returns a Room populated with the scraped
// designator and schedule.
//
// It returns an error if the document cannot be opened or if the title or the
// schedule cannot be scraped; the underlying error is wrapped.
func ScrapeRoom(index uint) (*Room, error) {
	endpoint := fmt.Sprintf(Config.RoomEndpoint, index)
	doc, err := OpenDoc(endpoint)
	if err != nil {
		return nil, fmt.Errorf("error opening document: %w", err)
	}

	designator, err := scrapeRoomTitle(doc)
	if err != nil {
		return nil, fmt.Errorf("error scraping room title: %w", err)
	}

	schedule, err := scrapeSchedule(doc)
	if err != nil {
		return nil, fmt.Errorf("error scraping room schedule: %w", err)
	}

	return &Room{
		Designator: designator,
		Schedule:   schedule,
	}, nil
}

// OpenDoc fetches Config.OptivumBaseUrl + endpoint with an HTTP GET and parses
// the response body into a goquery document.
//
// It returns an error if the request fails, if the response status is not
// 200 OK, or if the body cannot be loaded as HTML.
func OpenDoc(endpoint string) (*goquery.Document, error) {
	url := Config.OptivumBaseUrl + endpoint
	// This helper is shared by the division, teacher and room scrapers, so the
	// log line must not name a specific kind of page.
	fmt.Printf("scraping document from URL: %s\n", url)

	res, err := http.Get(url)
	if err != nil {
		return nil, fmt.Errorf("error fetching URL: %w", err)
	}
	// Close the body on every return path below.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code: %d", res.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, fmt.Errorf("error loading HTML: %w", err)
	}

	return doc, nil
}

func Initialize() error {
Config = config.Global.Scraper
GeneralConfig = config.Global.General
Expand Down

0 comments on commit e56c548

Please sign in to comment.