English > Support

Imdb People script issues

<< < (5/9) > >>

afrocuban:
And probably FINALLY this one is a winner (have to try to tweak Recipient yet):






--- Quote ---Function ParsePage_IMDBPeopleAWARDS(HTML: String): Cardinal;
Var
  curPos, endPos, awardPos, categoryPos, recipientPos, yearPos, eventEndPos, namePos: Integer;
  Event, Award, AwardName, Category, Recipient, Year: String;
  Won: Boolean;
Begin
  LogMessage('Function ParsePage_IMDBPeopleAWARDS BEGIN=====================||');
  Result := prFinished;


  // Locate the start of the first event section
  curPos := Pos('<section class="ipc-page-section ipc-page-section--base">', HTML);
  While curPos > 0 Do Begin
    LogMessage('curPos after finding event section: ' + IntToStr(curPos));


    // Extract event name
    curPos := PosFrom('<span id="ev', HTML, curPos);
    If curPos = 0 Then Begin
      LogMessage('Event name not found');
      Break;
    End;
    curPos := PosFrom('>', HTML, curPos) + 1;
    endPos := PosFrom('</span>', HTML, curPos);
    Event := Trim(Copy(HTML, curPos, endPos - curPos));
    LogMessage('Parsed Event: ' + Event);


    // Move cursor to start processing awards within the event
    curPos := endPos;
    eventEndPos := PosFrom('<section class="ipc-page-section ipc-page-section--base">', HTML, curPos);
    If eventEndPos = 0 Then
      eventEndPos := Length(HTML);  // Set to the end of HTML if no more events


    // Process awards within the event
    While curPos < eventEndPos Do Begin
      // Find next award div within the current event
      awardPos := PosFrom('<li class="ipc-metadata-list-summary-item sc-15fc9ae6-1 gQbMPJ" data-testid="list-item">', HTML, curPos);
      If (awardPos = 0) Or (awardPos >= eventEndPos) Then Begin
        LogMessage('No more awards found in this event');
        Break;
      End;
      LogMessage('curPos after finding award div: ' + IntToStr(awardPos));


      // Extract entire award block
      curPos := awardPos;
      endPos := PosFrom('</li>', HTML, curPos);
      If endPos = 0 Then Begin
        LogMessage('No closing tag for award div found');
        Break;
      End;


      Award := Copy(HTML, curPos, endPos - curPos);
      curPos := endPos + Length('</li>');
      LogMessage('Award Content Extracted Successfully: ' + Award);


      // Extract year
      yearPos := PosFrom('<a class="ipc-metadata-list-summary-item__t"', Award, 1);
      If yearPos = 0 Then Begin
        LogMessage('Year not found');
        Continue;
      End;
      yearPos := PosFrom('>', Award, yearPos) + 1;
      endPos := PosFrom(' ', Award, yearPos);
      Year := Copy(Award, yearPos, endPos - yearPos);
      Year := Trim(Year);
      LogMessage('Parsed Year: ' + Year);


      // Determine if the award was won
      Won := PosFrom('Winner', Award, 1) > 0;
      If Won Then
        LogMessage('Parsed Won: True')
      Else
        LogMessage('Parsed Won: False');


      // Extract award name
      namePos := PosFrom('<span class="ipc-metadata-list-summary-item__tst">', Award, 1);
      If namePos > 0 Then Begin
        namePos := PosFrom('>', Award, namePos) + 1;
        endPos := PosFrom('</span>', Award, namePos);
        AwardName := Copy(Award, namePos, endPos - namePos);
        LogMessage('Parsed Award Name: ' + AwardName);
      End Else Begin
        LogMessage('Award Name not found');
        AwardName := '';
      End;


      // Extract category
      categoryPos := PosFrom('<span class="ipc-metadata-list-summary-item__li awardCategoryName" aria-disabled="false">', Award, 1);
      If categoryPos > 0 Then Begin
        categoryPos := PosFrom('>', Award, categoryPos) + 1;
        endPos := PosFrom('</span>', Award, categoryPos);
        Category := Copy(Award, categoryPos, endPos - categoryPos);
        LogMessage('Parsed Category: ' + Category);
      End Else Begin
        LogMessage('Category tag not found');
        Category := '';
      End;


      // Extract recipient
      recipientPos := PosFrom('<a class="ipc-metadata-list-summary-item__li ipc-metadata-list-summary-item__li--link"', Award, endPos + Length('</span>') + 1);
      If recipientPos > 0 Then Begin
        recipientPos := PosFrom('>', Award, recipientPos) + 1;
        endPos := PosFrom('</a>', Award, recipientPos);
        Recipient := Copy(Award, recipientPos, endPos - recipientPos);
        LogMessage('Parsed Recipient: ' + Recipient);
      End Else Begin
        LogMessage('Recipient tag not found');
        Recipient := '';
      End;


      // Add award to the database
      AddAward(Event, AwardName, Category, Recipient, Year, Won);
      If Won Then
        LogMessage('AddAward executed successfully: Event=' + Event + ', Award=' + AwardName + ', Category=' + Category + ', Recipient=' + Recipient + ', Year=' + Year + ', Won=True')
      Else
        LogMessage('AddAward executed successfully: Event=' + Event + ', Award=' + AwardName + ', Category=' + Category + ', Recipient=' + Recipient + ', Year=' + Year + ', Won=False');
    End;


    // Move to the next event section
    curPos := eventEndPos;
    curPos := PosFrom('<section class="ipc-page-section ipc-page-section--base">', HTML, curPos);
  End;


  LogMessage('Function ParsePage_IMDBPeopleAWARDS END=====================||');
  Result := prFinished;
End;
//BlockClose
--- End quote ---


Here's the beginning of the log (first event is "Ariel Awards, Mexico" and the first award in it is "Golden Ariel") and the end of the log ("BOFA" is the last award of the last event of this page - "Brazil Online Film Award"):



--- Quote ---(12/28/2024 8:35:07 PM) Function ParsePage_IMDBPeopleAWARDS BEGIN=====================||
(12/28/2024 8:35:07 PM) curPos after finding event section: 148924
(12/28/2024 8:35:07 PM) Parsed Event: Ariel Awards, Mexico
(12/28/2024 8:35:07 PM) curPos after finding award div: 149982
(12/28/2024 8:35:07 PM) Parsed Year: 2019
(12/28/2024 8:35:07 PM) Parsed Won: True
(12/28/2024 8:35:07 PM) Parsed Award Name:  Golden Ariel
(12/28/2024 8:35:07 PM) Parsed Category: Best Picture (Mejor Película)
(12/28/2024 8:35:07 PM) Recipient tag not found
(12/28/2024 8:35:07 PM) AddAward executed successfully: Event=Ariel Awards, Mexico, Award= Golden Ariel, Category=Best Picture (Mejor Película), Recipient=, Year=2019, Won=True
============= intermediate logs here
(12/28/2024 8:35:10 PM) AddAward executed successfully: Event=Premios Eres, Award= Premio Eres, Category=Best Picture (Mejor Película), Recipient=, Year=1993, Won=False
(12/28/2024 8:35:10 PM) No more awards found in this event
(12/28/2024 8:35:10 PM) curPos after finding event section: 1574223
(12/28/2024 8:35:10 PM) Parsed Event: Brazil Online Film Award
(12/28/2024 8:35:10 PM) curPos after finding award div: 1575285
(12/28/2024 8:35:10 PM) Parsed Year: 2019
(12/28/2024 8:35:10 PM) Parsed Won: True
(12/28/2024 8:35:10 PM) Parsed Award Name:  BOFA
(12/28/2024 8:35:10 PM) Parsed Category: Best Director
(12/28/2024 8:35:10 PM) Recipient tag not found
(12/28/2024 8:35:10 PM) AddAward executed successfully: Event=Brazil Online Film Award, Award= BOFA, Category=Best Director, Recipient=, Year=2019, Won=True
(12/28/2024 8:35:10 PM) No more awards found in this event
(12/28/2024 8:35:10 PM) Function ParsePage_IMDBPeopleAWARDS END=====================||
(12/28/2024 8:35:10 PM) After calling ParsePage_IMDBPeopleAWARDS
(12/28/2024 8:35:10 PM) Parsed awards page.
(12/28/2024 8:35:10 PM) Parsing awards page finished successfully.
(12/28/2024 8:35:10 PM)     Provider data info retrieved Ok on 2024-12-28 20:35:10|
(12/28/2024 8:35:10 PM) Function ParsePage smNormal END======================|
(12/28/2024 8:35:10 PM) Person -> LoadStatic -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadMultivalues -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadFilms -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadAwards -> 15ms
(12/28/2024 8:35:10 PM) Person -> LoadImages -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadStatic -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadMultivalues -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadFilms -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadAwards -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadImages -> 16ms
--- End quote ---

Ivek23:

--- Quote ---      // Extract entire award block
      curPos := awardPos;
      endPos := PosFrom('</li>', HTML, curPos);
      If endPos = 0 Then Begin
        LogMessage('No closing tag for award div found');
        Break;
      End;


      Award := Copy(HTML, curPos, endPos - curPos);
      curPos := endPos + Length('</li>');
      LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---

Just change this part of the code above with this part of the code below and Recipient will work.


--- Quote ---      // Extract entire award block
      curPos := awardPos;
      endPos := PosFrom('</div></div></li>', HTML, curPos);
      If endPos = 0 Then Begin
        LogMessage('No closing tag for award div found');
        Break;
      End;


      Award := Copy(HTML, curPos, endPos - curPos);
      curPos := endPos + Length('</div></div></li>');
      LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---

afrocuban:

--- Quote from: Ivek23 on December 29, 2024, 09:46:28 am ---
--- Quote ---      // Extract entire award block
      curPos := awardPos;
      endPos := PosFrom('</li>', HTML, curPos);
      If endPos = 0 Then Begin
        LogMessage('No closing tag for award div found');
        Break;
      End;


      Award := Copy(HTML, curPos, endPos - curPos);
      curPos := endPos + Length('</li>');
      LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---


Just change this part of the code above with this part of the code below and Recipient will work.


--- Quote ---      // Extract entire award block
      curPos := awardPos;
      endPos := PosFrom('<><></li>', HTML, curPos);
      If endPos = 0 Then Begin
        LogMessage('No closing tag for award div found');
        Break;
      End;


      Award := Copy(HTML, curPos, endPos - curPos);
      curPos := endPos + Length('<><></li>');
      LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---

--- End quote ---


Is that for Recipient? Because everything else works except Recipient.

afrocuban:
Ohhhhh, I seee now!!!! Award extracted didn't contain Recipient!!! Thank you I will try it later!

afrocuban:
I can now confirm that parsing awards works completely.

What doesn't work is populating to database, at least for me. No award or event is populated, although everything is properly parsed. Here's the log for the person and page given above.

What that can be???

Navigation

[0] Message Index

[#] Next page

[*] Previous page

Go to full version