English > Support
Imdb People script issues
afrocuban:
And probably FINALLY this one is a winner (have to try to tweak Recipient yet):
--- Quote ---Function ParsePage_IMDBPeopleAWARDS(HTML: String): Cardinal;
Var
curPos, endPos, awardPos, categoryPos, recipientPos, yearPos, eventEndPos, namePos: Integer;
Event, Award, AwardName, Category, Recipient, Year: String;
Won: Boolean;
Begin
LogMessage('Function ParsePage_IMDBPeopleAWARDS BEGIN=====================||');
Result := prFinished;
// Locate the start of the first event section
curPos := Pos('<section class="ipc-page-section ipc-page-section--base">', HTML);
While curPos > 0 Do Begin
LogMessage('curPos after finding event section: ' + IntToStr(curPos));
// Extract event name
curPos := PosFrom('<span id="ev', HTML, curPos);
If curPos = 0 Then Begin
LogMessage('Event name not found');
Break;
End;
curPos := PosFrom('>', HTML, curPos) + 1;
endPos := PosFrom('</span>', HTML, curPos);
Event := Trim(Copy(HTML, curPos, endPos - curPos));
LogMessage('Parsed Event: ' + Event);
// Move cursor to start processing awards within the event
curPos := endPos;
eventEndPos := PosFrom('<section class="ipc-page-section ipc-page-section--base">', HTML, curPos);
If eventEndPos = 0 Then
eventEndPos := Length(HTML); // Set to the end of HTML if no more events
// Process awards within the event
While curPos < eventEndPos Do Begin
// Find next award div within the current event
awardPos := PosFrom('<li class="ipc-metadata-list-summary-item sc-15fc9ae6-1 gQbMPJ" data-testid="list-item">', HTML, curPos);
If (awardPos = 0) Or (awardPos >= eventEndPos) Then Begin
LogMessage('No more awards found in this event');
Break;
End;
LogMessage('curPos after finding award div: ' + IntToStr(awardPos));
// Extract entire award block
curPos := awardPos;
endPos := PosFrom('</li>', HTML, curPos);
If endPos = 0 Then Begin
LogMessage('No closing tag for award div found');
Break;
End;
Award := Copy(HTML, curPos, endPos - curPos);
curPos := endPos + Length('</li>');
LogMessage('Award Content Extracted Successfully: ' + Award);
// Extract year
yearPos := PosFrom('<a class="ipc-metadata-list-summary-item__t"', Award, 1);
If yearPos = 0 Then Begin
LogMessage('Year not found');
Continue;
End;
yearPos := PosFrom('>', Award, yearPos) + 1;
endPos := PosFrom(' ', Award, yearPos);
Year := Copy(Award, yearPos, endPos - yearPos);
Year := Trim(Year);
LogMessage('Parsed Year: ' + Year);
// Determine if the award was won
Won := PosFrom('Winner', Award, 1) > 0;
If Won Then
LogMessage('Parsed Won: True')
Else
LogMessage('Parsed Won: False');
// Extract award name
namePos := PosFrom('<span class="ipc-metadata-list-summary-item__tst">', Award, 1);
If namePos > 0 Then Begin
namePos := PosFrom('>', Award, namePos) + 1;
endPos := PosFrom('</span>', Award, namePos);
AwardName := Copy(Award, namePos, endPos - namePos);
LogMessage('Parsed Award Name: ' + AwardName);
End Else Begin
LogMessage('Award Name not found');
AwardName := '';
End;
// Extract category
categoryPos := PosFrom('<span class="ipc-metadata-list-summary-item__li awardCategoryName" aria-disabled="false">', Award, 1);
If categoryPos > 0 Then Begin
categoryPos := PosFrom('>', Award, categoryPos) + 1;
endPos := PosFrom('</span>', Award, categoryPos);
Category := Copy(Award, categoryPos, endPos - categoryPos);
LogMessage('Parsed Category: ' + Category);
End Else Begin
LogMessage('Category tag not found');
Category := '';
End;
// Extract recipient
recipientPos := PosFrom('<a class="ipc-metadata-list-summary-item__li ipc-metadata-list-summary-item__li--link"', Award, endPos + Length('</span>') + 1);
If recipientPos > 0 Then Begin
recipientPos := PosFrom('>', Award, recipientPos) + 1;
endPos := PosFrom('</a>', Award, recipientPos);
Recipient := Copy(Award, recipientPos, endPos - recipientPos);
LogMessage('Parsed Recipient: ' + Recipient);
End Else Begin
LogMessage('Recipient tag not found');
Recipient := '';
End;
// Add award to the database
AddAward(Event, AwardName, Category, Recipient, Year, Won);
If Won Then
LogMessage('AddAward executed successfully: Event=' + Event + ', Award=' + AwardName + ', Category=' + Category + ', Recipient=' + Recipient + ', Year=' + Year + ', Won=True')
Else
LogMessage('AddAward executed successfully: Event=' + Event + ', Award=' + AwardName + ', Category=' + Category + ', Recipient=' + Recipient + ', Year=' + Year + ', Won=False');
End;
// Move to the next event section
curPos := eventEndPos;
curPos := PosFrom('<section class="ipc-page-section ipc-page-section--base">', HTML, curPos);
End;
LogMessage('Function ParsePage_IMDBPeopleAWARDS END=====================||');
Result := prFinished;
End;
//BlockClose
--- End quote ---
Here's the beginning of the log (first event is "Ariel Awards, Mexico" and the first award in it is "Golden Ariel") and the end of the log ("BOFA" is the last award of the last event of this page - "Brazil Online Film Award"):
--- Quote ---(12/28/2024 8:35:07 PM) Function ParsePage_IMDBPeopleAWARDS BEGIN=====================||
(12/28/2024 8:35:07 PM) curPos after finding event section: 148924
(12/28/2024 8:35:07 PM) Parsed Event: Ariel Awards, Mexico
(12/28/2024 8:35:07 PM) curPos after finding award div: 149982
(12/28/2024 8:35:07 PM) Parsed Year: 2019
(12/28/2024 8:35:07 PM) Parsed Won: True
(12/28/2024 8:35:07 PM) Parsed Award Name: Golden Ariel
(12/28/2024 8:35:07 PM) Parsed Category: Best Picture (Mejor Película)
(12/28/2024 8:35:07 PM) Recipient tag not found
(12/28/2024 8:35:07 PM) AddAward executed successfully: Event=Ariel Awards, Mexico, Award= Golden Ariel, Category=Best Picture (Mejor Película), Recipient=, Year=2019, Won=True
============= intermediate logs here
(12/28/2024 8:35:10 PM) AddAward executed successfully: Event=Premios Eres, Award= Premio Eres, Category=Best Picture (Mejor Película), Recipient=, Year=1993, Won=False
(12/28/2024 8:35:10 PM) No more awards found in this event
(12/28/2024 8:35:10 PM) curPos after finding event section: 1574223
(12/28/2024 8:35:10 PM) Parsed Event: Brazil Online Film Award
(12/28/2024 8:35:10 PM) curPos after finding award div: 1575285
(12/28/2024 8:35:10 PM) Parsed Year: 2019
(12/28/2024 8:35:10 PM) Parsed Won: True
(12/28/2024 8:35:10 PM) Parsed Award Name: BOFA
(12/28/2024 8:35:10 PM) Parsed Category: Best Director
(12/28/2024 8:35:10 PM) Recipient tag not found
(12/28/2024 8:35:10 PM) AddAward executed successfully: Event=Brazil Online Film Award, Award= BOFA, Category=Best Director, Recipient=, Year=2019, Won=True
(12/28/2024 8:35:10 PM) No more awards found in this event
(12/28/2024 8:35:10 PM) Function ParsePage_IMDBPeopleAWARDS END=====================||
(12/28/2024 8:35:10 PM) After calling ParsePage_IMDBPeopleAWARDS
(12/28/2024 8:35:10 PM) Parsed awards page.
(12/28/2024 8:35:10 PM) Parsing awards page finished successfully.
(12/28/2024 8:35:10 PM) Provider data info retrieved Ok on 2024-12-28 20:35:10|
(12/28/2024 8:35:10 PM) Function ParsePage smNormal END======================|
(12/28/2024 8:35:10 PM) Person -> LoadStatic -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadMultivalues -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadFilms -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadAwards -> 15ms
(12/28/2024 8:35:10 PM) Person -> LoadImages -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadStatic -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadMultivalues -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadFilms -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadAwards -> 0ms
(12/28/2024 8:35:10 PM) Person -> LoadImages -> 16ms
--- End quote ---
Ivek23:
--- Quote --- // Extract entire award block
curPos := awardPos;
endPos := PosFrom('</li>', HTML, curPos);
If endPos = 0 Then Begin
LogMessage('No closing tag for award div found');
Break;
End;
Award := Copy(HTML, curPos, endPos - curPos);
curPos := endPos + Length('</li>');
LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---
Just change this part of the code above with this part of the code below and Recipient will work.
--- Quote --- // Extract entire award block
curPos := awardPos;
endPos := PosFrom('</div></div></li>', HTML, curPos);
If endPos = 0 Then Begin
LogMessage('No closing tag for award div found');
Break;
End;
Award := Copy(HTML, curPos, endPos - curPos);
curPos := endPos + Length('</div></div></li>');
LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---
afrocuban:
--- Quote from: Ivek23 on December 29, 2024, 09:46:28 am ---
--- Quote --- // Extract entire award block
curPos := awardPos;
endPos := PosFrom('</li>', HTML, curPos);
If endPos = 0 Then Begin
LogMessage('No closing tag for award div found');
Break;
End;
Award := Copy(HTML, curPos, endPos - curPos);
curPos := endPos + Length('</li>');
LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---
Just change this part of the code above with this part of the code below and Recipient will work.
--- Quote --- // Extract entire award block
curPos := awardPos;
endPos := PosFrom('<><></li>', HTML, curPos);
If endPos = 0 Then Begin
LogMessage('No closing tag for award div found');
Break;
End;
Award := Copy(HTML, curPos, endPos - curPos);
curPos := endPos + Length('<><></li>');
LogMessage('Award Content Extracted Successfully: ' + Award);
--- End quote ---
--- End quote ---
Is that for Recipient? Because everything else works except Recipient.
afrocuban:
Ohhhhh, I seee now!!!! Award extracted didn't contain Recipient!!! Thank you I will try it later!
afrocuban:
I can now confirm that parsing awards works completely.
What doesn't work is populating to database, at least for me. No award or event is populated, although everything is properly parsed. Here's the log for the person and page given above.
What that can be???
Navigation
[0] Message Index
[#] Next page
[*] Previous page
Go to full version