English > Support

Imdb People script issues

<< < (2/9) > >>

afrocuban:
Thanks. Are you saying I should try to comment out downloading photo to check if that is the culprit?

Ivek23:

--- Quote from: afrocuban on December 08, 2024, 09:09:23 pm ---Thanks. Are you saying I should try to comment out downloading photo to check if that is the culprit?
--- End quote ---

Yes, I hope that the culprit of the problem is only this code for downloading the photo of people, and not something else, because I haven't tested it myself.

Ivek23:

--- Quote from: Ivek23 on December 09, 2024, 08:17:17 am ---
--- Quote from: afrocuban on December 08, 2024, 09:09:23 pm ---Thanks. Are you saying I should try to comment out downloading photo to check if that is the culprit?
--- End quote ---

Yes, I hope that the culprit of the problem is only this code for downloading the photo of people, and not something else, because I haven't tested it myself.
--- End quote ---

The code in Function ParsePage_IMDBPeopleBIO is to blame for PVD freezing.


--- Code: ---Function ParsePage_IMDBPeopleBIO(HTML:String):Cardinal; //BlockOpen
    //Returns:
    //     Result:=prFinished; Script has finished gathering data
    //     Result:=prError; If any big problem, with exit;
    //Retrieve: ~bio~ Biography from "Mini Bio" IMDB section
// (* *)
    // NOTE: in this posted version the whole parsing body below is wrapped in a
    // block comment, so the function only writes its BEGIN/END log lines and
    // returns prFinished. The local variables are referenced only by the
    // disabled code.
  Var
    curPos,endPos,debug_pos1:Integer;
    ItemValue:String;
    PersonID,ItemValue0,ItemValue10,ItemValue1,ItemValue11:String;
ItemList,ItemList00,ItemList0,ItemList1,ItemList11,ItemList12:String;
  Begin
    LogMessage('Function ParsePage_IMDBPeopleBIO BEGIN=====================||');       
    Result:=prFinished;  //It will change to prError if any big problem with exit;
(*
    //Get "Biography" info
    curPos:=Pos('<h1 class="ipc-title__text">Biography</h1>',HTML);      //Strings start which opens the block content data. WEB_SPECIFIC
    if (curPos=0) then Exit;
ItemList0:=TextBetWeenFirst(HTML,'<h1 class="ipc-title__text','<h3 class="ipc-title__text"><span>Contribute to this page</span></h3>');
//LogMessage('  ** Parse Biography '+#13+ItemList0+' **');

If (Length(ItemList0)>0) Then Begin
ItemValue1:=TextBetWeenFirst(ItemList0,'<span class="ipc-metadata-list-item__label" aria-disabled="false">Birth name</span>','</div>');
if BIRTH_NAME_IN_TRANSNAME then
if ItemValue1 <> '' then AddFieldValueXML('transname',ItemValue1);
If ItemValue1 <> '' then LogMessage('      Get result from Birth Name01:'+ItemValue1+'||');
End;

ItemList:='';
ItemList11:='';
//Get PersonID
PersonID:=TextBetWeenFirst(HTML,'<meta property="imdb:pageConst" content="','"/>');   //WEB_SPECIFIC.
    if (2<Length(PersonID)) then begin
ItemList:='<link url="http://www.imdb.com/name/'+PersonID+'/bio/#overview">Biography Info</link>';
        LogMessage('      Get result PersonID:'+PersonID+'||');
    end;
    //Get "Mini bio" Biography text
If Pos('<h1 class="ipc-title__text">Biography</h1>',HTML)>0 Then Begin
curPos:=Pos('<h3 class="ipc-title__text"><span id="mini_bio">Mini Bio</span>',HTML);       //WEB_SPECIFIC.
If 0<curPos Then Begin
// NOTE(review): the next statement passes EndPos as the third PosFrom argument,
// but EndPos is not assigned anywhere in this function before that point (its
// first assignment is on the statement after it). curPos may have been
// intended - confirm before re-enabling this code.
curPos:=PosFrom('<li role="presentation" class="ipc-metadata-list__item" id="mini_bio_0" data-testid="list-item"><div class="ipc-metadata-list-item__content-container"><ul class="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--inline ipc-metadata-list-item__list-content base" role="presentation"><div class="ipc-html-content ipc-html-content--base ipc-metadata-list-item-html-item" role="presentation"><div class="ipc-html-content-inner-div">',HTML,EndPos)+Length('<li role="presentation" class="ipc-metadata-list__item" id="mini_bio_0" data-testid="list-item"><div class="ipc-metadata-list-item__content-container"><ul class="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--inline ipc-metadata-list-item__list-content base" role="presentation"><div class="ipc-html-content ipc-html-content--base ipc-metadata-list-item-html-item" role="presentation"><div class="ipc-html-content-inner-div">');
EndPos:=PosFrom('</div>',HTML,curPos);
//ItemValue:=Copy(HTML,curPos,endPos-curPos);
ItemValue:=Trim(Copy(HTML,curPos,endPos-curPos)); //ItemValue:=Copy(HTML,curPos+425,endPos-curPos-425);
//LogMessage('      Get result bio (from Mini bio)1:'+ItemValue+'||');
ItemValue:=StringReplace(ItemValue,#10,#160,True,False,True);
//LogMessage('      Get result bio (from Mini bio)2:'+ItemValue+'||');
ItemValue:=StringReplace(ItemValue,'<a class="ipc-md-link ipc-md-link--entity" href="','<link url="http://www.imdb.com' ,True,False,True);
ItemValue:=StringReplace(ItemValue,'/?ref_=nmbio_mbio">',+'/">',True,False,True);
ItemValue:=StringReplace(ItemValue,'</a>','</link>',True,False,True);
//LogMessage('      Get result bio (from Mini bio)0:'+ItemValue+'||');
//curPos:=Pos('###',ItemValue);
//If 0<curPos then ItemValue:=Copy(ItemValue,0,curPos-2);
//curPos:=Pos('</p>',ItemValue);      //WEB_SPECIFIC. Chr(13)
//If 0<curPos then Delete(ItemValue,curPos,Length(ItemValue)-curPos);
//if Pos('~  ', ItemValue) = 0 then Delete(ItemValue,1,2);
If BIO_URL_IN_BIO then ItemValue:=RemoveTags(ItemValue, False);
LogMessage('      Get result bio (from Mini bio):'+ItemValue+'||');
If ItemValue <> '' then ItemList11:=ItemList11+ItemValue;
//if ItemValue <> '' then AddFieldValueXML('bio',ItemValue);
End;
End;

//If (ItemList11 = '') AND (ItemList <> '') Then
ItemList12:=ItemList;
If (ItemList11 <> '') AND (ItemList <> '') Then 
ItemList12:=ItemList11;

//ItemList12:=ItemList11+#13+'--------------------------------------------------------------------------'+#13+ItemList+#32#32#32+'<link url="http://www.imdb.com/name/'+PersonID+'/bio/#mini_bio">Mini bio Biography</link>';

///If BIO_INFO_IN_BIO then AddFieldValueXML('bio',ItemList12);

  ///If Not(BIO_INFO_IN_BIO) Then AddFieldValueXML('bio',ItemList11);

//Get "Birth name" Biography text
ItemList00:='';
ItemList00:=TextBetWeenFirst(HTML,'<h1 class="ipc-title__text','<h3 class="ipc-title__text"><span>Contribute to this page</span></h3>'); 
//LogMessage('  *** Parse Biography '+#13+ItemList00+' ***');
If (Length(ItemList00)>0) Then Begin
ItemValue0:=TextBetWeenFirst(ItemList00,'<span class="ipc-metadata-list-item__label" aria-disabled="false">Birth name</span>','</div></div></div>');
// NOTE(review): because the AddFieldValueXML line below is commented out, the
// "if BIRTH_NAME_IN_TRANSNAME then" now governs the LogMessage statement that
// follows it. That statement also tests ItemValue while logging ItemValue0 -
// confirm the intent before re-enabling this code.
if BIRTH_NAME_IN_TRANSNAME then
//if ItemValue0 <> '' then AddFieldValueXML('transname',ItemValue0);
If ItemValue <> '' then LogMessage('      Get result from Birth Name02:'+ItemValue0+'||');
If ItemValue0 <> '' then ItemValue0:='BirthName:  '+ItemValue0;
If ItemValue0 <> '' then ItemList12:=ItemList12+#13+'--------------------------------------------------------------------------'+#13+ItemValue0;
End;

If BIO_INFO_IN_BIO then AddFieldValueXML('bio',ItemList12);

  If Not(BIO_INFO_IN_BIO) Then AddFieldValueXML('bio',ItemList11);
         
*)
    LogMessage('Function ParsePage_IMDBPeopleBIO END=====================||');
  End; //BlockClose
--- End code ---



Below is the added code and IMDB_People_[EN][HTTPS] (2) script, where this function is blocked. The script needs massive changes due to major changes in the source code of the website.

afrocuban:
As I said somewhere else, I fixed the bio and genre fields, and now I'm dealing with integrating Selenium into PVD for downloading dynamic HTML content.
Reference from this point forward
Now I have reached the phase of parsing the Awards page manually downloaded with Selenium, and I'm having a hard time with it. I have fixed the code to parse the page, and it successfully parses it, as you can see here:

--- Quote ---(12/21/2024 10:24:56 PM) Parsed Event: Ariel Awards, Mexico
(12/21/2024 10:24:56 PM) Parsed Award:  Golden Ariel
(12/21/2024 10:24:56 PM) Parsed Category: Best Picture (Mejor Película)
(12/21/2024 10:24:56 PM) Parsed Recipient: Roma
(12/21/2024 10:24:56 PM) Parsed Year: 2019
(12/21/2024 10:24:56 PM) Parsed Won: True
(12/21/2024 10:24:56 PM) Before calling AddAward with parameters:
(12/21/2024 10:24:56 PM) Event: Ariel Awards, Mexico
(12/21/2024 10:24:56 PM) Award:  Golden Ariel
(12/21/2024 10:24:56 PM) Category: Best Picture (Mejor Película)
(12/21/2024 10:24:56 PM) Recipient: Roma
(12/21/2024 10:24:56 PM) Year: 2019
(12/21/2024 10:24:56 PM) Won: True
(12/21/2024 10:24:56 PM) AddAward executed successfully.
(12/21/2024 10:24:56 PM) IMDb People Awards added Event=Ariel Awards, Mexico, Award= Golden Ariel, Category=Best Picture (Mejor Película), Recipient=Roma, Year=2019, Won=True
(12/21/2024 10:24:56 PM) Added Award to Database: Event=Ariel Awards, Mexico, Award= Golden Ariel, Category=Best Picture (Mejor Película), Recipient=Roma, Year=2019, Won: True
--- End quote ---


But for some reason the value is not populated/displayed in PVD. So I thought I'll create custom memo field "IMDb People Awards" to populate there the value to check what it looks like, only to realize no custom field is visible in PVD's People section???
Is it possible at all to add custom fields in People section?
I manually put the value in the field (added to my dark people skin), but when I exit edit mode, it's not displayed. When I enter edit mode, it's there. If I restart PVD, the value disappears from the custom field.


Anyway, does  anyone know looking at the log, why this properly parsed award wouldn't populate to field although reported that it did?

Here's whole function (I even added some extra logging around adding value to the field in order to see what is going on, but to no avail - everything looks perfect yet value is not there):


--- Quote ---Function CustomBoolToStr(Value: Boolean): String;
// Converts a Boolean to its text form: 'True' when Value is set, otherwise 'False'.
Begin
  // Start from the negative answer and override it only when Value holds.
  Result := 'False';
  If Value Then
    Result := 'True';
End;


Function ParsePage_IMDBPeopleAWARDS(HTML: String): Cardinal;
// Parses the first awards <section> that follows the "Awards" page title in
// HTML and registers every award item found in it.
//
// Parameters:
//   HTML - full text of the downloaded awards page.
// Returns:
//   prFinished when parsing ran to completion;
//   prError    when AddAward or the parsing code raised an exception.
//
// For each award item the function calls
//   AddAward(Event, Award, Category, Recipient, Year, Won)
// and AddCustomFieldValueByName('IMDb People Awards', <summary string>).
//
// Every item is cut out into its own block (from its <li ...> marker up to the
// next marker) and all fields are looked up inside that block only, so a
// missing field can never be filled from the following item and "Winner" is
// evaluated per item instead of for the whole section.
//
// NOTE(review): only the first section (and therefore only one event name) is
// handled; pages with several events would need an outer loop - confirm
// against the real page layout.
Var
  curPos, endPos: Integer;
  itemPos, nextItemPos, fieldPos, spacePos: Integer;
  ItemMarker: String;
  ItemList, ItemBlock, Event, Award, Category, Recipient, Year: String;
  AValue: String;
  Won: Boolean;
  FailSafe: Integer;  // Hard upper bound on processed items
Begin
  LogMessage('Function ParsePage_IMDBPeopleAWARDS BEGIN=====================||');

  // Set once up front; the exception handlers below change it to prError and
  // nothing resets it afterwards.
  Result := prFinished;

  // Opening tag prefix that starts every award item. WEB_SPECIFIC
  ItemMarker := '<li class="ipc-metadata-list-summary-item sc-15fc9ae6-1 gQbMPJ" data-testid="list-item"';

  try
    // Log the initial HTML snippet being parsed
    LogMessage('Initial HTML snippet: ' + Copy(HTML, 1, 500));

    // Locate the Awards title, then the section that follows it
    curPos := Pos('<h1 class="ipc-title__text">Awards</h1>', HTML);
    If curPos > 0 Then
      curPos := PosFrom('<section class="ipc-page-section ipc-page-section--base">', HTML, curPos);

    If curPos > 0 Then Begin
      // Find the end of the section; fall back to the end of the page
      endPos := PosFrom('</section>', HTML, curPos);
      If endPos = 0 Then endPos := Length(HTML);

      If endPos > curPos Then Begin
        // Extract the Awards block
        ItemList := Copy(HTML, curPos, endPos - curPos);

        // Extract and log the event name
        Event := '';
        curPos := PosFrom('<h3 class="ipc-title__text">', ItemList, 1);
        If curPos > 0 Then Begin
          curPos := PosFrom('>', ItemList, curPos) + 1;
          endPos := PosFrom('</span>', ItemList, curPos);
          Event := Trim(Copy(ItemList, curPos, endPos - curPos));

          // Remove the leading <span ...> tag
          Event := Copy(Event, Pos('>', Event) + 1, Length(Event));
          LogMessage('Parsed Event: ' + Event);
        End Else LogMessage('Error: Event title div not found.');

        // Walk the award items one by one
        itemPos := PosFrom(ItemMarker, ItemList, 1);
        FailSafe := 0;

        While (itemPos > 0) And (FailSafe < 1000) Do Begin
          FailSafe := FailSafe + 1;

          // The item ends where the next one starts (or at the end of the section).
          // nextItemPos is computed before any field parsing, so the loop always
          // moves forward even when a field is missing.
          nextItemPos := PosFrom(ItemMarker, ItemList, itemPos + 1);
          If nextItemPos > 0 Then
            ItemBlock := Copy(ItemList, itemPos, nextItemPos - itemPos)
          Else
            ItemBlock := Copy(ItemList, itemPos, Length(ItemList) - itemPos + 1);

          // Start every item with clean fields so nothing leaks from the previous one
          Award := '';
          Category := '';
          Recipient := '';
          Year := '';

          // Extract and log the award name
          fieldPos := PosFrom('<span class="ipc-metadata-list-summary-item__tst">', ItemBlock, 1);
          If fieldPos > 0 Then Begin
            fieldPos := PosFrom('>', ItemBlock, fieldPos) + 1;
            endPos := PosFrom('</span>', ItemBlock, fieldPos);
            Award := Copy(ItemBlock, fieldPos, endPos - fieldPos);
            LogMessage('Parsed Award: ' + Award);

            // Extract and log the category name
            fieldPos := PosFrom('<span class="ipc-metadata-list-summary-item__li awardCategoryName"', ItemBlock, 1);
            If fieldPos > 0 Then Begin
              fieldPos := PosFrom('>', ItemBlock, fieldPos) + 1;
              endPos := PosFrom('</span>', ItemBlock, fieldPos);
              Category := Copy(ItemBlock, fieldPos, endPos - fieldPos);
              LogMessage('Parsed Category: ' + Category);

              // Extract and log the recipient name
              fieldPos := PosFrom('<a class="ipc-metadata-list-summary-item__li ipc-metadata-list-summary-item__li--link"', ItemBlock, 1);
              If fieldPos > 0 Then Begin
                fieldPos := PosFrom('>', ItemBlock, fieldPos) + 1;
                endPos := PosFrom('</a>', ItemBlock, fieldPos);
                Recipient := Copy(ItemBlock, fieldPos, endPos - fieldPos);
                LogMessage('Parsed Recipient: ' + Recipient);

                // Extract and log the year: the link text up to the next tag,
                // cut at the first space (e.g. "2019 Winner" -> "2019")
                fieldPos := PosFrom('<a class="ipc-metadata-list-summary-item__t"', ItemBlock, 1);
                If fieldPos > 0 Then Begin
                  fieldPos := PosFrom('>', ItemBlock, fieldPos) + 1;
                  endPos := PosFrom('<', ItemBlock, fieldPos);
                  Year := Trim(Copy(ItemBlock, fieldPos, endPos - fieldPos));
                  spacePos := Pos(' ', Year);
                  If spacePos > 0 Then Year := Copy(Year, 1, spacePos - 1);
                  LogMessage('Parsed Year: ' + Year);
                End Else LogMessage('Error: Year not found.');

                // Determine if THIS item was won (not any item of the section)
                Won := Pos('Winner', ItemBlock) > 0;
                LogMessage('Parsed Won: ' + CustomBoolToStr(Won));

                // Construct the AValue string
                AValue := 'Event=' + Event + ', Award=' + Award + ', Category=' + Category + ', Recipient=' + Recipient + ', Year=' + Year + ', Won=' + CustomBoolToStr(Won);

                // Log the parameters before calling AddAward
                LogMessage('Before calling AddAward with parameters:');
                LogMessage('Event: ' + Event);
                LogMessage('Award: ' + Award);
                LogMessage('Category: ' + Category);
                LogMessage('Recipient: ' + Recipient);
                LogMessage('Year: ' + Year);
                LogMessage('Won: ' + CustomBoolToStr(Won));

                // Add the award to the database with error handling
                try
                  AddAward(Event, Award, Category, Recipient, Year, Won);
                  LogMessage('AddAward executed successfully.');
                except
                  Begin
                    LogMessage('Exception encountered in AddAward');
                    Result := prError;
                  End;
                end;

                // Populate the custom field with AValue
                AddCustomFieldValueByName('IMDb People Awards', AValue);
                LogMessage('IMDb People Awards added ' + AValue);

                // Log the action of adding the award
                LogMessage('Added Award to Database: Event=' + Event + ', Award=' + Award + ', Category=' + Category + ', Recipient=' + Recipient + ', Year=' + Year + ', Won: ' + CustomBoolToStr(Won));
              End Else LogMessage('Error: Recipient not found.');
            End Else LogMessage('Error: Category not found.');
          End Else LogMessage('Error: Award not found.');

          // Move to the next item
          itemPos := nextItemPos;
        End;

        // Only reachable with items left over when the hard bound was hit
        If itemPos > 0 Then
          LogMessage('Warning: award item limit reached, remaining items skipped.');
      End Else LogMessage('Error: Invalid endPos or curPos for Awards section');
    End Else LogMessage('Error: Awards section not found');

  except
    Begin
      LogMessage('Exception encountered');
      Result := prError;
    End;
  end;

  LogMessage('Function ParsePage_IMDBPeopleAWARDS END=====================||');
End;


//BlockClose
--- End quote ---

Ivek23:

--- Quote from: afrocuban on December 21, 2024, 10:57:39 pm ---But for some reason the value is not populated/displayed in PVD. So I thought I'll create custom memo field "IMDb People Awards" to populate there the value to check what it looks like, only to realize no custom field is visible in PVD's People section???
Is it possible at all to add custom fields in People section?
I manually put the value in the field (added to my dark people skin), but when I exit edit mode, it's not displayed. When I enter edit mode, it's there. If I restart PVD value dissappears from the custom field.

--- End quote ---

You can only see this in the comment box for people to see what is happening.


The code you have now does not complete the process, so this is the code


--- Quote ---AddAward(EventName, AwardName, AwardCategory, AwardRecipient, EventYear, AwardWon);
--- End quote ---

therefore, it cannot write the award data so that the awards will then be visible in the awards field in the database.

Here is the awards code to help you.


--- Quote ---Function ParsePage_IMDBPeopleAWARDS(HTML:String):Cardinal; //BlockOpen
    //Purpose: walk the table-based awards page (tables of class "awards") and call
    //         AddAward once per recipient found, or once with an empty recipient
    //         when an award description carries no title link.
    //Returns:
    //     Result:=prFinished; Script has finished gathering data
    //     Result:=prError; If any big problem, with exit (this version never sets it)
    //Retrieve: AddAward(Event, Award, Category, Recipient, Year, Won)
    //Processing stops after EVENTS_LIMIT events (constant defined elsewhere in the script).
  Var
    curPos,endPos,endPosAux,index,curPos0,curPos1,curPos2,curPos3,curPos4,endPos0,endPos1,endPos2:Integer;
    ItemList:String;
    ItemArray: TWideArray;
    MovieURL,MovieYear,EventBlock,EventName,EventYear,YearBlock,AwardBlock,AwardName,AwardCategory,AwardRecipient:String;
    AwardWon: Boolean;
  Begin
    LogMessage('Function ParsePage_IMDBPeopleAWARDS BEGIN=====================||');
    Result:=prFinished;  //It will change to prError if any big problem with exit
//Get award (several values save in PVD with AddAward(Event, Award, Category, Recipient, Year, Won)
        // Parameters: Example Al Pacino
        //Event (Academy Awards, USA): Name of the event
        //Year (1993) = EventYear
        //Won (True,Winner/Nominee) set to true if the recipient won the award and to false otherwise
        //Award (Oscar): Best award name
        //Category (Best Actor in a Leading Role): award category
        //Recipient (Scent of a Woman): for people records the variable should contain the title of a movie for which the person won the award
        //          for movie records this variable should contain the name of a specific person who won the award
        //Year (1973): release year of a movie (only applicable when adding award to a person record) -> NO: Use EventYear always, in movie and in people
        //Won (True,Winner/Nominee) set to true if the recipient won the award and to false otherwise
//Go to "Awards" There are 4 levels: 1) Event (name) 2) Year (not saved) 3) Award (with outcome-Winner and name) 4) Recipient (award_description and Movie(name and year))
    curPos:=Pos('<h1 class="header">Awards',HTML);                                     //Strings start which opens the block content data. WEB_SPECIFIC
    curPos:=PosFrom('</h1>',HTML,curPos);                                              //Strings end which opens the block content data.  WEB_SPECIFIC
    curPos:=curPos+Length('</h1>');                                                    //Strings end which opens the block content data.  WEB_SPECIFIC
    //Event Level
    curPos:=PosFrom('<table class="awards"',HTML,curPos);                              //String which opens/closes the Event close but not the name. Search directly '<h3>' is very inconsistent. WEB_SPECIFIC
    index:=1;
    While curPos>0 Do Begin
        If (index>EVENTS_LIMIT) Then break;     //Limit exceeded (index starts at 1).
        //Go back for get the EventName and EventYear (Get all "raw" list data for create good values separators)
        curPos:=PrevPos('<h3>',HTML,curPos);                                             //String which opens the EventName and EventYear list data. WEB_SPECIFIC
        endPos:=PosFrom('</h3>',HTML,curPos)+Length('</h3>');                            //Strings which opens/closes the data. WEB_SPECIFIC
        ItemList:=Copy(HTML,curPos,endPos-curPos);
        EventName:=RemoveTags(ItemList, False);
        //LogMessage('           Parse results ('+IntToStr(curPos)+','+IntToStr(endPos)+') complex ItemList:'+ItemList+'||');
        //Get all "raw" Event data for create good values separators
        curPos:=PosFrom('<table class="awards"',HTML,endPos);                               //String which opens/closes the Event table data but not the name. WEB_SPECIFIC
        endPos:=PosFrom('</table>',HTML,curPos);
        //Strings which opens/closes the data. WEB_SPECIFIC
        EventBlock:=Copy(HTML,curPos,endPos-curPos);
        //LogMessage('           Parse results ('+IntToStr(curPos)+','+IntToStr(endPos)+') complex EventBlock:'+EventBlock+'||');
        //Year Level
        curPos0:=Pos('<td class="award_year"',EventBlock);                                  //String which opens the AwardYear list data. WEB_SPECIFIC
        While curPos0>0 Do Begin
            //Get EventYear
            endPos0:=PosFrom('</td>',EventBlock,curPos0)+Length('</td>');                   //Strings which opens/closes the data. WEB_SPECIFIC
            ItemList:=Copy(EventBlock,curPos0,endPos0-curPos0);
            EventYear:=Trim(RemoveTags(ItemList, False));
            //Get all "raw" Year data for create good values separators
            endPosAux:=PosFrom('<td class="award_year"',EventBlock,endPos0);                 //Strings which opens/closes the next block data. WEB_SPECIFIC
            If (endPosAux=0) Then endPosAux:=Length(EventBlock);                           //If no more blocks, set endPosAux at the last character.
            YearBlock:=Copy(EventBlock,curPos0,endPosAux-curPos0);
            //LogMessage('           Parse results ('+IntToStr(curPos0)+','+IntToStr(endPosAux)+') complex YearBlock:'+YearBlock+'||');
            //Award Level
            curPos1:=Pos('<td class="award_outcome"',YearBlock);                         //String which opens the AwardName and Won list data. WEB_SPECIFIC
            While curPos1>0 Do Begin
                //Get AwardWon and AwardName
                endPos1:=PosFrom('</td>',YearBlock,curPos1)+Length('</td>');                   //Strings which opens/closes the data. WEB_SPECIFIC
                ItemList:=Copy(YearBlock,curPos1,endPos1-curPos1);
                //Turn the "category" marker into a ';' separator so outcome and award name can be split. WEB_SPECIFIC
                ItemList:=StringReplace(ItemList,'category','>;<',True,True,False);              //WEB_SPECIFIC
                ItemList:=RemoveTags(ItemList, False);
                //LogMessage('           Parse results ('+IntToStr(curPos1)+','+IntToStr(endPos1)+') complex ItemList:'+ItemList+'||');
                ExplodeString(ItemList,ItemArray,';');
                AwardWon:= False;                                                           //Normally in 'Nominee' case. WEB_SPECIFIC
                If Pos('Winner',ItemArray[0])>0 Then AwardWon:= True;                          //WEB_SPECIFIC
                //NOTE(review): assumes the split always yields at least two elements - confirm.
                AwardName:=ItemArray[1];
                //Get all "raw" Award data for create good values separators
                endPosAux:=PosFrom('<td class="award_outcome"',YearBlock,endPos1);         //Strings which opens/closes the next block data. WEB_SPECIFIC
                If (endPosAux=0) Then endPosAux:=Length(YearBlock);                           //If no more blocks, set endPosAux at the last character.
                AwardBlock:=Copy(YearBlock,curPos1,endPosAux-curPos1);
                //LogMessage('           Parse results ('+IntToStr(curPos1)+','+IntToStr(endPosAux)+') complex AwardBlock:'+AwardBlock+'||');
                //Recipient Level
                curPos2:=Pos('<td class="award_description">',AwardBlock);                       //String which opens the AwardCategory and AwardRecipient list data. WEB_SPECIFIC
                While curPos2>0 Do Begin
                    //Get all "raw" list data for create good values separators (not use TextBetWeen)
                    endPos2:=PosFrom('</td>',AwardBlock,curPos2)+Length('</td>');                 //Strings which opens/closes the data. WEB_SPECIFIC
                    ItemList:=Copy(AwardBlock,curPos2,endPos2-curPos2);
                    //LogMessage('           Parse results ('+IntToStr(curPos2)+','+IntToStr(curPos2)+') complex ItemList:'+ItemList+'||');
                    //The recipient ItemList may have: 1) empty description or no name (not interesting; would break ItemArray[]) 2) Several titles with year 3) Detail or full Notes
                    //So it is better to search sequentially by token in a block than with ItemArray
                    endPosAux:=PosFrom(#13,ItemList,2);                                        //Strings which opens/closes the data. WEB_SPECIFIC
                    curPos3:=PosFrom('title',ItemList,2);                                       //Strings which opens/closes the data. WEB_SPECIFIC
                    If (endPosAux<curPos3) Or (curPos3=0) Then Begin                            //There is an AwardCategory because #13 comes before the name, or there is no name. WEB_SPECIFIC
                        curPos4:=1;
                        AwardCategory:=TextBetWeen(ItemList,'<td class="award_description">',#13,false,curPos4);   //Strings which opens/closes the data. WEB_SPECIFIC
                        LogMessage('     Parse Results in AwardCategory:'+AwardCategory+'||');
                        curPos4:=Pos('Shared with:',AwardCategory);                          //WEB_SPECIFIC.
                        If 0<curPos4 then AwardCategory:=Copy(AwardCategory,0,curPos4-1);
                        LogMessage('     Parse Results in AwardCategory0:'+AwardCategory+'||');
                    End Else Begin
                        AwardCategory:='';
                    End;
                    If curPos3=0 Then Begin //Award without Recipient
                        AddAward(EventName, AwardName, AwardCategory, '', EventYear, AwardWon);
                        LogMessage('      Get results Awards:#'+IntToStr(index)+'|'+EventName+'|'+AwardName+'|'+AwardCategory+'|'+''+'|'+EventYear+'|'); //+BoolToStr(AwardWon)+'||');
                    End;
                    While curPos3>0 Do Begin
                        //NOTE(review): curPos4 is only assigned in the AwardCategory branch above, so on the Else path it holds whatever
                        //an earlier iteration left in it (or nothing at all) - confirm what TextBetWeen expects here.
                        MovieURL:='http://www.imdb.com/title'+TextBetWeen(ItemList,'<a href="/title','?ref_=nmawd_awd_',true,curPos4)+'/';                                      //Strings which opens/closes the data. WEB_SPECIFIC
                        LogMessage('  **  Parse Results in MovieURL: '+MovieURL);
                        AwardRecipient:=TextBetWeen(ItemList,'>','<',false,curPos3);              //Strings which opens/closes the data. WEB_SPECIFIC
                        LogMessage('      Parse Results in AwardRecipient:'+AwardRecipient+'||');
                        MovieYear:=TextBetWeen(ItemList,'(',')',false,curPos3);                  //Strings which opens/closes the data. WEB_SPECIFIC
                        LogMessage('  **  Parse Results in MovieYear:'+MovieYear);
                        AddAward(EventName, AwardName, AwardCategory, AwardRecipient, EventYear, AwardWon);
                        LogMessage('      Get results Awards:#'+IntToStr(index)+'|'+EventName+'|'+AwardName+'|'+AwardCategory+'|'+AwardRecipient+'|'+EventYear+'|'); //+BoolToStr(AwardWon)+'||');
                        endPosAux:=PosFrom('truncated-note',ItemList,curPos3);                   //Strings which opens/closes the data. WEB_SPECIFIC
                        curPos3:=PosFrom('title',ItemList,curPos3);                             //Strings which opens/closes the data. WEB_SPECIFIC
                        //NOTE(review): when 'truncated-note' is absent endPosAux is 0, so any further title found ends the loop
                        //after the first recipient - confirm whether that is intended.
                        If curPos3>endPosAux Then curPos3:=0                                   //Avoid Names in notes. WEB_SPECIFIC
                    End;
                    curPos2:=PosFrom('<td class="award_description">',AwardBlock,endPos2);        //String which opens the AwardCategory and AwardRecipient list data. WEB_SPECIFIC
                End;
                curPos1:=PosFrom('<td class="award_outcome"',YearBlock,endPos1);               //String which opens the AwardName and Won list data. WEB_SPECIFIC
            End;
            curPos0:=PosFrom('<td class="award_year"',EventBlock,endPos0);                      //String which opens the AwardYear list data. WEB_SPECIFIC
        End;
        curPos:=PosFrom('<table class="awards"',HTML,endPos);                               //String which detects the Event. Search directly '<h3>' is very inconsistent. WEB_SPECIFIC
        index:=index+1;
    End;
    //END marker now names this function, matching the BEGIN marker above.
    LogMessage('Function ParsePage_IMDBPeopleAWARDS END=====================||');
  End; //BlockClose
--- End quote ---

Navigation

[0] Message Index

[#] Next page

[*] Previous page

Go to full version