Scraping web content by using YQL


The YQL (Yahoo! Query Language) platform enables you to query, filter, and combine data across the web through a single interface. It exposes a SQL-like syntax that is both familiar to developers and expressive enough for getting the right data.

Read first: HTML XPath examples

Demo web page 1:

Example 1: raw XPath

select content from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='/html/body/h1'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:09:39Z",
  "lang": "en-US",
  "diagnostics": {
  ... 
  "results": {
   "h1": "Lorem ipsum dolor sit amet"
  }
 }
}

Example 2: find an element by ID

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='//*[@id="p1"]'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:12:23Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "p": {
    "id": "p1",
    "a": {
     "href": "blah.html",
     "title": "Cras massa purus",
     "content": "Cras massa purus"
    },
    "content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. , euismod non dui eget, ullamcorper consequat augue. Fusce enim sem, suscipit a nulla quis, vestibulum fermentum nulla."
   }
  }
 }
}

Example 3: child of an element selected by ID

select content from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='//*[@id="p1"]/a'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:14:09Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "a": "Cras massa purus"
  }
 }
}

Example 4: find an element that contains a given piece of text

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='//p[contains(.,"dolor sit")]'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:15:29Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "p": {
    "id": "p1",
    "a": {
     "href": "blah.html",
     "title": "Cras massa purus",
     "content": "Cras massa purus"
    },
    "content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. , euismod non dui eget, ullamcorper consequat augue. Fusce enim sem, suscipit a nulla quis, vestibulum fermentum nulla."
   }
  }
 }
}

Example 5: find an element whose content exactly matches a given text

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='//h1[.="Lorem ipsum dolor sit amet"]'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:16:18Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "h1": "Lorem ipsum dolor sit amet"
  }
 }
}

Example 6: find an element by one or more attributes

select content from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='//a[@title="Cras massa purus"]'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:17:46Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "a": "Cras massa purus"
  }
 }
}

Example 6b: find an element by one of its child elements

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='//p[a[@title="Cras massa purus"]]'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:30:08Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "p": {
    "id": "p1",
    "a": {
     "href": "blah.html",
     "title": "Cras massa purus",
     "content": "Cras massa purus"
    },
    "content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. , euismod non dui eget, ullamcorper consequat augue. Fusce enim sem, suscipit a nulla quis, vestibulum fermentum nulla."
   }
  }
 }
}

Example 7: the Nth element

select content from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxoMT5Mb3JlbSBpcHN1bSBkb2xvciBzaXQgYW1ldDwvaDE%2BIDxwIGlkPSJwMSI%2BTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gPGEgaHJlZj0iYmxhaC5odG1sIiB0aXRsZT0iQ3JhcyBtYXNzYSBwdXJ1cyI%2BQ3JhcyBtYXNzYSBwdXJ1czwvYT4sIGV1aXNtb2Qgbm9uIGR1aSBlZ2V0LCB1bGxhbWNvcnBlciBjb25zZXF1YXQgYXVndWUuIEZ1c2NlIGVuaW0gc2VtLCBzdXNjaXBpdCBhIG51bGxhIHF1aXMsIHZlc3RpYnVsdW0gZmVybWVudHVtIG51bGxhLjwvcD4gPHAgaWQ9InAyIj5QaGFzZWxsdXMgYWxpcXVhbSBsaWd1bGEgaWQgbWV0dXMgcGVsbGVudGVzcXVlIHVsdHJpY2VzLiBVdCBhbGlxdWFtIG51bGxhIGFudGUsIHZpdGFlIHVsdHJpY2llcyBkdWkgcnV0cnVtIHF1aXMuIDxhIGhyZWY9ImJsYWguaHRtbCIgdGl0bGU9IlN1c3BlbmRpc3NlIHBvdGVudGkiPlN1c3BlbmRpc3NlIHBvdGVudGk8L2E%2BLiBOdW5jIGV1IGhlbmRyZXJpdCBleC48L3A%2BIDwvYm9keT48L2h0bWw%2B" and xpath='(//a)[2]'

Result:

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:31:51Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "a": "Suspendisse potenti"
  }
 }
}

scraping web content using yql

Demo web page 2:

Example 8:

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxkaXYgaWQ9InByb2R1Y3RzIj4gPGgxPlByb2R1Y3Q8L2gxPiA8dWw%2BICAgICA8bGkgZGF0YS1pZD0iMSI%2BICAgICAgICAgPGltZyBzcmM9Imh0dHA6Ly90dXRvcmlhbHNwb3RzLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxMy8wNC8xLnBuZyIgLz4gICAgICAgICA8ZGl2IGNsYXNzPSJuYW1lIj5Qcm9kdWN0IDE8L2Rpdj4gICAgICAgICA8ZGl2IGNsYXNzPSJwcmljZSI%2BMTAwPC9kaXY%2BICAgICA8L2xpPiAgICAgPGxpIGRhdGEtaWQ9IjIiPiAgICAgICAgIDxpbWcgc3JjPSJodHRwOi8vdHV0b3JpYWxzcG90cy5jb20vd3AtY29udGVudC91cGxvYWRzLzIwMTMvMDQvMS5wbmciIC8%2BICAgICAgICAgPGRpdiBjbGFzcz0ibmFtZSI%2BUHJvZHVjdCAyPC9kaXY%2BICAgICAgICAgPGRpdiBjbGFzcz0icHJpY2UiPjIwMDwvZGl2PiAgICAgPC9saT4gICAgIDxsaSBkYXRhLWlkPSIzIj4gICAgICAgICA8aW1nIHNyYz0iaHR0cDovL3R1dG9yaWFsc3BvdHMuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDEzLzA0LzEucG5nIiAvPiAgICAgICAgIDxkaXYgY2xhc3M9Im5hbWUiPlByb2R1Y3QgMzwvZGl2PiAgICAgICAgIDxkaXYgY2xhc3M9InByaWNlIj4zMDA8L2Rpdj4gICAgIDwvbGk%2BIDwvdWw%2BIDwvZGl2PiA8L2JvZHk%2BPC9odG1sPg%3D%3D" and xpath='//li[position() >= 2]'

Returns all li elements whose position among their sibling li elements is 2 or greater.

{
 "query": {
  "count": 2,
  "created": "2016-12-14T08:40:47Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "li": [
    {
     "data-id": "2",
     "img": {
      "src": "http://tutorialspots.com/wp-content/uploads/2013/04/1.png"
     },
     "div": [
      {
       "class": "name",
       "content": "Product 2"
      },
      {
       "class": "price",
       "content": "200"
      }
     ]
    },
    {
     "data-id": "3",
     "img": {
      "src": "http://tutorialspots.com/wp-content/uploads/2013/04/1.png"
     },
     "div": [
      {
       "class": "name",
       "content": "Product 3"
      },
      {
       "class": "price",
       "content": "300"
      }
     ]
    }
   ]
  }
 }
}

Example 9:

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxkaXYgaWQ9InByb2R1Y3RzIj4gPGgxPlByb2R1Y3Q8L2gxPiA8dWw%2BICAgICA8bGkgZGF0YS1pZD0iMSI%2BICAgICAgICAgPGltZyBzcmM9Imh0dHA6Ly90dXRvcmlhbHNwb3RzLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxMy8wNC8xLnBuZyIgLz4gICAgICAgICA8ZGl2IGNsYXNzPSJuYW1lIj5Qcm9kdWN0IDE8L2Rpdj4gICAgICAgICA8ZGl2IGNsYXNzPSJwcmljZSI%2BMTAwPC9kaXY%2BICAgICA8L2xpPiAgICAgPGxpIGRhdGEtaWQ9IjIiPiAgICAgICAgIDxpbWcgc3JjPSJodHRwOi8vdHV0b3JpYWxzcG90cy5jb20vd3AtY29udGVudC91cGxvYWRzLzIwMTMvMDQvMS5wbmciIC8%2BICAgICAgICAgPGRpdiBjbGFzcz0ibmFtZSI%2BUHJvZHVjdCAyPC9kaXY%2BICAgICAgICAgPGRpdiBjbGFzcz0icHJpY2UiPjIwMDwvZGl2PiAgICAgPC9saT4gICAgIDxsaSBkYXRhLWlkPSIzIj4gICAgICAgICA8aW1nIHNyYz0iaHR0cDovL3R1dG9yaWFsc3BvdHMuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDEzLzA0LzEucG5nIiAvPiAgICAgICAgIDxkaXYgY2xhc3M9Im5hbWUiPlByb2R1Y3QgMzwvZGl2PiAgICAgICAgIDxkaXYgY2xhc3M9InByaWNlIj4zMDA8L2Rpdj4gICAgIDwvbGk%2BIDwvdWw%2BIDwvZGl2PiA8L2JvZHk%2BPC9odG1sPg%3D%3D" and xpath='//li[*="Product 1"]'

Returns all li elements that have any child element whose text content is “Product 1”.

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:41:50Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "li": {
    "data-id": "1",
    "img": {
     "src": "http://tutorialspots.com/wp-content/uploads/2013/04/1.png"
    },
    "div": [
     {
      "class": "name",
      "content": "Product 1"
     },
     {
      "class": "price",
      "content": "100"
     }
    ]
   }
  }
 }
}

Example 10:

select * from html where url="http://demo.tutorialspots.com/html/?html=PGh0bWw%2BPGJvZHk%2BIDxkaXYgaWQ9InByb2R1Y3RzIj4gPGgxPlByb2R1Y3Q8L2gxPiA8dWw%2BICAgICA8bGkgZGF0YS1pZD0iMSI%2BICAgICAgICAgPGltZyBzcmM9Imh0dHA6Ly90dXRvcmlhbHNwb3RzLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxMy8wNC8xLnBuZyIgLz4gICAgICAgICA8ZGl2IGNsYXNzPSJuYW1lIj5Qcm9kdWN0IDE8L2Rpdj4gICAgICAgICA8ZGl2IGNsYXNzPSJwcmljZSI%2BMTAwPC9kaXY%2BICAgICA8L2xpPiAgICAgPGxpIGRhdGEtaWQ9IjIiPiAgICAgICAgIDxpbWcgc3JjPSJodHRwOi8vdHV0b3JpYWxzcG90cy5jb20vd3AtY29udGVudC91cGxvYWRzLzIwMTMvMDQvMS5wbmciIC8%2BICAgICAgICAgPGRpdiBjbGFzcz0ibmFtZSI%2BUHJvZHVjdCAyPC9kaXY%2BICAgICAgICAgPGRpdiBjbGFzcz0icHJpY2UiPjIwMDwvZGl2PiAgICAgPC9saT4gICAgIDxsaSBkYXRhLWlkPSIzIj4gICAgICAgICA8aW1nIHNyYz0iaHR0cDovL3R1dG9yaWFsc3BvdHMuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDEzLzA0LzEucG5nIiAvPiAgICAgICAgIDxkaXYgY2xhc3M9Im5hbWUiPlByb2R1Y3QgMzwvZGl2PiAgICAgICAgIDxkaXYgY2xhc3M9InByaWNlIj4zMDA8L2Rpdj4gICAgIDwvbGk%2BIDwvdWw%2BIDwvZGl2PiA8L2JvZHk%2BPC9odG1sPg%3D%3D" and xpath='//div[@class="name" and ../div[@class="price"]>200]'

Returns all div elements whose class attribute is “name” and that have a sibling div element whose class attribute is “price” and whose content is greater than 200.

{
 "query": {
  "count": 1,
  "created": "2016-12-14T08:42:36Z",
  "lang": "en-US",
  "diagnostics": {
   ...
  "results": {
   "div": {
    "class": "name",
    "content": "Product 3"
   }
  }
 }
}

Try it yourself: https://developer.yahoo.com/yql/console/

Leave a Reply