【问题标题】:How to access latitude and longitude in a script with beautifulsoup?如何使用 beautifulsoup 在脚本中访问经纬度?
【发布时间】:2015-09-05 08:31:54
【问题描述】:

我想使用 beautifulsoup 从网页中获取纬度和经度,但它们在脚本中:

//<![CDATA[

theForm.oldSubmit = theForm.submit;
theForm.submit = WebForm_SaveScrollPositionSubmit;

theForm.oldOnSubmit = theForm.onsubmit;
theForm.onsubmit = WebForm_SaveScrollPositionOnSubmit;
var GMapsProperties={};function getGMapElementById(mapId,GMapElementId){var _mapId=typeof(mapId)=='string'? mapId : mapId.getDiv().id;var overlayArray=GMapsProperties[_mapId]['overlayArray'];for(var i=0;i < overlayArray.length;i++){if(overlayArray[i][0]==GMapElementId){return overlayArray[i][1];}}return null;}function removeGMapElementById(mapId,GMapElementId){var _mapId=typeof(mapId)=='string'? mapId : mapId.getDiv().id;var overlayArray=GMapsProperties[_mapId]['overlayArray'];for(var i=0;i < overlayArray.length;i++){if(overlayArray[i][0]==GMapElementId){overlayArray.splice(i,1);return;}}}function closeWindows(mapId){for(var i=0;i<GMapsProperties[mapId]['windowArray'].length;i++){GMapsProperties[mapId]['windowArray'][i][1].close();}}var _sg=_sg ||{};_sg.cs=(function(){var p={};p.createMarker=function(opt,id){var m=new google.maps.Marker(opt);if(id && m.getMap())GMapsProperties[m.getMap().getDiv().id]['overlayArray'].push([id,m]);return m;};p.createPolyline=function(opt,id){var m=new google.maps.Polyline(opt);if(id && m.getMap())GMapsProperties[m.getMap().getDiv().id]['overlayArray'].push([id,m]);return m;};p.createPolygon=function(opt,id){var m=new google.maps.Polygon(opt);if(id && m.getMap())GMapsProperties[m.getMap().getDiv().id]['overlayArray'].push([id,m]);return m;};return p;})();function addEvent(el,ev,fn){if(el.addEventListener)el.addEventListener(ev,fn,false);else if(el.attachEvent)el.attachEvent('on'+ev,fn);else el['on'+ev]=fn;}GMapsProperties['subgurim_GoogleMapControl'] = {}; var GMapsProperties_subgurim_GoogleMapControl = GMapsProperties['subgurim_GoogleMapControl']; GMapsProperties_subgurim_GoogleMapControl['enableStore'] = false; GMapsProperties_subgurim_GoogleMapControl['overlayArray'] = new Array(); GMapsProperties_subgurim_GoogleMapControl['windowArray'] = new Array();var subgurim_GoogleMapControl;function load_subgurim_GoogleMapControl(){var mapDOM = document.getElementById('subgurim_GoogleMapControl'); if (!mapDOM) 
return;subgurim_GoogleMapControl = new google.maps.Map(mapDOM);function subgurim_GoogleMapControlupdateValues(eventId,value){var item=document.getElementById('subgurim_GoogleMapControl_Event'+eventId);item.value=value;}google.maps.event.addListener(subgurim_GoogleMapControl, 'addoverlay', function(overlay) { if(overlay) { GMapsProperties['subgurim_GoogleMapControl']['overlayArray'].push(overlay); } });google.maps.event.addListener(subgurim_GoogleMapControl, 'clearoverlays', function() { GMapsProperties['subgurim_GoogleMapControl']['overlayArray'] = new Array(); });google.maps.event.addListener(subgurim_GoogleMapControl, 'removeoverlay', function(overlay) { removeGMapElementById('subgurim_GoogleMapControl',overlay.id) });google.maps.event.addListener(subgurim_GoogleMapControl, 'maptypeid_changed', function() { var tipo = subgurim_GoogleMapControl.getMapTypeId(); subgurim_GoogleMapControlupdateValues('0', tipo);});google.maps.event.addListener(subgurim_GoogleMapControl, 'dragend', function() { var lat = subgurim_GoogleMapControl.getCenter().lat(); var lng = subgurim_GoogleMapControl.getCenter().lng(); subgurim_GoogleMapControlupdateValues('2', lat+','+lng); });google.maps.event.addListener(subgurim_GoogleMapControl, 'zoom_changed', function() { subgurim_GoogleMapControlupdateValues('1', subgurim_GoogleMapControl.getZoom()); });subgurim_GoogleMapControl.setOptions({center:new google.maps.LatLng(35.6783546483511,51.4196634292603),disableDefaultUI:true,keyboardShortcuts:false,mapTypeControl:false,mapTypeId:google.maps.MapTypeId.ROADMAP,scrollwheel:false,zoom:14});var marker_subgurim_920435_=_sg.cs.createMarker({position:new google.maps.LatLng(35.6783546483511,51.4196634292603),clickable:true,draggable:false,map:subgurim_GoogleMapControl,raiseOnDrag:true,visible:true,icon:'/images/markers/Site/Tourism/vase.png'}, 'marker_subgurim_920435_');}addEvent(window,'load',load_subgurim_GoogleMapControl);//]]>

我想要这部分的信息:

{position:new google.maps.LatLng(35.6783546483511,51.4196634292603)

是否可以使用 beautifulsoup 或任何其他网络抓取工具访问该信息?

【问题讨论】:

    标签: python python-2.7 web-scraping beautifulsoup


    【解决方案1】:

    可以使用正则表达式(regular expression)来实现:

    import ast
    import re

    # Suppose script is stored in variable script_file
    # Capture the parenthesised argument list of the first LatLng(...) call,
    # e.g. "(35.6783546483511,51.4196634292603)".
    m = re.search(r'LatLng(\(.+?\))', script_file)
    if m is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('no LatLng(...) call found in script_file')

    # The text comes from a remote page, so never eval() it: literal_eval
    # only accepts Python literals and cannot execute arbitrary code.
    latlng = ast.literal_eval(m.group(1))
    print(latlng) #(35.6783546483511, 51.4196634292603)
    

    【讨论】:

    • 你能解释一下我如何使用正则表达式吗?
    • 我将脚本存储到 script_file 但是当我运行 python 脚本时我得到了这个错误:latlng = eval(latlng) File "", line 1 (3 ^ SyntaxError: unexpected EOF while parsing
    • @Mehdi 之前的代码有一个语法错误,现在再试一下,应该可以正常运行了。
    【解决方案2】:
    import re

    # Sample fragment of the page's inline script that holds the coordinates.
    s = 'position:new google.maps.LatLng(35.6783546483511,51.4196634292603)'

    # Group 1 is the text between the first '(' and the first ',';
    # group 2 is everything after that comma up to the closing ')'.
    found = re.search(r'\(([^,]+),([^)]+)', s)
    lat = float(found.group(1))
    lng = float(found.group(2))
    

    【讨论】:

    • 谢谢@simleo,但是有没有办法从html代码中获取“s”?我必须使用它大约 10 万页
    【解决方案3】:

    如果要分别获取纬度和经度,可以像下面这样使用正则表达式(regular expression):

    import re

    # Sample fragment of the inline script that carries the marker position.
    s = 'position:new google.maps.LatLng(35.6783546483511,51.4196634292603)'

    # Matches LatLng(<lat>,<lng>). Each coordinate may carry a leading minus
    # sign (southern / western hemispheres), and whitespace around the numbers
    # is tolerated so "LatLng(-33.8, 151.2)" is matched as well.
    LATLNG_RE = re.compile(r'LatLng\(\s*(-?[\d.]+)\s*,\s*(-?[\d.]+)\s*\)')

    # groups() returns the two captured strings in (latitude, longitude) order.
    Lat, Lng = map(float, LATLNG_RE.search(s).groups())
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2016-02-04
      • 1970-01-01
      • 2020-06-28
      • 1970-01-01
      • 2013-07-26
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多