Load points from a shapefile with GDAL

Load point coordinates and their spatial reference from a shapefile. We first presented this tutorial as part of a three-hour session on Working with Geographic Information Systems in Python during the 2009 Python Conference in Chicago, Illinois.

Loading other geometries such as lines and polygons and their associated fields are covered in Save and load geometries and fields from a shapefile with GDAL.

Requirements

For the walkthrough, you will also need the following:

Example

Download the code and data, unzip and start Python.

wget http://invisibleroads.com/tutorials/_downloads/gdal-shapefile-points.zip
unzip gdal-shapefile-points.zip
cd gdal-shapefile-points
python

Load points and spatial reference.

>>> import point_store
>>> points, spatialReferenceAsProj4 = point_store.load('points.shp')
>>> points
[(474592.60011966614, 4429278.1561588384),
 (474577.93537380069, 4429134.3778895922),
 (474496.64167389442, 4429246.2762765223)]
>>> spatialReferenceAsProj4
'+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs '

Walkthrough

Download the code and data.

wget http://invisibleroads.com/tutorials/_downloads/gdal-shapefile-points.zip
unzip gdal-shapefile-points.zip
cd gdal-shapefile-points

You can peek at the contents of the shapefile using Quantum GIS.

_images/gdal-shapefile-points-load-qgis.png

Open shapefile

Start IPython.

ipython

Import GDAL.

>>> import osgeo.ogr

Make sure we are in the same directory as the shapefile.

>>> ls
points.dbf*  points.prj*  points.shp*  points.shx*  point_store.py*

Open the shapefile.

>>> shapeData = osgeo.ogr.Open('points.shp')
>>> shapeData
<osgeo.ogr.DataSource; proxy of <Swig Object of type 'OGRDataSourceShadow *' at 0x88502e8> >

Examine shapefile

Type shapeData. and press TAB to see what you can do.

>>> shapeData.
shapeData.CopyLayer            shapeData.Reference            shapeData.__len__
shapeData.CreateLayer          shapeData.Release              shapeData.__module__
shapeData.DeleteLayer          shapeData.ReleaseResultSet     shapeData.__new__
shapeData.Dereference          shapeData.TestCapability       shapeData.__reduce__
shapeData.Destroy              shapeData.__class__            shapeData.__reduce_ex__
shapeData.ExecuteSQL           shapeData.__del__              shapeData.__repr__
shapeData.GetDriver            shapeData.__delattr__          shapeData.__setattr__
shapeData.GetLayer             shapeData.__dict__             shapeData.__str__
shapeData.GetLayerByIndex      shapeData.__doc__              shapeData.__swig_destroy__
shapeData.GetLayerByName       shapeData.__getattr__          shapeData.__swig_getmethods__
shapeData.GetLayerCount        shapeData.__getattribute__     shapeData.__swig_setmethods__
shapeData.GetName              shapeData.__getitem__          shapeData.__weakref__
shapeData.GetRefCount          shapeData.__hash__             shapeData.name
shapeData.GetSummaryRefCount   shapeData.__init__             shapeData.this

Get the first layer.

>>> layer = shapeData.GetLayer()

Count the number of features in a layer.

>>> layer.GetFeatureCount()
3

Get the spatial reference of the layer.

>>> spatialReference = layer.GetSpatialRef()
>>> spatialReference.ExportToProj4()
'+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs '

Examine the first point.

>>> feature = layer.GetFeature(0)
>>> geometry = feature.GetGeometryRef()
>>> geometry.GetX()
474592.60011966614
>>> geometry.GetY()
4429278.1561588384

Get points

Collect points in a list.

shapeData = osgeo.ogr.Open('points.shp')
layer = shapeData.GetLayer()
points = []
for index in xrange(layer.GetFeatureCount()):
    feature = layer.GetFeature(index)
    geometry = feature.GetGeometryRef()
    points.append((geometry.GetX(), geometry.GetY()))

Display points.

>>> points
[(474592.60011966614, 4429278.1561588384),
 (474577.93537380069, 4429134.3778895922),
 (474496.64167389442, 4429246.2762765223)]

Code

'Save and load points to a shapefile'
# Import system modules
import osgeo.ogr
import osgeo.osr
import os


# Core

def save(shapePath, geoLocations, proj4):
    'Save points in the given shapePath'
    # Get driver
    driver = osgeo.ogr.GetDriverByName('ESRI Shapefile')
    # Create shapeData
    shapePath = validateShapePath(shapePath)
    if os.path.exists(shapePath): 
        os.remove(shapePath)
    shapeData = driver.CreateDataSource(shapePath)
    # Create spatialReference
    spatialReference = getSpatialReferenceFromProj4(proj4)
    # Create layer
    layerName = os.path.splitext(os.path.split(shapePath)[1])[0]
    layer = shapeData.CreateLayer(layerName, spatialReference, osgeo.ogr.wkbPoint)
    layerDefinition = layer.GetLayerDefn()
    # For each point,
    for pointIndex, geoLocation in enumerate(geoLocations):
        # Create point
        geometry = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint)
        geometry.SetPoint(0, geoLocation[0], geoLocation[1])
        # Create feature
        feature = osgeo.ogr.Feature(layerDefinition)
        feature.SetGeometry(geometry)
        feature.SetFID(pointIndex)
        # Save feature
        layer.CreateFeature(feature)
        # Cleanup
        geometry.Destroy()
        feature.Destroy()
    # Cleanup
    shapeData.Destroy()
    # Return
    return shapePath

def load(shapePath):
    'Given a shapePath, return a list of points in GIS coordinates'
    # Open shapeData
    shapeData = osgeo.ogr.Open(validateShapePath(shapePath))
    # Validate shapeData
    validateShapeData(shapeData)
    # Get the first layer
    layer = shapeData.GetLayer()
    # Initialize
    points = []
    # For each point,
    for index in xrange(layer.GetFeatureCount()):
        # Get
        feature = layer.GetFeature(index)
        geometry = feature.GetGeometryRef()
        # Make sure that it is a point
        if geometry.GetGeometryType() != osgeo.ogr.wkbPoint: 
            raise ShapeDataError('This module can only load points; use geometry_store.py')
        # Get pointCoordinates
        pointCoordinates = geometry.GetX(), geometry.GetY()
        # Append
        points.append(pointCoordinates)
        # Cleanup
        feature.Destroy()
    # Get spatial reference as proj4
    proj4 = layer.GetSpatialRef().ExportToProj4()
    # Cleanup
    shapeData.Destroy()
    # Return
    return points, proj4

def merge(sourcePaths, targetPath):
    'Merge a list of shapefiles into a single shapefile'
    # Load
    items = [load(validateShapePath(x)) for x in sourcePaths]
    pointLists = [x[0] for x in items]
    points = reduce(lambda x,y: x+y, pointLists)
    spatialReferences= [x[1] for x in items]
    # Make sure that all the spatial references are the same
    if len(set(spatialReferences)) != 1: 
        raise ShapeDataError('The shapefiles must have the same spatial reference')
    spatialReference = spatialReferences[0]
    # Save
    save(validateShapePath(targetPath), points, spatialReference)

def getSpatialReferenceFromProj4(proj4):
    'Return GDAL spatial reference object from proj4 string'
    spatialReference = osgeo.osr.SpatialReference()
    spatialReference.ImportFromProj4(proj4)
    return spatialReference


# Validate

def validateShapePath(shapePath):
    'Validate shapefile extension'
    return os.path.splitext(str(shapePath))[0] + '.shp'

def validateShapeData(shapeData):
    'Make sure we can access the shapefile'
    # Make sure the shapefile exists
    if not shapeData:
        raise ShapeDataError('The shapefile is invalid')
    # Make sure there is exactly one layer
    if shapeData.GetLayerCount() != 1:
        raise ShapeDataError('The shapefile must have exactly one layer')


# Error

class ShapeDataError(Exception):
    pass