Load points from a shapefile with GDAL

Load point coordinates and their spatial reference from a shapefile. We first presented this tutorial as part of a three-hour session on Working with Geographic Information Systems in Python during the 2009 Python Conference in Chicago, Illinois.

Requirements

For the walkthrough, you will also need the following:

Example

Download the code and data, unzip and start Python.

wget http://invisibleroads.com/tutorials/_downloads/gdal-shapefile-points.zip
unzip gdal-shapefile-points.zip
cd gdal-shapefile-points
python

Load points and spatial reference

>>> import point_store
>>> points, spatialReferenceAsProj4 = point_store.load('points.shp')
>>> points
[(474592.60011966614, 4429278.1561588384),
 (474577.93537380069, 4429134.3778895922),
 (474496.64167389442, 4429246.2762765223)]
>>> spatialReferenceAsProj4
'+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs '

Walkthrough

Download the code and data

wget http://invisibleroads.com/tutorials/_downloads/gdal-shapefile-points.zip
unzip gdal-shapefile-points.zip
cd gdal-shapefile-points

You can peek at the contents of the shapefile using Quantum GIS

_images/gdal-shapefile-points-load-qgis.png

Open shapefile

Start IPython

ipython

Import GDAL

>>> import osgeo.ogr

Make sure are in the same directory as the shapefile

>>> ls
points.dbf*  points.prj*  points.shp*  points.shx*  point_store.py*

Open the shapefile

>>> shapeData = osgeo.ogr.Open('points.shp')
>>> shapeData
<osgeo.ogr.DataSource; proxy of <Swig Object of type 'OGRDataSourceShadow *' at 0x88502e8> >

Examine shapefile

Type “shapeData.” and press TAB to see what you can do

>>> shapeData.
shapeData.CopyLayer            shapeData.Reference            shapeData.__len__
shapeData.CreateLayer          shapeData.Release              shapeData.__module__
shapeData.DeleteLayer          shapeData.ReleaseResultSet     shapeData.__new__
shapeData.Dereference          shapeData.TestCapability       shapeData.__reduce__
shapeData.Destroy              shapeData.__class__            shapeData.__reduce_ex__
shapeData.ExecuteSQL           shapeData.__del__              shapeData.__repr__
shapeData.GetDriver            shapeData.__delattr__          shapeData.__setattr__
shapeData.GetLayer             shapeData.__dict__             shapeData.__str__
shapeData.GetLayerByIndex      shapeData.__doc__              shapeData.__swig_destroy__
shapeData.GetLayerByName       shapeData.__getattr__          shapeData.__swig_getmethods__
shapeData.GetLayerCount        shapeData.__getattribute__     shapeData.__swig_setmethods__
shapeData.GetName              shapeData.__getitem__          shapeData.__weakref__
shapeData.GetRefCount          shapeData.__hash__             shapeData.name
shapeData.GetSummaryRefCount   shapeData.__init__             shapeData.this

Get the first layer

>>> layer = shapeData.GetLayer()

Count the number of features in a layer

>>> layer.GetFeatureCount()
3

Get the spatial reference of the layer

>>> spatialReference = layer.GetSpatialRef()
>>> spatialReference.ExportToProj4()
'+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs '

Examine the first point

>>> feature = layer.GetFeature(0)
>>> geometry = feature.GetGeometryRef()
>>> geometry.GetX()
474592.60011966614
>>> geometry.GetY()
4429278.1561588384

Get points

Collect points in a list

shapeData = osgeo.ogr.Open('points.shp')
layer = shapeData.GetLayer()
points = []
for index in xrange(layer.GetFeatureCount()):
    feature = layer.GetFeature(index)
    geometry = feature.GetGeometryRef()
    points.append((geometry.GetX(), geometry.GetY()))

Display points

>>> points
[(474592.60011966614, 4429278.1561588384),
 (474577.93537380069, 4429134.3778895922),
 (474496.64167389442, 4429246.2762765223)]

Code

# Import system modules
import osgeo.ogr
import osgeo.osr
import os


# Core

def load(shapePath):
    """Given a shapePath, return a list of points in GIS coordinates."""
    # Open shapeData
    shapeData = osgeo.ogr.Open(validateShapePath(shapePath))
    # Validate shapeData
    validateShapeData(shapeData)
    # Get the first layer
    layer = shapeData.GetLayer()
    # Initialize
    points = []
    # For each point,
    for index in xrange(layer.GetFeatureCount()):
        # Get feature
        feature = layer.GetFeature(index)
        # Get pointObject
        geometry = feature.GetGeometryRef()
        # Make sure that it is a point
        if geometry.GetGeometryType() != osgeo.ogr.wkbPoint: 
            raise ShapeDataError(x_wrongGeometryType)
        # Get pointCoordinates
        pointCoordinates = geometry.GetX(), geometry.GetY()
        # Append
        points.append(pointCoordinates)
    # Return
    return points, layer.GetSpatialRef().ExportToProj4()

def save(shapePath, geoLocations, spatialReferenceAsProj4):
    """Save points in the given shapePath."""
    # Get driver
    driver = osgeo.ogr.GetDriverByName('ESRI Shapefile')
    # Create shapeData
    shapePath = validateShapePath(shapePath)
    if os.path.exists(shapePath): os.remove(shapePath)
    shapeData = driver.CreateDataSource(shapePath)
    # Create spatialReference
    spatialReference = getSpatialReferenceFromProj4(spatialReferenceAsProj4)
    # Create layer
    layerName = os.path.splitext(os.path.split(shapePath)[1])[0]
    layer = shapeData.CreateLayer(layerName, spatialReference, osgeo.ogr.wkbPoint)
    layerDefinition = layer.GetLayerDefn()
    # For each point,
    for pointIndex, geoLocation in enumerate(geoLocations):
        # Create point
        point = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint)
        point.SetPoint(0, geoLocation[0], geoLocation[1])
        # Create feature
        feature = osgeo.ogr.Feature(layerDefinition)
        feature.SetGeometry(point)
        feature.SetFID(pointIndex)
        # Save feature
        layer.CreateFeature(feature)
    # Return
    return shapePath

def merge(sourcePaths, targetPath):
    # Load
    items = [load(validateShapePath(x)) for x in sourcePaths]
    pointLists = [x[0] for x in items]
    points = reduce(lambda x,y: x+y, pointLists)
    spatialReferences= [x[1] for x in items]
    # Make sure that all the spatial references are the same
    if len(set(spatialReferences)) != 1: 
        raise ShapeDataError(x_differentSpatialReferences)
    spatialReference = spatialReferences[0]
    # Save
    save(validateShapePath(targetPath), points, spatialReference)

def getSpatialReferenceFromProj4(spatialReferenceAsProj4):
    spatialReference = osgeo.osr.SpatialReference()
    spatialReference.ImportFromProj4(spatialReferenceAsProj4)
    return spatialReference


# Validate

def validateShapePath(shapePath):
    return os.path.splitext(str(shapePath))[0] + '.shp'

def validateShapeData(shapeData):
    # Make sure the shapefile exists
    if not shapeData: raise ShapeDataError(x_badPath)
    # Make sure there is exactly one layer
    if shapeData.GetLayerCount() != 1: raise ShapeDataError(x_wrongLayerCount)


# Error

class ShapeDataError(Exception):
    def __init__(self, code): self.code = code
    def __str__(self): return messagePackByCode[self.code][0]
    def getMessagePack(self): return messagePackByCode[self.code]

x_badPath, x_wrongLayerCount, x_wrongGeometryType, x_differentSpatialReferences = xrange(4)

messagePackByCode = {
    x_badPath: (
        'The shapefile is invalid.',
        'Bad path',
    ),
    x_wrongLayerCount: (
        'The shapefile must have exactly one layer.',
        'Wrong number of layers',
    ),
    x_wrongGeometryType: (
        'The shapefile must contain only points.',
        'Wrong geometry',
    ),
    x_differentSpatialReferences: (
        'The shapefiles must have the same spatial reference.',
        'Different spatial references',
    ),
}

Table Of Contents

Previous topic

Install GeoDjango

Next topic

Save points to a shapefile with GDAL

This Page