JOIN
by Simon Charette
charettes@gmail.com github.com/charettes
INNER JOIN
LEFT OUTER JOIN
from django.db import models class Publisher(models.Model): name = models.CharField() class Book(models.Model): title = models.CharField() publisher = models.ForeignKey(Publisher, null=True)
>>> publisher = Publisher.objects.create() >>> Book.objects.create( ... title="Published book", ... publisher=publisher, ... ) >>> Book.objects.create( ... title="Independent book", ... )
SELECT * FROM book INNER JOIN publisher ON (publisher.id = book.publisher_id)
NULL
SELECT * FROM book LEFT OUTER JOIN publisher ON (publisher.id = book.publisher_id)
INNER
OUTER
__
filter
exclude
F
select_related
related_name
Why is it important to pick one over the other?
SELECT NULL = NULL -> false
When does Django picks one over the other?
OUTER JOIN
class Room(models.Model): number = models.IntegerField() class Reservation(models.Model): room = models.ForeignKey( Room, related_name="reservations" )
Reservation.objects.select_related("room")
SELECT * FROM reservation INNER JOIN room ON (reservation.room_id = room.id)
Room.objects.annotate( reservation=Latest("reservations"), )
SELECT * FROM room -- Reverse relationships are nullable thus -- an OUTER JOIN must be used. LEFT OUTER JOIN reservation ON (reservation.room_id = room.id) GROUP BY room.id ORDER BY reservation.id DESC
NULL != <ANYTHING>
col = <ANYTHING>
class Publisher(models.Model): name = models.CharField() class Author(models.Model): publisher = models.ForeignKey( Publisher, null=True, related_name="authors" )
Author.objects.filter(publisher__name="Foo")
SELECT * FROM author -- OUTER JOIN is not necessary as -- NULL != "Foo" and thus the relationship -- is "promoted" to not-NULLABLE. -- LEFT OUTER JOIN publisher ON (author.publisher_id = publisher.id) INNER JOIN publisher ON (author.publisher_id = publisher.id) WHERE publisher.name = "Foo"
Can you think of a few expressions that specialize NULL handling?
__isnull
IS NULL
Coalesce("nullable_field", ...)
COALESCE
IFNULL
Author.objects.filter( Q(publisher__name="Foo") | Q(publisher__name__isnull=True) )
SELECT author.* FROM author LEFT OUTER JOIN publisher ON (author.publisher_id = publisher.id) WHERE ( publisher.name = "Foo" OR publisher.name IS NULL )
Author.objects.filter( Q(publisher__name="Foo") | IsNull(F("publisher__name"), True) )
SELECT author.* FROM author -- this should be LEFT OUTER! INNER JOIN publisher ON (author.publisher_id = publisher.id) WHERE ( publisher.name = "Foo" OR publisher.name IS NULL )
class Location(models.Model): pass class Publisher(models.Model): name = models.CharField() location = models.ForeignKey(Location) class Author(models.Model): publisher = models.ForeignKey( Publisher, null=True, related_name="authors" )
Publisher.objects.select_related("location")
SELECT * FROM publisher -- publisher.location is not nullable thus -- rule #1 applies and an INNER JOIN is used INNER JOIN location ON (publisher.location_id = location.id)
Author.objects.select_related("publisher__location")
SELECT * FROM author -- author.publisher_id is NULL-able thus -- an OUTER JOIN must be used. LEFT OUTER JOIN publisher ON (author.publisher_id = publisher.id) -- publisher.location is not NULL-able *but* -- it's JOIN'ing from publisher which is -- NULL-able thus rule #4 applies. LEFT OUTER JOIN location ON (publisher.location_id = location.id)
If you are curious about how these rules are implemented you can refer to the following django.db.models.sql.Query methods
django.db.models.sql.Query
setup_join
join
JOINs can be reused between __ references
Queryset
class Location(models.Model): pass class Publisher(models.Model): name = models.CharField() location = models.ForeignKey(Location) class Author(models.Model): name = models.CharField() dob = models.DateField() publisher = models.ForeignKey( Publisher, null=True, related_name="authors" )
Publisher.objects.filter( authors__name__startswith="Simon", authors__dob__year=1987, )
SELECT * FROM publisher INNER JOIN author ON (publisher.id = author.publisher_id) WHERE ( name LIKE 'Simon%' AND dob BETWEEN '1987-01-01' AND '1987-12-31' )
Publisher.objects.filter( authors__name__startswith="Simon", ).filter(authors__dob__year=1987)
SELECT * FROM publisher INNER JOIN author ON (publisher.id = author.publisher_id) INNER JOIN author T0 ON (publisher.id = author.publisher_id) WHERE ( author.name LIKE 'Simon%' AND T1.dob BETWEEN '1987-01-01' AND '1987-12-31' )
Author.objects.filter(publisher__name="foo") .filter(publisher__location__id=123) .annotate(location_name=models.F("publisher__location__name"))
SELECT author.*, location.name AS location_name FROM author INNER JOIN publisher ON (publisher.id = author.publisher_id) WHERE ( publisher.name = 'Foo' AND publisher.location_id = 123 )
Thanks Priya!
Multi-valued relationship in same `filter` call
Multi-valued relationship in distinct `filter` call